string_util.c
Go to the documentation of this file.
1 /* Copyright (C) 2013-2016, The Regents of The University of Michigan.
2 All rights reserved.
3 
4 This software was developed in the APRIL Robotics Lab under the
5 direction of Edwin Olson, ebolson@umich.edu. This software may be
6 available under alternative licensing terms; contact the address above.
7 
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10 
11 1. Redistributions of source code must retain the above copyright notice, this
12  list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright notice,
14  this list of conditions and the following disclaimer in the documentation
15  and/or other materials provided with the distribution.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 The views and conclusions contained in the software and documentation are those
29 of the authors and should not be interpreted as representing official policies,
30 either expressed or implied, of the Regents of The University of Michigan.
31 */
32 
33 #include <assert.h>
34 #include <ctype.h>
35 #include <errno.h>
36 #include <string.h>
37 #include <stdarg.h>
38 #include <stdlib.h>
39 #include <stdio.h>
40 
41 #include "string_util.h"
42 #include "zarray.h"
43 
45 {
46  char *s; // heap buffer holding the NUL-terminated contents
47  int alloc; // number of bytes currently allocated for s
48  size_t size; // as if strlen() was called; not counting terminating \0
49 };
50 
// Size in bytes of the first buffer tried when formatting; a longer result
// triggers a single retry with an exactly-sized buffer.
#define MIN_PRINTF_ALLOC 16

// Formats fmt with the arguments in orig_args into a newly allocated string.
//
// orig_args is only ever copied (va_copy), so the caller's va_list is left
// untouched and must still be released by the caller with va_end().
//
// Returns a malloc()ed, NUL-terminated string that the caller must free(),
// or NULL if an allocation fails or vsnprintf() reports an error.
char *vsprintf_alloc(const char *fmt, va_list orig_args)
{
    assert(fmt != NULL);

    size_t size = MIN_PRINTF_ALLOC;
    char *buf = malloc(size);
    if (buf == NULL)
        return NULL;

    va_list args;
    va_copy(args, orig_args);
    int returnsize = vsnprintf(buf, size, fmt, args);
    va_end(args);

    // a negative result is an encoding/output error: buf holds nothing usable
    if (returnsize < 0) {
        free(buf);
        return NULL;
    }

    // the whole result (plus its terminator) fit in the first buffer
    if ((size_t) returnsize < size)
        return buf;

    // otherwise retry with exactly the size vsnprintf() asked for
    free(buf);
    size = (size_t) returnsize + 1;
    buf = malloc(size);
    if (buf == NULL)
        return NULL;

    va_copy(args, orig_args);
    returnsize = vsnprintf(buf, size, fmt, args);
    va_end(args);

    if (returnsize < 0 || (size_t) returnsize >= size) {
        free(buf);
        return NULL;
    }

    return buf;
}

// printf-style convenience wrapper around vsprintf_alloc().
//
// Returns a malloc()ed string the caller must free(), or NULL on failure.
char *sprintf_alloc(const char *fmt, ...)
{
    assert(fmt != NULL);

    va_list args;

    va_start(args, fmt);
    char *buf = vsprintf_alloc(fmt, args);
    va_end(args);

    return buf;
}
97 
// Concatenates a NULL-terminated list of C strings into one new string.
//
// first and every following variadic argument must be a `const char *`;
// the list ends at the first NULL pointer (first itself may be NULL, which
// yields an empty string).
//
// Returns a malloc()ed, NUL-terminated string the caller must free(), or
// NULL if the allocation fails.
char *_str_concat_private(const char *first, ...)
{
    va_list args;
    size_t len = 0;

    // first pass: total length, so a single allocation suffices
    va_start(args, first);
    for (const char *arg = first; arg != NULL; arg = va_arg(args, const char *))
        len += strlen(arg);
    va_end(args);

    char *str = malloc(len + 1);
    if (str == NULL)
        return NULL;

    // second pass: copy each piece behind the previous one
    char *ptr = str;
    va_start(args, first);
    for (const char *arg = first; arg != NULL; arg = va_arg(args, const char *)) {
        size_t n = strlen(arg);
        memcpy(ptr, arg, n);
        ptr += n;
    }
    va_end(args);

    *ptr = '\0';
    return str;
}
132 
// Returns the index of the first character at which a and b differ.
//
// If one string is a prefix of the other (or both are equal), the result is
// the length of the shorter string. Neither pointer may be NULL.
int str_diff_idx(const char * a, const char * b)
{
    assert(a != NULL);
    assert(b != NULL);

    // a[i] == b[i] together with a[i] != 0 implies b[i] != 0, so testing a
    // alone for the terminator is enough to stay inside both strings.
    int i = 0;
    while (a[i] != '\0' && a[i] == b[i])
        i++;

    return i;
}
152 
153 
154 zarray_t *str_split(const char *str, const char *delim)
155 {
156  assert(str != NULL);
157  assert(delim != NULL);
158 
159  zarray_t *parts = zarray_create(sizeof(char*));
161 
162  size_t delim_len = strlen(delim);
163  size_t len = strlen(str);
164  size_t pos = 0;
165 
166  while (pos < len) {
167  if (str_starts_with(&str[pos], delim) && delim_len > 0) {
168  pos += delim_len;
169  // never add empty strings (repeated tokens)
170  if (string_buffer_size(sb) > 0) {
171  char *part = string_buffer_to_string(sb);
172  zarray_add(parts, &part);
173  }
175  } else {
176  string_buffer_append(sb, str[pos]);
177  pos++;
178  }
179  }
180 
181  if (string_buffer_size(sb) > 0) {
182  char *part = string_buffer_to_string(sb);
183  zarray_add(parts, &part);
184  }
185 
187  return parts;
188 }
189 
190 // split on one or more spaces.
192 {
193  zarray_t *parts = zarray_create(sizeof(char*));
194  size_t len = strlen(str);
195  size_t pos = 0;
196 
197  while (pos < len) {
198 
199  while (pos < len && str[pos] == ' ')
200  pos++;
201 
202  // produce a token?
203  if (pos < len) {
204  // yes!
205  size_t off0 = pos;
206  while (pos < len && str[pos] != ' ')
207  pos++;
208  size_t off1 = pos;
209 
210  size_t len = off1 - off0;
211  char *tok = malloc(len + 1);
212  memcpy(tok, &str[off0], len);
213  tok[len] = 0;
214  zarray_add(parts, &tok);
215  }
216  }
217 
218  return parts;
219 }
220 
222 {
223  if (!za)
224  return;
225 
226  zarray_vmap(za, free);
227  zarray_destroy(za);
228 }
229 
230 char *str_trim(char *str)
231 {
232  assert(str != NULL);
233 
234  return str_lstrip(str_rstrip(str));
235 }
236 
// Removes leading whitespace (as classified by isspace()) from str in place.
//
// The remaining characters are shifted to the front, so the pointer the
// caller already holds stays valid. Returns str.
char *str_lstrip(char *str)
{
    assert(str != NULL);

    // isspace() requires a value representable as unsigned char; a plain
    // (possibly negative) char would be undefined behavior.
    const char *first = str;
    while (*first != '\0' && isspace((unsigned char) *first))
        first++;

    // shift the string to the left so the original pointer still works
    if (first != str)
        memmove(str, first, strlen(first) + 1);

    return str;
}
248 
// Removes trailing whitespace (as classified by isspace()) from str in place
// by writing a new terminator after the last non-space character.
// Returns str.
char *str_rstrip(char *str)
{
    assert(str != NULL);

    // Work with a length rather than a pointer so that an empty string never
    // requires forming the invalid address str - 1.
    size_t len = strlen(str);
    while (len > 0 && isspace((unsigned char) str[len - 1]))
        len--;

    str[len] = '\0';
    return str;
}
258 
// Returns the index of the first occurrence of needle in haystack, or -1 if
// needle does not occur. An empty needle matches at index 0.
int str_indexof(const char *haystack, const char *needle)
{
    assert(haystack != NULL);
    assert(needle != NULL);

    const char *hit = strstr(haystack, needle);
    if (hit == NULL)
        return -1;

    return (int) (hit - haystack);
}
277 
// Returns the index of the last occurrence of needle in haystack, or -1 if
// needle does not occur. An empty needle matches at strlen(haystack).
int str_last_indexof(const char *haystack, const char *needle)
{
    assert(haystack != NULL);
    assert(needle != NULL);

    size_t hlen = strlen(haystack);
    size_t nlen = strlen(needle);

    if (nlen > hlen)
        return -1;

    // Scan candidate start positions from the right so the first hit is the
    // answer. `i-- > 0` visits hlen - nlen down to 0 without unsigned wrap.
    for (size_t i = hlen - nlen + 1; i-- > 0; ) {
        if (memcmp(&haystack[i], needle, nlen) == 0)
            return (int) i;
    }

    return -1;
}
295 
// Converts the ASCII letters 'A'..'Z' in s to lower case, in place.
// All other bytes are left untouched (no locale handling). Returns s.
char *str_tolowercase(char *s)
{
    assert(s != NULL);

    for (char *p = s; *p != '\0'; p++) {
        if (*p >= 'A' && *p <= 'Z')
            *p = (char) (*p + ('a' - 'A'));
    }

    return s;
}
309 
// Converts the ASCII letters 'a'..'z' in s to upper case, in place.
// All other bytes are left untouched (no locale handling). Returns s.
char *str_touppercase(char *s)
{
    assert(s != NULL);

    for (char *p = s; *p != '\0'; p++) {
        if (*p >= 'a' && *p <= 'z')
            *p = (char) (*p - ('a' - 'A'));
    }

    return s;
}
322 
324 {
325  string_buffer_t *sb = (string_buffer_t*) calloc(1, sizeof(string_buffer_t));
326  assert(sb != NULL);
327  sb->alloc = 32;
328  sb->s = calloc(sb->alloc, 1);
329  return sb;
330 }
331 
333 {
334  if (sb == NULL)
335  return;
336 
337  if (sb->s)
338  free(sb->s);
339 
340  memset(sb, 0, sizeof(string_buffer_t));
341  free(sb);
342 }
343 
345 {
346  assert(sb != NULL);
347 
348  if (sb->size+2 >= sb->alloc) {
349  sb->alloc *= 2;
350  sb->s = realloc(sb->s, sb->alloc);
351  }
352 
353  sb->s[sb->size++] = c;
354  sb->s[sb->size] = 0;
355 }
356 
358  assert(sb != NULL);
359  if (sb->size == 0)
360  return 0;
361 
362  char back = sb->s[--sb->size];
363  sb->s[sb->size] = 0;
364  return back;
365 }
366 
367 void string_buffer_appendf(string_buffer_t *sb, const char *fmt, ...)
368 {
369  assert(sb != NULL);
370  assert(fmt != NULL);
371 
372  int size = MIN_PRINTF_ALLOC;
373  char *buf = malloc(size * sizeof(char));
374 
375  int returnsize;
376  va_list args;
377 
378  va_start(args,fmt);
379  returnsize = vsnprintf(buf, size, fmt, args);
380  va_end(args);
381 
382  if (returnsize >= size) {
383  // otherwise, we should try again
384  free(buf);
385  size = returnsize + 1;
386  buf = malloc(size * sizeof(char));
387 
388  va_start(args, fmt);
389  returnsize = vsnprintf(buf, size, fmt, args);
390  va_end(args);
391 
392  assert(returnsize <= size);
393  }
394 
396  free(buf);
397 }
398 
400 {
401  assert(sb != NULL);
402  assert(str != NULL);
403 
404  size_t len = strlen(str);
405 
406  while (sb->size+len + 1 >= sb->alloc) {
407  sb->alloc *= 2;
408  sb->s = realloc(sb->s, sb->alloc);
409  }
410 
411  memcpy(&sb->s[sb->size], str, len);
412  sb->size += len;
413  sb->s[sb->size] = 0;
414 }
415 
417 {
418  assert(sb != NULL);
419  assert(str != NULL);
420 
421  return str_ends_with(sb->s, str);
422 }
423 
425 {
426  assert(sb != NULL);
427 
428  return strdup(sb->s);
429 }
430 
431 // returns length of string (not counting \0)
433 {
434  assert(sb != NULL);
435 
436  return sb->size;
437 }
438 
440 {
441  assert(sb != NULL);
442 
443  sb->s[0] = 0;
444  sb->size = 0;
445 }
446 
448 {
449  assert(str != NULL);
450 
451  string_feeder_t *sf = (string_feeder_t*) calloc(1, sizeof(string_feeder_t));
452  sf->s = strdup(str);
453  sf->len = strlen(sf->s);
454  sf->line = 1;
455  sf->col = 0;
456  sf->pos = 0;
457  return sf;
458 }
459 
461 {
462  assert(sf != NULL);
463  return sf->line;
464 }
465 
467 {
468  assert(sf != NULL);
469  return sf->col;
470 }
471 
473 {
474  if (sf == NULL)
475  return;
476 
477  free(sf->s);
478  memset(sf, 0, sizeof(string_feeder_t));
479  free(sf);
480 }
481 
483 {
484  assert(sf != NULL);
485 
486  return sf->s[sf->pos] != 0 && sf->pos <= sf->len;
487 }
488 
490 {
491  assert(sf != NULL);
492  assert(sf->pos <= sf->len);
493 
494  char c = sf->s[sf->pos++];
495  if (c == '\n') {
496  sf->line++;
497  sf->col = 0;
498  } else {
499  sf->col++;
500  }
501 
502  return c;
503 }
504 
506 {
507  assert(sf != NULL);
508  assert(length >= 0);
509  assert(sf->pos <= sf->len);
510 
511  if (sf->pos + length > sf->len)
512  length = sf->len - sf->pos;
513 
514  char *substr = calloc(length+1, sizeof(char));
515  for (int i = 0 ; i < length ; i++)
516  substr[i] = string_feeder_next(sf);
517  return substr;
518 }
519 
521 {
522  assert(sf != NULL);
523  assert(sf->pos <= sf->len);
524 
525  return sf->s[sf->pos];
526 }
527 
529 {
530  assert(sf != NULL);
531  assert(length >= 0);
532  assert(sf->pos <= sf->len);
533 
534  if (sf->pos + length > sf->len)
535  length = sf->len - sf->pos;
536 
537  char *substr = calloc(length+1, sizeof(char));
538  memcpy(substr, &sf->s[sf->pos], length*sizeof(char));
539  return substr;
540 }
541 
543 {
544  assert(sf != NULL);
545  assert(str != NULL);
546  assert(sf->pos <= sf->len);
547 
548  return str_starts_with(&sf->s[sf->pos], str);
549 }
550 
552 {
553  assert(sf != NULL);
554  assert(str != NULL);
555  assert(sf->pos <= sf->len);
556 
557  size_t len = strlen(str);
558 
559  for (int i = 0; i < len; i++) {
560  char c = string_feeder_next(sf);
561  assert(c == str[i]);
562  }
563 }
564 
// Returns true if haystack ends with needle. An empty needle always matches;
// a needle longer than haystack never does.
bool str_ends_with(const char *haystack, const char *needle)
{
    assert(haystack != NULL);
    assert(needle != NULL);

    const size_t hay_len = strlen(haystack);
    const size_t suffix_len = strlen(needle);

    if (hay_len < suffix_len)
        return false;

    // compare needle against the last suffix_len characters of haystack
    const char *tail = haystack + (hay_len - suffix_len);
    return strncmp(tail, needle, suffix_len) == 0;
}
579 
// Returns true if haystack begins with needle. An empty needle always
// matches.
//
// Defined as an ordinary external function: a non-static `inline` definition
// only produces a linkable symbol when some other declaration in the
// translation unit makes it external, which is easy to break by accident.
bool str_starts_with(const char *haystack, const char *needle)
{
    assert(haystack != NULL);
    assert(needle != NULL);

    // haystack[pos] doesn't have to be compared to zero; if it were
    // zero, it either doesn't match needle (in which case the loop
    // terminates) or it matches needle[pos] (in which case the loop
    // terminates).
    size_t pos = 0;
    while (haystack[pos] == needle[pos] && needle[pos] != 0)
        pos++;

    return (needle[pos] == 0);
}
595 
596 bool str_starts_with_any(const char *haystack, const char **needles, int num_needles)
597 {
598  assert(haystack != NULL);
599  assert(needles != NULL);
600  assert(num_needles >= 0);
601 
602  for (int i = 0; i < num_needles; i++) {
603  assert(needles[i] != NULL);
604  if (str_starts_with(haystack, needles[i]))
605  return true;
606  }
607 
608  return false;
609 }
610 
// Returns true if haystack is exactly equal (strcmp) to at least one of the
// first num_needles entries of needles; false otherwise (including when
// num_needles is 0). Every examined entry must be non-NULL.
bool str_matches_any(const char *haystack, const char **needles, int num_needles)
{
    assert(haystack != NULL);
    assert(needles != NULL);
    assert(num_needles >= 0);

    int idx = 0;
    while (idx < num_needles) {
        const char *candidate = needles[idx];
        assert(candidate != NULL);
        if (strcmp(haystack, candidate) == 0)
            return true;
        idx++;
    }

    return false;
}
625 
// Returns a newly allocated copy of str[startidx .. endidx).
//
// startidx is inclusive and endidx exclusive. A negative endidx means
// "through the end of the string". The range must satisfy
// startidx <= end <= strlen(str) + 1 (asserted).
//
// Returns a malloc()ed, NUL-terminated string the caller must free(), or
// NULL if the allocation fails.
char *str_substring(const char *str, size_t startidx, long endidx)
{
    assert(str != NULL);

    const size_t len = strlen(str);
    const size_t stop = (endidx < 0) ? len : (size_t) endidx;

    // Validate against the resolved end index: checking startidx only
    // against len + 1 would let startidx == len + 1 combine with a negative
    // endidx and make the length below wrap around.
    assert(startidx <= stop);
    assert(stop <= len + 1);

    size_t blen = stop - startidx; // not counting \0
    char *b = malloc(blen + 1);
    if (b == NULL)
        return NULL;

    memcpy(b, &str[startidx], blen);
    b[blen] = 0;
    return b;
}
642 
643 char *str_replace(const char *haystack, const char *needle, const char *replacement)
644 {
645  assert(haystack != NULL);
646  assert(needle != NULL);
647  assert(replacement != NULL);
648 
650  size_t haystack_len = strlen(haystack);
651  size_t needle_len = strlen(needle);
652 
653  int pos = 0;
654  while (pos < haystack_len) {
655  if (needle_len > 0 && str_starts_with(&haystack[pos], needle)) {
656  string_buffer_append_string(sb, replacement);
657  pos += needle_len;
658  } else {
659  string_buffer_append(sb, haystack[pos]);
660  pos++;
661  }
662  }
663  if (needle_len == 0 && haystack_len == 0)
664  string_buffer_append_string(sb, replacement);
665 
666  char *res = string_buffer_to_string(sb);
668  return res;
669 }
670 
// Applies a sequence of str_replace() calls to a copy of _haystack.
//
// The variadic arguments are (needle, replacement) pairs of `char *`,
// applied in order, each to the result of the previous one; the list ends
// at the first NULL needle. The caller owns the returned string and must
// free() it.
char *str_replace_many(const char *_haystack, ...)
{
    va_list ap;
    va_start(ap, _haystack);

    char *current = strdup(_haystack);

    for (;;) {
        char *needle = va_arg(ap, char*);
        if (needle == NULL)
            break;

        char *replacement = va_arg(ap, char*);

        // swap the working copy for the result of this replacement step
        char *next = str_replace(current, needle, replacement);
        free(current);
        current = next;
    }

    va_end(ap);

    return current;
}
693 
// Appends printf-style formatted text to the heap string *_buf.
//
// *_buf may be NULL (nothing allocated yet); *bufpos is the current length
// of the string, excluding the terminator. On success *_buf is reallocated
// to fit, the text and its terminator are copied to the end, and *bufpos
// grows by the number of characters added. On a formatting or allocation
// failure *_buf and *bufpos are left unchanged.
static void buffer_appendf(char **_buf, int *bufpos, const char *fmt, ...)
{
    va_list ap;

    int salloc = 128;
    char *s = malloc((size_t) salloc);
    if (s == NULL)
        return;

    va_start(ap, fmt);
    int slen = vsnprintf(s, (size_t) salloc, fmt, ap);
    va_end(ap);

    if (slen < 0) {
        free(s);
        return;
    }

    if (slen >= salloc) {
        // the first buffer was too small: retry with the exact size
        char *bigger = realloc(s, (size_t) slen + 1);
        if (bigger == NULL) {
            free(s);
            return;
        }
        s = bigger;

        va_start(ap, fmt);
        vsnprintf(s, (size_t) slen + 1, fmt, ap);
        va_end(ap);
    }

    char *buf = realloc(*_buf, (size_t) *bufpos + (size_t) slen + 1);
    if (buf == NULL) {
        free(s);
        return;
    }
    *_buf = buf;

    memcpy(&buf[*bufpos], s, (size_t) slen + 1); // get trailing \0
    (*bufpos) += slen;

    free(s);
}

// Returns 1 if c may appear in a variable name ([A-Za-z0-9_]), else 0.
static int is_variable_character(char c)
{
    return (c >= 'a' && c <= 'z') ||
           (c >= 'A' && c <= 'Z') ||
           (c >= '0' && c <= '9') ||
           c == '_';
}

// Returns a copy of in with every `$NAME` replaced by the value of the
// environment variable NAME.
//
// NAME is the longest run of [A-Za-z0-9_] following the '$'. A variable that
// is not set expands to nothing, and a '$' with no name after it is dropped.
// All other characters are copied unchanged.
//
// The result is always a heap-allocated, NUL-terminated string the caller
// must free() (an empty string if nothing was produced); NULL is returned
// only if memory could not be allocated for it.
char *str_expand_envs(const char *in)
{
    assert(in != NULL);

    size_t inlen = strlen(in);
    size_t inpos = 0;

    char *out = NULL;
    int outpos = 0;

    // make sure the result is an allocated "" even if nothing is appended
    buffer_appendf(&out, &outpos, "%s", "");

    while (inpos < inlen) {

        if (in[inpos] != '$') {
            buffer_appendf(&out, &outpos, "%c", in[inpos]);
            inpos++;
            continue;
        }

        inpos++; // consume '$'

        // the name is the whole run of variable characters, however long
        size_t name_start = inpos;
        while (inpos < inlen && is_variable_character(in[inpos]))
            inpos++;
        size_t name_len = inpos - name_start;

        if (name_len == 0)
            continue; // lone '$': nothing to look up

        char *varname = malloc(name_len + 1);
        if (varname == NULL)
            continue;
        memcpy(varname, &in[name_start], name_len);
        varname[name_len] = '\0';

        const char *env = getenv(varname);
        if (env)
            buffer_appendf(&out, &outpos, "%s", env);

        free(varname);
    }

    return out;
}
bool str_matches_any(const char *haystack, const char **needles, int num_needles)
Definition: string_util.c:611
zarray_t * str_split_spaces(const char *str)
Definition: string_util.c:191
char * str_lstrip(char *str)
Definition: string_util.c:237
char * str_trim(char *str)
Definition: string_util.c:230
void str_split_destroy(zarray_t *za)
Definition: string_util.c:221
bool str_ends_with(const char *haystack, const char *needle)
Definition: string_util.c:566
void string_buffer_append(string_buffer_t *sb, char c)
Definition: string_util.c:344
int str_diff_idx(const char *a, const char *b)
Definition: string_util.c:134
char string_buffer_pop_back(string_buffer_t *sb)
Definition: string_util.c:357
char * vsprintf_alloc(const char *fmt, va_list orig_args)
Definition: string_util.c:66
bool string_feeder_starts_with(string_feeder_t *sf, const char *str)
Definition: string_util.c:542
static void zarray_destroy(zarray_t *za)
Definition: zarray.h:76
char * string_buffer_to_string(string_buffer_t *sb)
Definition: string_util.c:424
char * str_rstrip(char *str)
Definition: string_util.c:249
char * string_feeder_peek_length(string_feeder_t *sf, size_t length)
Definition: string_util.c:528
#define str(s)
char * str_substring(const char *str, size_t startidx, long endidx)
Definition: string_util.c:626
char string_feeder_next(string_feeder_t *sf)
Definition: string_util.c:489
#define MIN_PRINTF_ALLOC
Definition: string_util.c:51
static zarray_t * zarray_create(size_t el_sz)
Definition: zarray.h:63
int string_feeder_get_column(string_feeder_t *sf)
Definition: string_util.c:466
void string_buffer_appendf(string_buffer_t *sb, const char *fmt,...)
Definition: string_util.c:367
char * str_expand_envs(const char *in)
Definition: string_util.c:739
int string_feeder_get_line(string_feeder_t *sf)
Definition: string_util.c:460
size_t string_buffer_size(string_buffer_t *sb)
Definition: string_util.c:432
string_buffer_t * string_buffer_create()
Definition: string_util.c:323
char * string_feeder_next_length(string_feeder_t *sf, size_t length)
Definition: string_util.c:505
void string_feeder_require(string_feeder_t *sf, const char *str)
Definition: string_util.c:551
string_feeder_t * string_feeder_create(const char *str)
Definition: string_util.c:447
bool string_feeder_has_next(string_feeder_t *sf)
Definition: string_util.c:482
char * str_tolowercase(char *s)
Definition: string_util.c:297
char * str_replace_many(const char *_haystack,...)
Definition: string_util.c:671
bool str_starts_with(const char *haystack, const char *needle)
Definition: string_util.c:580
zarray_t * str_split(const char *str, const char *delim)
Definition: string_util.c:154
Definition: zarray.h:49
char * str_replace(const char *haystack, const char *needle, const char *replacement)
Definition: string_util.c:643
void string_feeder_destroy(string_feeder_t *sf)
Definition: string_util.c:472
void string_buffer_append_string(string_buffer_t *sb, const char *str)
Definition: string_util.c:399
char * sprintf_alloc(const char *fmt,...)
Definition: string_util.c:53
void string_buffer_reset(string_buffer_t *sb)
Definition: string_util.c:439
static void buffer_appendf(char **_buf, int *bufpos, void *fmt,...)
Definition: string_util.c:694
bool str_starts_with_any(const char *haystack, const char **needles, int num_needles)
Definition: string_util.c:596
int str_indexof(const char *haystack, const char *needle)
Definition: string_util.c:259
bool string_buffer_ends_with(string_buffer_t *sb, const char *str)
Definition: string_util.c:416
void zarray_vmap(zarray_t *za, void(*f)())
Definition: zarray.c:51
void string_buffer_destroy(string_buffer_t *sb)
Definition: string_util.c:332
char string_feeder_peek(string_feeder_t *sf)
Definition: string_util.c:520
int str_last_indexof(const char *haystack, const char *needle)
Definition: string_util.c:278
char * str_touppercase(char *s)
Definition: string_util.c:310
char * _str_concat_private(const char *first,...)
Definition: string_util.c:98
static int is_variable_character(char c)
Definition: string_util.c:722
static void zarray_add(zarray_t *za, const void *p)
Definition: zarray.h:185


apriltags2
Author(s): Danylo Malyuta
autogenerated on Fri Oct 19 2018 04:02:32