strutil.cc
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // from google3/strings/strutil.cc
32 
34 
35 #include <errno.h>
36 #include <float.h> // FLT_DIG and DBL_DIG
37 #include <limits.h>
38 #include <stdio.h>
39 #include <cmath>
40 #include <iterator>
41 #include <limits>
42 
46 
47 #ifdef _WIN32
48 // MSVC has only _snprintf, not snprintf.
49 //
50 // MinGW has both snprintf and _snprintf, but they appear to be different
51 // functions. The former is buggy. When invoked like so:
52 // char buffer[32];
53 // snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
54 // it prints "1.23000e+10". This is plainly wrong: %g should never print
55 // trailing zeros after the decimal point. For some reason this bug only
56 // occurs with some input values, not all. In any case, _snprintf does the
57 // right thing, so we use it.
58 #define snprintf _snprintf
59 #endif
60 
61 namespace google {
62 namespace protobuf {
63 
64 // These are defined as macros on some platforms. #undef them so that we can
65 // redefine them.
66 #undef isxdigit
67 #undef isprint
68 
69 // The definitions of these in ctype.h change based on locale. Since our
70 // string manipulation is all in relation to the protocol buffer and C++
71 // languages, we always want to use the C locale. So, we re-define these
72 // exactly as we want them.
73 inline bool isxdigit(char c) {
74  return ('0' <= c && c <= '9') ||
75  ('a' <= c && c <= 'f') ||
76  ('A' <= c && c <= 'F');
77 }
78 
79 inline bool isprint(char c) {
80  return c >= 0x20 && c <= 0x7E;
81 }
82 
83 // ----------------------------------------------------------------------
84 // StripString
85 // Replaces any occurrence of the character 'remove' (or the characters
86 // in 'remove') with the character 'replacewith'.
87 // ----------------------------------------------------------------------
88 void StripString(string* s, const char* remove, char replacewith) {
89  const char * str_start = s->c_str();
90  const char * str = str_start;
91  for (str = strpbrk(str, remove);
92  str != nullptr;
93  str = strpbrk(str + 1, remove)) {
94  (*s)[str - str_start] = replacewith;
95  }
96 }
97 
98 // ----------------------------------------------------------------------
99 // ReplaceCharacters
100 // Replaces any occurrence of the character 'remove' (or the characters
101 // in 'remove') with the character 'replacewith'.
102 // ----------------------------------------------------------------------
103 void ReplaceCharacters(string *s, const char *remove, char replacewith) {
104  const char *str_start = s->c_str();
105  const char *str = str_start;
106  for (str = strpbrk(str, remove);
107  str != nullptr;
108  str = strpbrk(str + 1, remove)) {
109  (*s)[str - str_start] = replacewith;
110  }
111 }
112 
113 void StripWhitespace(string* str) {
114  int str_length = str->length();
115 
116  // Strip off leading whitespace.
117  int first = 0;
118  while (first < str_length && ascii_isspace(str->at(first))) {
119  ++first;
120  }
121  // If entire string is white space.
122  if (first == str_length) {
123  str->clear();
124  return;
125  }
126  if (first > 0) {
127  str->erase(0, first);
128  str_length -= first;
129  }
130 
131  // Strip off trailing whitespace.
132  int last = str_length - 1;
133  while (last >= 0 && ascii_isspace(str->at(last))) {
134  --last;
135  }
136  if (last != (str_length - 1) && last >= 0) {
137  str->erase(last + 1, string::npos);
138  }
139 }
140 
141 // ----------------------------------------------------------------------
142 // StringReplace()
143 // Replace the "old" pattern with the "new" pattern in a string,
144 // and append the result to "res". If replace_all is false,
145 // it only replaces the first instance of "old."
146 // ----------------------------------------------------------------------
147 
148 void StringReplace(const string& s, const string& oldsub,
149  const string& newsub, bool replace_all,
150  string* res) {
151  if (oldsub.empty()) {
152  res->append(s); // if empty, append the given string.
153  return;
154  }
155 
156  string::size_type start_pos = 0;
157  string::size_type pos;
158  do {
159  pos = s.find(oldsub, start_pos);
160  if (pos == string::npos) {
161  break;
162  }
163  res->append(s, start_pos, pos - start_pos);
164  res->append(newsub);
165  start_pos = pos + oldsub.size(); // start searching again after the "old"
166  } while (replace_all);
167  res->append(s, start_pos, s.length() - start_pos);
168 }
169 
170 // ----------------------------------------------------------------------
171 // StringReplace()
172 // Give me a string and two patterns "old" and "new", and I replace
173 // the first instance of "old" in the string with "new", if it
174 // exists. If "global" is true; call this repeatedly until it
175 // fails. RETURN a new string, regardless of whether the replacement
176 // happened or not.
177 // ----------------------------------------------------------------------
178 
179 string StringReplace(const string& s, const string& oldsub,
180  const string& newsub, bool replace_all) {
181  string ret;
182  StringReplace(s, oldsub, newsub, replace_all, &ret);
183  return ret;
184 }
185 
186 // ----------------------------------------------------------------------
187 // SplitStringUsing()
188 // Split a string using a character delimiter. Append the components
189 // to 'result'.
190 //
191 // Note: For multi-character delimiters, this routine will split on *ANY* of
192 // the characters in the string, not the entire string as a single delimiter.
193 // ----------------------------------------------------------------------
194 template <typename ITR>
195 static inline
196 void SplitStringToIteratorUsing(const string& full,
197  const char* delim,
198  ITR& result) {
199  // Optimize the common case where delim is a single character.
200  if (delim[0] != '\0' && delim[1] == '\0') {
201  char c = delim[0];
202  const char* p = full.data();
203  const char* end = p + full.size();
204  while (p != end) {
205  if (*p == c) {
206  ++p;
207  } else {
208  const char* start = p;
209  while (++p != end && *p != c);
210  *result++ = string(start, p - start);
211  }
212  }
213  return;
214  }
215 
216  string::size_type begin_index, end_index;
217  begin_index = full.find_first_not_of(delim);
218  while (begin_index != string::npos) {
219  end_index = full.find_first_of(delim, begin_index);
220  if (end_index == string::npos) {
221  *result++ = full.substr(begin_index);
222  return;
223  }
224  *result++ = full.substr(begin_index, (end_index - begin_index));
225  begin_index = full.find_first_not_of(delim, end_index);
226  }
227 }
228 
229 void SplitStringUsing(const string& full,
230  const char* delim,
231  std::vector<string>* result) {
232  std::back_insert_iterator< std::vector<string> > it(*result);
233  SplitStringToIteratorUsing(full, delim, it);
234 }
235 
236 // Split a string using a character delimiter. Append the components
237 // to 'result'. If there are consecutive delimiters, this function
238 // will return corresponding empty strings. The string is split into
239 // at most the specified number of pieces greedily. This means that the
240 // last piece may possibly be split further. To split into as many pieces
241 // as possible, specify 0 as the number of pieces.
242 //
243 // If "full" is the empty string, yields an empty string as the only value.
244 //
245 // If "pieces" is negative for some reason, it returns the whole string
246 // ----------------------------------------------------------------------
247 template <typename StringType, typename ITR>
248 static inline
250  const char* delim,
251  int pieces,
252  ITR& result) {
253  string::size_type begin_index, end_index;
254  begin_index = 0;
255 
256  for (int i = 0; (i < pieces-1) || (pieces == 0); i++) {
257  end_index = full.find_first_of(delim, begin_index);
258  if (end_index == string::npos) {
259  *result++ = full.substr(begin_index);
260  return;
261  }
262  *result++ = full.substr(begin_index, (end_index - begin_index));
263  begin_index = end_index + 1;
264  }
265  *result++ = full.substr(begin_index);
266 }
267 
268 void SplitStringAllowEmpty(const string& full, const char* delim,
269  std::vector<string>* result) {
270  std::back_insert_iterator<std::vector<string> > it(*result);
271  SplitStringToIteratorAllowEmpty(full, delim, 0, it);
272 }
273 
274 // ----------------------------------------------------------------------
275 // JoinStrings()
276 // This merges a vector of string components with delim inserted
277 // as separaters between components.
278 //
279 // ----------------------------------------------------------------------
280 template <class ITERATOR>
281 static void JoinStringsIterator(const ITERATOR& start,
282  const ITERATOR& end,
283  const char* delim,
284  string* result) {
285  GOOGLE_CHECK(result != nullptr);
286  result->clear();
287  int delim_length = strlen(delim);
288 
289  // Precompute resulting length so we can reserve() memory in one shot.
290  int length = 0;
291  for (ITERATOR iter = start; iter != end; ++iter) {
292  if (iter != start) {
293  length += delim_length;
294  }
295  length += iter->size();
296  }
297  result->reserve(length);
298 
299  // Now combine everything.
300  for (ITERATOR iter = start; iter != end; ++iter) {
301  if (iter != start) {
302  result->append(delim, delim_length);
303  }
304  result->append(iter->data(), iter->size());
305  }
306 }
307 
308 void JoinStrings(const std::vector<string>& components,
309  const char* delim,
310  string * result) {
311  JoinStringsIterator(components.begin(), components.end(), delim, result);
312 }
313 
314 // ----------------------------------------------------------------------
315 // UnescapeCEscapeSequences()
316 // This does all the unescaping that C does: \ooo, \r, \n, etc
317 // Returns length of resulting string.
318 // The implementation of \x parses any positive number of hex digits,
319 // but it is an error if the value requires more than 8 bits, and the
320 // result is truncated to 8 bits.
321 //
322 // The second call stores its errors in a supplied string vector.
323 // If the string vector pointer is nullptr, it reports the errors with LOG().
324 // ----------------------------------------------------------------------
325 
326 #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
327 
328 // Protocol buffers doesn't ever care about errors, but I don't want to remove
329 // the code.
330 #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
331 
332 int UnescapeCEscapeSequences(const char* source, char* dest) {
333  return UnescapeCEscapeSequences(source, dest, nullptr);
334 }
335 
336 int UnescapeCEscapeSequences(const char* source, char* dest,
337  std::vector<string> *errors) {
338  GOOGLE_DCHECK(errors == nullptr) << "Error reporting not implemented.";
339 
340  char* d = dest;
341  const char* p = source;
342 
343  // Small optimization for case where source = dest and there's no escaping
344  while ( p == d && *p != '\0' && *p != '\\' )
345  p++, d++;
346 
347  while (*p != '\0') {
348  if (*p != '\\') {
349  *d++ = *p++;
350  } else {
351  switch ( *++p ) { // skip past the '\\'
352  case '\0':
353  LOG_STRING(ERROR, errors) << "String cannot end with \\";
354  *d = '\0';
355  return d - dest; // we're done with p
356  case 'a': *d++ = '\a'; break;
357  case 'b': *d++ = '\b'; break;
358  case 'f': *d++ = '\f'; break;
359  case 'n': *d++ = '\n'; break;
360  case 'r': *d++ = '\r'; break;
361  case 't': *d++ = '\t'; break;
362  case 'v': *d++ = '\v'; break;
363  case '\\': *d++ = '\\'; break;
364  case '?': *d++ = '\?'; break; // \? Who knew?
365  case '\'': *d++ = '\''; break;
366  case '"': *d++ = '\"'; break;
367  case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits
368  case '4': case '5': case '6': case '7': {
369  char ch = *p - '0';
370  if ( IS_OCTAL_DIGIT(p[1]) )
371  ch = ch * 8 + *++p - '0';
372  if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice
373  ch = ch * 8 + *++p - '0'; // now points at last digit
374  *d++ = ch;
375  break;
376  }
377  case 'x': case 'X': {
378  if (!isxdigit(p[1])) {
379  if (p[1] == '\0') {
380  LOG_STRING(ERROR, errors) << "String cannot end with \\x";
381  } else {
382  LOG_STRING(ERROR, errors) <<
383  "\\x cannot be followed by non-hex digit: \\" << *p << p[1];
384  }
385  break;
386  }
387  unsigned int ch = 0;
388  const char *hex_start = p;
389  while (isxdigit(p[1])) // arbitrarily many hex digits
390  ch = (ch << 4) + hex_digit_to_int(*++p);
391  if (ch > 0xFF)
392  LOG_STRING(ERROR, errors) << "Value of " <<
393  "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
394  *d++ = ch;
395  break;
396  }
397 #if 0 // TODO(kenton): Support \u and \U? Requires runetochar().
398  case 'u': {
399  // \uhhhh => convert 4 hex digits to UTF-8
400  char32 rune = 0;
401  const char *hex_start = p;
402  for (int i = 0; i < 4; ++i) {
403  if (isxdigit(p[1])) { // Look one char ahead.
404  rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
405  } else {
407  << "\\u must be followed by 4 hex digits: \\"
408  << string(hex_start, p+1-hex_start);
409  break;
410  }
411  }
412  d += runetochar(d, &rune);
413  break;
414  }
415  case 'U': {
416  // \Uhhhhhhhh => convert 8 hex digits to UTF-8
417  char32 rune = 0;
418  const char *hex_start = p;
419  for (int i = 0; i < 8; ++i) {
420  if (isxdigit(p[1])) { // Look one char ahead.
421  // Don't change rune until we're sure this
422  // is within the Unicode limit, but do advance p.
423  char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
424  if (newrune > 0x10FFFF) {
426  << "Value of \\"
427  << string(hex_start, p + 1 - hex_start)
428  << " exceeds Unicode limit (0x10FFFF)";
429  break;
430  } else {
431  rune = newrune;
432  }
433  } else {
435  << "\\U must be followed by 8 hex digits: \\"
436  << string(hex_start, p+1-hex_start);
437  break;
438  }
439  }
440  d += runetochar(d, &rune);
441  break;
442  }
443 #endif
444  default:
445  LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
446  }
447  p++; // read past letter we escaped
448  }
449  }
450  *d = '\0';
451  return d - dest;
452 }
453 
454 // ----------------------------------------------------------------------
455 // UnescapeCEscapeString()
456 // This does the same thing as UnescapeCEscapeSequences, but creates
457 // a new string. The caller does not need to worry about allocating
458 // a dest buffer. This should be used for non performance critical
459 // tasks such as printing debug messages. It is safe for src and dest
460 // to be the same.
461 //
462 // The second call stores its errors in a supplied string vector.
463 // If the string vector pointer is nullptr, it reports the errors with LOG().
464 //
465 // In the first and second calls, the length of dest is returned. In the
466 // the third call, the new string is returned.
467 // ----------------------------------------------------------------------
468 int UnescapeCEscapeString(const string& src, string* dest) {
469  return UnescapeCEscapeString(src, dest, nullptr);
470 }
471 
472 int UnescapeCEscapeString(const string& src, string* dest,
473  std::vector<string> *errors) {
474  std::unique_ptr<char[]> unescaped(new char[src.size() + 1]);
475  int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
477  dest->assign(unescaped.get(), len);
478  return len;
479 }
480 
481 string UnescapeCEscapeString(const string& src) {
482  std::unique_ptr<char[]> unescaped(new char[src.size() + 1]);
483  int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), nullptr);
484  return string(unescaped.get(), len);
485 }
486 
487 // ----------------------------------------------------------------------
488 // CEscapeString()
489 // CHexEscapeString()
490 // Copies 'src' to 'dest', escaping dangerous characters using
491 // C-style escape sequences. This is very useful for preparing query
492 // flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
493 // hexadecimal rather than octal sequences.
494 // Returns the number of bytes written to 'dest' (not including the \0)
495 // or -1 if there was insufficient space.
496 //
497 // Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
498 // ----------------------------------------------------------------------
499 int CEscapeInternal(const char* src, int src_len, char* dest,
500  int dest_len, bool use_hex, bool utf8_safe) {
501  const char* src_end = src + src_len;
502  int used = 0;
503  bool last_hex_escape = false; // true if last output char was \xNN
504 
505  for (; src < src_end; src++) {
506  if (dest_len - used < 2) // Need space for two letter escape
507  return -1;
508 
509  bool is_hex_escape = false;
510  switch (*src) {
511  case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break;
512  case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break;
513  case '\t': dest[used++] = '\\'; dest[used++] = 't'; break;
514  case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
515  case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
516  case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
517  default:
518  // Note that if we emit \xNN and the src character after that is a hex
519  // digit then that digit must be escaped too to prevent it being
520  // interpreted as part of the character code by C.
521  if ((!utf8_safe || static_cast<uint8>(*src) < 0x80) &&
522  (!isprint(*src) ||
523  (last_hex_escape && isxdigit(*src)))) {
524  if (dest_len - used < 4) // need space for 4 letter escape
525  return -1;
526  sprintf(dest + used, (use_hex ? "\\x%02x" : "\\%03o"),
527  static_cast<uint8>(*src));
528  is_hex_escape = use_hex;
529  used += 4;
530  } else {
531  dest[used++] = *src; break;
532  }
533  }
534  last_hex_escape = is_hex_escape;
535  }
536 
537  if (dest_len - used < 1) // make sure that there is room for \0
538  return -1;
539 
540  dest[used] = '\0'; // doesn't count towards return value though
541  return used;
542 }
543 
544 // Calculates the length of the C-style escaped version of 'src'.
545 // Assumes that non-printable characters are escaped using octal sequences, and
546 // that UTF-8 bytes are not handled specially.
547 static inline size_t CEscapedLength(StringPiece src) {
548  static char c_escaped_len[256] = {
549  4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r
550  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
551  1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", '
552  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9'
553  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O'
554  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\'
555  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o'
556  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL
557  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
558  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
559  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
560  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
561  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
562  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
563  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
564  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
565  };
566 
567  size_t escaped_len = 0;
568  for (int i = 0; i < src.size(); ++i) {
569  unsigned char c = static_cast<unsigned char>(src[i]);
570  escaped_len += c_escaped_len[c];
571  }
572  return escaped_len;
573 }
574 
575 // ----------------------------------------------------------------------
576 // Escapes 'src' using C-style escape sequences, and appends the escaped string
577 // to 'dest'. This version is faster than calling CEscapeInternal as it computes
578 // the required space using a lookup table, and also does not do any special
579 // handling for Hex or UTF-8 characters.
580 // ----------------------------------------------------------------------
582  size_t escaped_len = CEscapedLength(src);
583  if (escaped_len == src.size()) {
584  dest->append(src.data(), src.size());
585  return;
586  }
587 
588  size_t cur_dest_len = dest->size();
589  dest->resize(cur_dest_len + escaped_len);
590  char* append_ptr = &(*dest)[cur_dest_len];
591 
592  for (int i = 0; i < src.size(); ++i) {
593  unsigned char c = static_cast<unsigned char>(src[i]);
594  switch (c) {
595  case '\n': *append_ptr++ = '\\'; *append_ptr++ = 'n'; break;
596  case '\r': *append_ptr++ = '\\'; *append_ptr++ = 'r'; break;
597  case '\t': *append_ptr++ = '\\'; *append_ptr++ = 't'; break;
598  case '\"': *append_ptr++ = '\\'; *append_ptr++ = '\"'; break;
599  case '\'': *append_ptr++ = '\\'; *append_ptr++ = '\''; break;
600  case '\\': *append_ptr++ = '\\'; *append_ptr++ = '\\'; break;
601  default:
602  if (!isprint(c)) {
603  *append_ptr++ = '\\';
604  *append_ptr++ = '0' + c / 64;
605  *append_ptr++ = '0' + (c % 64) / 8;
606  *append_ptr++ = '0' + c % 8;
607  } else {
608  *append_ptr++ = c;
609  }
610  break;
611  }
612  }
613 }
614 
615 string CEscape(const string& src) {
616  string dest;
618  return dest;
619 }
620 
621 namespace strings {
622 
623 string Utf8SafeCEscape(const string& src) {
624  const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
625  std::unique_ptr<char[]> dest(new char[dest_length]);
626  const int len = CEscapeInternal(src.data(), src.size(),
627  dest.get(), dest_length, false, true);
628  GOOGLE_DCHECK_GE(len, 0);
629  return string(dest.get(), len);
630 }
631 
632 string CHexEscape(const string& src) {
633  const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
634  std::unique_ptr<char[]> dest(new char[dest_length]);
635  const int len = CEscapeInternal(src.data(), src.size(),
636  dest.get(), dest_length, true, false);
637  GOOGLE_DCHECK_GE(len, 0);
638  return string(dest.get(), len);
639 }
640 
641 } // namespace strings
642 
643 // ----------------------------------------------------------------------
644 // strto32_adaptor()
645 // strtou32_adaptor()
646 // Implementation of strto[u]l replacements that have identical
647 // overflow and underflow characteristics for both ILP-32 and LP-64
648 // platforms, including errno preservation in error-free calls.
649 // ----------------------------------------------------------------------
650 
651 int32 strto32_adaptor(const char *nptr, char **endptr, int base) {
652  const int saved_errno = errno;
653  errno = 0;
654  const long result = strtol(nptr, endptr, base);
655  if (errno == ERANGE && result == LONG_MIN) {
656  return kint32min;
657  } else if (errno == ERANGE && result == LONG_MAX) {
658  return kint32max;
659  } else if (errno == 0 && result < kint32min) {
660  errno = ERANGE;
661  return kint32min;
662  } else if (errno == 0 && result > kint32max) {
663  errno = ERANGE;
664  return kint32max;
665  }
666  if (errno == 0)
667  errno = saved_errno;
668  return static_cast<int32>(result);
669 }
670 
671 uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) {
672  const int saved_errno = errno;
673  errno = 0;
674  const unsigned long result = strtoul(nptr, endptr, base);
675  if (errno == ERANGE && result == ULONG_MAX) {
676  return kuint32max;
677  } else if (errno == 0 && result > kuint32max) {
678  errno = ERANGE;
679  return kuint32max;
680  }
681  if (errno == 0)
682  errno = saved_errno;
683  return static_cast<uint32>(result);
684 }
685 
686 inline bool safe_parse_sign(string* text /*inout*/,
687  bool* negative_ptr /*output*/) {
688  const char* start = text->data();
689  const char* end = start + text->size();
690 
691  // Consume whitespace.
692  while (start < end && (start[0] == ' ')) {
693  ++start;
694  }
695  while (start < end && (end[-1] == ' ')) {
696  --end;
697  }
698  if (start >= end) {
699  return false;
700  }
701 
702  // Consume sign.
703  *negative_ptr = (start[0] == '-');
704  if (*negative_ptr || start[0] == '+') {
705  ++start;
706  if (start >= end) {
707  return false;
708  }
709  }
710  *text = text->substr(start - text->data(), end - start);
711  return true;
712 }
713 
714 template<typename IntType>
716  string text, IntType* value_p) {
717  int base = 10;
718  IntType value = 0;
719  const IntType vmax = std::numeric_limits<IntType>::max();
720  assert(vmax > 0);
721  assert(vmax >= base);
722  const IntType vmax_over_base = vmax / base;
723  const char* start = text.data();
724  const char* end = start + text.size();
725  // loop over digits
726  for (; start < end; ++start) {
727  unsigned char c = static_cast<unsigned char>(start[0]);
728  int digit = c - '0';
729  if (digit >= base || digit < 0) {
730  *value_p = value;
731  return false;
732  }
733  if (value > vmax_over_base) {
734  *value_p = vmax;
735  return false;
736  }
737  value *= base;
738  if (value > vmax - digit) {
739  *value_p = vmax;
740  return false;
741  }
742  value += digit;
743  }
744  *value_p = value;
745  return true;
746 }
747 
748 template<typename IntType>
750  const string& text, IntType* value_p) {
751  int base = 10;
752  IntType value = 0;
753  const IntType vmin = std::numeric_limits<IntType>::min();
754  assert(vmin < 0);
755  assert(vmin <= 0 - base);
756  IntType vmin_over_base = vmin / base;
757  // 2003 c++ standard [expr.mul]
758  // "... the sign of the remainder is implementation-defined."
759  // Although (vmin/base)*base + vmin%base is always vmin.
760  // 2011 c++ standard tightens the spec but we cannot rely on it.
761  if (vmin % base > 0) {
762  vmin_over_base += 1;
763  }
764  const char* start = text.data();
765  const char* end = start + text.size();
766  // loop over digits
767  for (; start < end; ++start) {
768  unsigned char c = static_cast<unsigned char>(start[0]);
769  int digit = c - '0';
770  if (digit >= base || digit < 0) {
771  *value_p = value;
772  return false;
773  }
774  if (value < vmin_over_base) {
775  *value_p = vmin;
776  return false;
777  }
778  value *= base;
779  if (value < vmin + digit) {
780  *value_p = vmin;
781  return false;
782  }
783  value -= digit;
784  }
785  *value_p = value;
786  return true;
787 }
788 
789 template<typename IntType>
790 bool safe_int_internal(string text, IntType* value_p) {
791  *value_p = 0;
792  bool negative;
793  if (!safe_parse_sign(&text, &negative)) {
794  return false;
795  }
796  if (!negative) {
797  return safe_parse_positive_int(text, value_p);
798  } else {
799  return safe_parse_negative_int(text, value_p);
800  }
801 }
802 
803 template<typename IntType>
804 bool safe_uint_internal(string text, IntType* value_p) {
805  *value_p = 0;
806  bool negative;
807  if (!safe_parse_sign(&text, &negative) || negative) {
808  return false;
809  }
810  return safe_parse_positive_int(text, value_p);
811 }
812 
813 // ----------------------------------------------------------------------
814 // FastIntToBuffer()
815 // FastInt64ToBuffer()
816 // FastHexToBuffer()
817 // FastHex64ToBuffer()
818 // FastHex32ToBuffer()
819 // ----------------------------------------------------------------------
820 
821 // Offset into buffer where FastInt64ToBuffer places the end of string
822 // null character. Also used by FastInt64ToBufferLeft.
823 static const int kFastInt64ToBufferOffset = 21;
824 
826  // We could collapse the positive and negative sections, but that
827  // would be slightly slower for positive numbers...
828  // 22 bytes is enough to store -2**64, -18446744073709551616.
830  *p-- = '\0';
831  if (i >= 0) {
832  do {
833  *p-- = '0' + i % 10;
834  i /= 10;
835  } while (i > 0);
836  return p + 1;
837  } else {
838  // On different platforms, % and / have different behaviors for
839  // negative numbers, so we need to jump through hoops to make sure
840  // we don't divide negative numbers.
841  if (i > -10) {
842  i = -i;
843  *p-- = '0' + i;
844  *p = '-';
845  return p;
846  } else {
847  // Make sure we aren't at MIN_INT, in which case we can't say i = -i
848  i = i + 10;
849  i = -i;
850  *p-- = '0' + i % 10;
851  // Undo what we did a moment ago
852  i = i / 10 + 1;
853  do {
854  *p-- = '0' + i % 10;
855  i /= 10;
856  } while (i > 0);
857  *p = '-';
858  return p;
859  }
860  }
861 }
862 
863 // Offset into buffer where FastInt32ToBuffer places the end of string
864 // null character. Also used by FastInt32ToBufferLeft
865 static const int kFastInt32ToBufferOffset = 11;
866 
867 // Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the
868 // compiler to generate 32 bit arithmetic instructions. It's much faster, at
869 // least with 32 bit binaries.
871  // We could collapse the positive and negative sections, but that
872  // would be slightly slower for positive numbers...
873  // 12 bytes is enough to store -2**32, -4294967296.
875  *p-- = '\0';
876  if (i >= 0) {
877  do {
878  *p-- = '0' + i % 10;
879  i /= 10;
880  } while (i > 0);
881  return p + 1;
882  } else {
883  // On different platforms, % and / have different behaviors for
884  // negative numbers, so we need to jump through hoops to make sure
885  // we don't divide negative numbers.
886  if (i > -10) {
887  i = -i;
888  *p-- = '0' + i;
889  *p = '-';
890  return p;
891  } else {
892  // Make sure we aren't at MIN_INT, in which case we can't say i = -i
893  i = i + 10;
894  i = -i;
895  *p-- = '0' + i % 10;
896  // Undo what we did a moment ago
897  i = i / 10 + 1;
898  do {
899  *p-- = '0' + i % 10;
900  i /= 10;
901  } while (i > 0);
902  *p = '-';
903  return p;
904  }
905  }
906 }
907 
908 char *FastHexToBuffer(int i, char* buffer) {
909  GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
910 
911  static const char *hexdigits = "0123456789abcdef";
912  char *p = buffer + 21;
913  *p-- = '\0';
914  do {
915  *p-- = hexdigits[i & 15]; // mod by 16
916  i >>= 4; // divide by 16
917  } while (i > 0);
918  return p + 1;
919 }
920 
921 char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
922  static const char *hexdigits = "0123456789abcdef";
923  buffer[num_byte] = '\0';
924  for (int i = num_byte - 1; i >= 0; i--) {
925 #ifdef _M_X64
926  // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
927  // block. Given that the uint32 cast was to improve performance on 32-bit
928  // platforms, we use 64-bit '&' directly.
929  buffer[i] = hexdigits[value & 0xf];
930 #else
931  buffer[i] = hexdigits[uint32(value) & 0xf];
932 #endif
933  value >>= 4;
934  }
935  return buffer;
936 }
937 
939  return InternalFastHexToBuffer(value, buffer, 16);
940 }
941 
944 }
945 
946 // ----------------------------------------------------------------------
947 // FastInt32ToBufferLeft()
948 // FastUInt32ToBufferLeft()
949 // FastInt64ToBufferLeft()
950 // FastUInt64ToBufferLeft()
951 //
952 // Like the Fast*ToBuffer() functions above, these are intended for speed.
953 // Unlike the Fast*ToBuffer() functions, however, these functions write
954 // their output to the beginning of the buffer (hence the name, as the
955 // output is left-aligned). The caller is responsible for ensuring that
956 // the buffer has enough space to hold the output.
957 //
958 // Returns a pointer to the end of the string (i.e. the null character
959 // terminating the string).
960 // ----------------------------------------------------------------------
961 
962 static const char two_ASCII_digits[100][2] = {
963  {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
964  {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
965  {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
966  {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
967  {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
968  {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
969  {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
970  {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
971  {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
972  {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
973  {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
974  {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
975  {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
976  {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
977  {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
978  {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
979  {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
980  {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
981  {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
982  {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
983 };
984 
986  uint32 digits;
987  const char *ASCII_digits = nullptr;
988  // The idea of this implementation is to trim the number of divides to as few
989  // as possible by using multiplication and subtraction rather than mod (%),
990  // and by outputting two digits at a time rather than one.
991  // The huge-number case is first, in the hopes that the compiler will output
992  // that case in one branch-free block of code, and only output conditional
993  // branches into it from below.
994  if (u >= 1000000000) { // >= 1,000,000,000
995  digits = u / 100000000; // 100,000,000
996  ASCII_digits = two_ASCII_digits[digits];
997  buffer[0] = ASCII_digits[0];
998  buffer[1] = ASCII_digits[1];
999  buffer += 2;
1000 sublt100_000_000:
1001  u -= digits * 100000000; // 100,000,000
1002 lt100_000_000:
1003  digits = u / 1000000; // 1,000,000
1004  ASCII_digits = two_ASCII_digits[digits];
1005  buffer[0] = ASCII_digits[0];
1006  buffer[1] = ASCII_digits[1];
1007  buffer += 2;
1008 sublt1_000_000:
1009  u -= digits * 1000000; // 1,000,000
1010 lt1_000_000:
1011  digits = u / 10000; // 10,000
1012  ASCII_digits = two_ASCII_digits[digits];
1013  buffer[0] = ASCII_digits[0];
1014  buffer[1] = ASCII_digits[1];
1015  buffer += 2;
1016 sublt10_000:
1017  u -= digits * 10000; // 10,000
1018 lt10_000:
1019  digits = u / 100;
1020  ASCII_digits = two_ASCII_digits[digits];
1021  buffer[0] = ASCII_digits[0];
1022  buffer[1] = ASCII_digits[1];
1023  buffer += 2;
1024 sublt100:
1025  u -= digits * 100;
1026 lt100:
1027  digits = u;
1028  ASCII_digits = two_ASCII_digits[digits];
1029  buffer[0] = ASCII_digits[0];
1030  buffer[1] = ASCII_digits[1];
1031  buffer += 2;
1032 done:
1033  *buffer = 0;
1034  return buffer;
1035  }
1036 
1037  if (u < 100) {
1038  digits = u;
1039  if (u >= 10) goto lt100;
1040  *buffer++ = '0' + digits;
1041  goto done;
1042  }
1043  if (u < 10000) { // 10,000
1044  if (u >= 1000) goto lt10_000;
1045  digits = u / 100;
1046  *buffer++ = '0' + digits;
1047  goto sublt100;
1048  }
1049  if (u < 1000000) { // 1,000,000
1050  if (u >= 100000) goto lt1_000_000;
1051  digits = u / 10000; // 10,000
1052  *buffer++ = '0' + digits;
1053  goto sublt10_000;
1054  }
1055  if (u < 100000000) { // 100,000,000
1056  if (u >= 10000000) goto lt100_000_000;
1057  digits = u / 1000000; // 1,000,000
1058  *buffer++ = '0' + digits;
1059  goto sublt1_000_000;
1060  }
1061  // we already know that u < 1,000,000,000
1062  digits = u / 100000000; // 100,000,000
1063  *buffer++ = '0' + digits;
1064  goto sublt100_000_000;
1065 }
1066 
1068  uint32 u = i;
1069  if (i < 0) {
1070  *buffer++ = '-';
1071  u = -i;
1072  }
1073  return FastUInt32ToBufferLeft(u, buffer);
1074 }
1075 
1077  int digits;
1078  const char *ASCII_digits = nullptr;
1079 
1080  uint32 u = static_cast<uint32>(u64);
1081  if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
1082 
1083  uint64 top_11_digits = u64 / 1000000000;
1084  buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
1085  u = u64 - (top_11_digits * 1000000000);
1086 
1087  digits = u / 10000000; // 10,000,000
1088  GOOGLE_DCHECK_LT(digits, 100);
1089  ASCII_digits = two_ASCII_digits[digits];
1090  buffer[0] = ASCII_digits[0];
1091  buffer[1] = ASCII_digits[1];
1092  buffer += 2;
1093  u -= digits * 10000000; // 10,000,000
1094  digits = u / 100000; // 100,000
1095  ASCII_digits = two_ASCII_digits[digits];
1096  buffer[0] = ASCII_digits[0];
1097  buffer[1] = ASCII_digits[1];
1098  buffer += 2;
1099  u -= digits * 100000; // 100,000
1100  digits = u / 1000; // 1,000
1101  ASCII_digits = two_ASCII_digits[digits];
1102  buffer[0] = ASCII_digits[0];
1103  buffer[1] = ASCII_digits[1];
1104  buffer += 2;
1105  u -= digits * 1000; // 1,000
1106  digits = u / 10;
1107  ASCII_digits = two_ASCII_digits[digits];
1108  buffer[0] = ASCII_digits[0];
1109  buffer[1] = ASCII_digits[1];
1110  buffer += 2;
1111  u -= digits * 10;
1112  digits = u;
1113  *buffer++ = '0' + digits;
1114  *buffer = 0;
1115  return buffer;
1116 }
1117 
1119  uint64 u = 0;
1120  if (i < 0) {
1121  *buffer++ = '-';
1122  u -= i;
1123  } else {
1124  u = i;
1125  }
1126  return FastUInt64ToBufferLeft(u, buffer);
1127 }
1128 
1129 // ----------------------------------------------------------------------
1130 // SimpleItoa()
1131 // Description: converts an integer to a string.
1132 //
1133 // Return value: string
1134 // ----------------------------------------------------------------------
1135 
1136 string SimpleItoa(int i) {
1137  char buffer[kFastToBufferSize];
1138  return (sizeof(i) == 4) ?
1141 }
1142 
1143 string SimpleItoa(unsigned int i) {
1144  char buffer[kFastToBufferSize];
1145  return string(buffer, (sizeof(i) == 4) ?
1148 }
1149 
1150 string SimpleItoa(long i) {
1151  char buffer[kFastToBufferSize];
1152  return (sizeof(i) == 4) ?
1155 }
1156 
1157 string SimpleItoa(unsigned long i) {
1158  char buffer[kFastToBufferSize];
1159  return string(buffer, (sizeof(i) == 4) ?
1162 }
1163 
1164 string SimpleItoa(long long i) {
1165  char buffer[kFastToBufferSize];
1166  return (sizeof(i) == 4) ?
1169 }
1170 
1171 string SimpleItoa(unsigned long long i) {
1172  char buffer[kFastToBufferSize];
1173  return string(buffer, (sizeof(i) == 4) ?
1176 }
1177 
1178 // ----------------------------------------------------------------------
1179 // SimpleDtoa()
1180 // SimpleFtoa()
1181 // DoubleToBuffer()
1182 // FloatToBuffer()
1183 // We want to print the value without losing precision, but we also do
1184 // not want to print more digits than necessary. This turns out to be
1185 // trickier than it sounds. Numbers like 0.2 cannot be represented
1186 // exactly in binary. If we print 0.2 with a very large precision,
1187 // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1188 // On the other hand, if we set the precision too low, we lose
1189 // significant digits when printing numbers that actually need them.
1190 // It turns out there is no precision value that does the right thing
1191 // for all numbers.
1192 //
1193 // Our strategy is to first try printing with a precision that is never
1194 // over-precise, then parse the result with strtod() to see if it
1195 // matches. If not, we print again with a precision that will always
1196 // give a precise result, but may use more digits than necessary.
1197 //
1198 // An arguably better strategy would be to use the algorithm described
1199 // in "How to Print Floating-Point Numbers Accurately" by Steele &
1200 // White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
1201 // however, that the following implementation is about as fast as
1202 // DMG's code. Furthermore, DMG's code locks mutexes, which means it
1203 // will not scale well on multi-core machines. DMG's code is slightly
1204 // more accurate (in that it will never use more digits than
1205 // necessary), but this is probably irrelevant for most users.
1206 //
1207 // Rob Pike and Ken Thompson also have an implementation of dtoa() in
1208 // third_party/fmt/fltfmt.cc. Their implementation is similar to this
1209 // one in that it makes guesses and then uses strtod() to check them.
1210 // Their implementation is faster because they use their own code to
1211 // generate the digits in the first place rather than use snprintf(),
1212 // thus avoiding format string parsing overhead. However, this makes
1213 // it considerably more complicated than the following implementation,
1214 // and it is embedded in a larger library. If speed turns out to be
1215 // an issue, we could re-implement this in terms of their
1216 // implementation.
1217 // ----------------------------------------------------------------------
1218 
1219 string SimpleDtoa(double value) {
1221  return DoubleToBuffer(value, buffer);
1222 }
1223 
1224 string SimpleFtoa(float value) {
1225  char buffer[kFloatToBufferSize];
1226  return FloatToBuffer(value, buffer);
1227 }
1228 
1229 static inline bool IsValidFloatChar(char c) {
1230  return ('0' <= c && c <= '9') ||
1231  c == 'e' || c == 'E' ||
1232  c == '+' || c == '-';
1233 }
1234 
1236  // Fast check: if the buffer has a normal decimal point, assume no
1237  // translation is needed.
1238  if (strchr(buffer, '.') != nullptr) return;
1239 
1240  // Find the first unknown character.
1241  while (IsValidFloatChar(*buffer)) ++buffer;
1242 
1243  if (*buffer == '\0') {
1244  // No radix character found.
1245  return;
1246  }
1247 
1248  // We are now pointing at the locale-specific radix character. Replace it
1249  // with '.'.
1250  *buffer = '.';
1251  ++buffer;
1252 
1253  if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
1254  // It appears the radix was a multi-byte character. We need to remove the
1255  // extra bytes.
1256  char* target = buffer;
1257  do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
1258  memmove(target, buffer, strlen(buffer) + 1);
1259  }
1260 }
1261 
1262 char* DoubleToBuffer(double value, char* buffer) {
1263  // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1264  // platforms these days. Just in case some system exists where DBL_DIG
1265  // is significantly larger -- and risks overflowing our buffer -- we have
1266  // this assert.
1267  GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1268 
1269  if (value == std::numeric_limits<double>::infinity()) {
1270  strcpy(buffer, "inf");
1271  return buffer;
1272  } else if (value == -std::numeric_limits<double>::infinity()) {
1273  strcpy(buffer, "-inf");
1274  return buffer;
1275  } else if (std::isnan(value)) {
1276  strcpy(buffer, "nan");
1277  return buffer;
1278  }
1279 
1280  int snprintf_result =
1281  snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1282 
1283  // The snprintf should never overflow because the buffer is significantly
1284  // larger than the precision we asked for.
1285  GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1286 
1287  // We need to make parsed_value volatile in order to force the compiler to
1288  // write it out to the stack. Otherwise, it may keep the value in a
1289  // register, and if it does that, it may keep it as a long double instead
1290  // of a double. This long double may have extra bits that make it compare
1291  // unequal to "value" even though it would be exactly equal if it were
1292  // truncated to a double.
1293  volatile double parsed_value = io::NoLocaleStrtod(buffer, nullptr);
1294  if (parsed_value != value) {
1295  int snprintf_result =
1296  snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
1297 
1298  // Should never overflow; see above.
1299  GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1300  }
1301 
1303  return buffer;
1304 }
1305 
1306 static int memcasecmp(const char *s1, const char *s2, size_t len) {
1307  const unsigned char *us1 = reinterpret_cast<const unsigned char *>(s1);
1308  const unsigned char *us2 = reinterpret_cast<const unsigned char *>(s2);
1309 
1310  for ( int i = 0; i < len; i++ ) {
1311  const int diff =
1312  static_cast<int>(static_cast<unsigned char>(ascii_tolower(us1[i]))) -
1313  static_cast<int>(static_cast<unsigned char>(ascii_tolower(us2[i])));
1314  if (diff != 0) return diff;
1315  }
1316  return 0;
1317 }
1318 
1319 inline bool CaseEqual(StringPiece s1, StringPiece s2) {
1320  if (s1.size() != s2.size()) return false;
1321  return memcasecmp(s1.data(), s2.data(), s1.size()) == 0;
1322 }
1323 
1325  GOOGLE_CHECK(value != nullptr) << "nullptr output boolean given.";
1326  if (CaseEqual(str, "true") || CaseEqual(str, "t") ||
1327  CaseEqual(str, "yes") || CaseEqual(str, "y") ||
1328  CaseEqual(str, "1")) {
1329  *value = true;
1330  return true;
1331  }
1332  if (CaseEqual(str, "false") || CaseEqual(str, "f") ||
1333  CaseEqual(str, "no") || CaseEqual(str, "n") ||
1334  CaseEqual(str, "0")) {
1335  *value = false;
1336  return true;
1337  }
1338  return false;
1339 }
1340 
1341 bool safe_strtof(const char* str, float* value) {
1342  char* endptr;
1343  errno = 0; // errno only gets set on errors
1344 #if defined(_WIN32) || defined (__hpux) // has no strtof()
1345  *value = io::NoLocaleStrtod(str, &endptr);
1346 #else
1347  *value = strtof(str, &endptr);
1348 #endif
1349  return *str != 0 && *endptr == 0 && errno == 0;
1350 }
1351 
1352 bool safe_strtod(const char* str, double* value) {
1353  char* endptr;
1354  *value = io::NoLocaleStrtod(str, &endptr);
1355  if (endptr != str) {
1356  while (ascii_isspace(*endptr)) ++endptr;
1357  }
1358  // Ignore range errors from strtod. The values it
1359  // returns on underflow and overflow are the right
1360  // fallback in a robust setting.
1361  return *str != '\0' && *endptr == '\0';
1362 }
1363 
1364 bool safe_strto32(const string& str, int32* value) {
1365  return safe_int_internal(str, value);
1366 }
1367 
1368 bool safe_strtou32(const string& str, uint32* value) {
1369  return safe_uint_internal(str, value);
1370 }
1371 
1372 bool safe_strto64(const string& str, int64* value) {
1373  return safe_int_internal(str, value);
1374 }
1375 
1376 bool safe_strtou64(const string& str, uint64* value) {
1377  return safe_uint_internal(str, value);
1378 }
1379 
1380 char* FloatToBuffer(float value, char* buffer) {
1381  // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1382  // platforms these days. Just in case some system exists where FLT_DIG
1383  // is significantly larger -- and risks overflowing our buffer -- we have
1384  // this assert.
1385  GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1386 
1387  if (value == std::numeric_limits<double>::infinity()) {
1388  strcpy(buffer, "inf");
1389  return buffer;
1390  } else if (value == -std::numeric_limits<double>::infinity()) {
1391  strcpy(buffer, "-inf");
1392  return buffer;
1393  } else if (std::isnan(value)) {
1394  strcpy(buffer, "nan");
1395  return buffer;
1396  }
1397 
1398  int snprintf_result =
1399  snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1400 
1401  // The snprintf should never overflow because the buffer is significantly
1402  // larger than the precision we asked for.
1403  GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1404 
1405  float parsed_value;
1406  if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1407  int snprintf_result =
1408  snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+3, value);
1409 
1410  // Should never overflow; see above.
1411  GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1412  }
1413 
1415  return buffer;
1416 }
1417 
1418 namespace strings {
1419 
1420 AlphaNum::AlphaNum(strings::Hex hex) {
1421  char *const end = &digits[kFastToBufferSize];
1422  char *writer = end;
1423  uint64 value = hex.value;
1424  uint64 width = hex.spec;
1425  // We accomplish minimum width by OR'ing in 0x10000 to the user's value,
1426  // where 0x10000 is the smallest hex number that is as wide as the user
1427  // asked for.
1428  uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) | value;
1429  static const char hexdigits[] = "0123456789abcdef";
1430  do {
1431  *--writer = hexdigits[value & 0xF];
1432  value >>= 4;
1433  mask >>= 4;
1434  } while (mask != 0);
1435  piece_data_ = writer;
1436  piece_size_ = end - writer;
1437 }
1438 
1439 } // namespace strings
1440 
1441 // ----------------------------------------------------------------------
1442 // StrCat()
1443 // This merges the given strings or integers, with no delimiter. This
1444 // is designed to be the fastest possible way to construct a string out
1445 // of a mix of raw C strings, C++ strings, and integer values.
1446 // ----------------------------------------------------------------------
1447 
1448 // Append is merely a version of memcpy that returns the address of the byte
1449 // after the area just overwritten. It comes in multiple flavors to minimize
1450 // call overhead.
1451 static char *Append1(char *out, const AlphaNum &x) {
1452  memcpy(out, x.data(), x.size());
1453  return out + x.size();
1454 }
1455 
1456 static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) {
1457  memcpy(out, x1.data(), x1.size());
1458  out += x1.size();
1459 
1460  memcpy(out, x2.data(), x2.size());
1461  return out + x2.size();
1462 }
1463 
1464 static char *Append4(char *out,
1465  const AlphaNum &x1, const AlphaNum &x2,
1466  const AlphaNum &x3, const AlphaNum &x4) {
1467  memcpy(out, x1.data(), x1.size());
1468  out += x1.size();
1469 
1470  memcpy(out, x2.data(), x2.size());
1471  out += x2.size();
1472 
1473  memcpy(out, x3.data(), x3.size());
1474  out += x3.size();
1475 
1476  memcpy(out, x4.data(), x4.size());
1477  return out + x4.size();
1478 }
1479 
1480 string StrCat(const AlphaNum &a, const AlphaNum &b) {
1481  string result;
1482  result.resize(a.size() + b.size());
1483  char *const begin = &*result.begin();
1484  char *out = Append2(begin, a, b);
1485  GOOGLE_DCHECK_EQ(out, begin + result.size());
1486  return result;
1487 }
1488 
1489 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
1490  string result;
1491  result.resize(a.size() + b.size() + c.size());
1492  char *const begin = &*result.begin();
1493  char *out = Append2(begin, a, b);
1494  out = Append1(out, c);
1495  GOOGLE_DCHECK_EQ(out, begin + result.size());
1496  return result;
1497 }
1498 
1499 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1500  const AlphaNum &d) {
1501  string result;
1502  result.resize(a.size() + b.size() + c.size() + d.size());
1503  char *const begin = &*result.begin();
1504  char *out = Append4(begin, a, b, c, d);
1505  GOOGLE_DCHECK_EQ(out, begin + result.size());
1506  return result;
1507 }
1508 
1509 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1510  const AlphaNum &d, const AlphaNum &e) {
1511  string result;
1512  result.resize(a.size() + b.size() + c.size() + d.size() + e.size());
1513  char *const begin = &*result.begin();
1514  char *out = Append4(begin, a, b, c, d);
1515  out = Append1(out, e);
1516  GOOGLE_DCHECK_EQ(out, begin + result.size());
1517  return result;
1518 }
1519 
1520 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1521  const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
1522  string result;
1523  result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1524  f.size());
1525  char *const begin = &*result.begin();
1526  char *out = Append4(begin, a, b, c, d);
1527  out = Append2(out, e, f);
1528  GOOGLE_DCHECK_EQ(out, begin + result.size());
1529  return result;
1530 }
1531 
1532 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1533  const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1534  const AlphaNum &g) {
1535  string result;
1536  result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1537  f.size() + g.size());
1538  char *const begin = &*result.begin();
1539  char *out = Append4(begin, a, b, c, d);
1540  out = Append2(out, e, f);
1541  out = Append1(out, g);
1542  GOOGLE_DCHECK_EQ(out, begin + result.size());
1543  return result;
1544 }
1545 
1546 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1547  const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1548  const AlphaNum &g, const AlphaNum &h) {
1549  string result;
1550  result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1551  f.size() + g.size() + h.size());
1552  char *const begin = &*result.begin();
1553  char *out = Append4(begin, a, b, c, d);
1554  out = Append4(out, e, f, g, h);
1555  GOOGLE_DCHECK_EQ(out, begin + result.size());
1556  return result;
1557 }
1558 
1559 string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1560  const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1561  const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
1562  string result;
1563  result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
1564  f.size() + g.size() + h.size() + i.size());
1565  char *const begin = &*result.begin();
1566  char *out = Append4(begin, a, b, c, d);
1567  out = Append4(out, e, f, g, h);
1568  out = Append1(out, i);
1569  GOOGLE_DCHECK_EQ(out, begin + result.size());
1570  return result;
1571 }
1572 
1573 // It's possible to call StrAppend with a char * pointer that is partway into
1574 // the string we're appending to. However the results of this are random.
1575 // Therefore, check for this in debug mode. Use unsigned math so we only have
1576 // to do one comparison.
1577 #define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
1578  GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \
1579  uintptr_t((dest).size()))
1580 
1581 void StrAppend(string *result, const AlphaNum &a) {
1582  GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1583  result->append(a.data(), a.size());
1584 }
1585 
1586 void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
1587  GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1588  GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1589  string::size_type old_size = result->size();
1590  result->resize(old_size + a.size() + b.size());
1591  char *const begin = &*result->begin();
1592  char *out = Append2(begin + old_size, a, b);
1593  GOOGLE_DCHECK_EQ(out, begin + result->size());
1594 }
1595 
1596 void StrAppend(string *result,
1597  const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
1598  GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1599  GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1600  GOOGLE_DCHECK_NO_OVERLAP(*result, c);
1601  string::size_type old_size = result->size();
1602  result->resize(old_size + a.size() + b.size() + c.size());
1603  char *const begin = &*result->begin();
1604  char *out = Append2(begin + old_size, a, b);
1605  out = Append1(out, c);
1606  GOOGLE_DCHECK_EQ(out, begin + result->size());
1607 }
1608 
1609 void StrAppend(string *result,
1610  const AlphaNum &a, const AlphaNum &b,
1611  const AlphaNum &c, const AlphaNum &d) {
1612  GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1613  GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1614  GOOGLE_DCHECK_NO_OVERLAP(*result, c);
1615  GOOGLE_DCHECK_NO_OVERLAP(*result, d);
1616  string::size_type old_size = result->size();
1617  result->resize(old_size + a.size() + b.size() + c.size() + d.size());
1618  char *const begin = &*result->begin();
1619  char *out = Append4(begin + old_size, a, b, c, d);
1620  GOOGLE_DCHECK_EQ(out, begin + result->size());
1621 }
1622 
1623 int GlobalReplaceSubstring(const string& substring,
1624  const string& replacement,
1625  string* s) {
1626  GOOGLE_CHECK(s != nullptr);
1627  if (s->empty() || substring.empty())
1628  return 0;
1629  string tmp;
1630  int num_replacements = 0;
1631  int pos = 0;
1632  for (int match_pos = s->find(substring.data(), pos, substring.length());
1633  match_pos != string::npos;
1634  pos = match_pos + substring.length(),
1635  match_pos = s->find(substring.data(), pos, substring.length())) {
1636  ++num_replacements;
1637  // Append the original content before the match.
1638  tmp.append(*s, pos, match_pos - pos);
1639  // Append the replacement for the match.
1640  tmp.append(replacement.begin(), replacement.end());
1641  }
1642  // Append the content after the last match. If no replacements were made, the
1643  // original string is left untouched.
1644  if (num_replacements > 0) {
1645  tmp.append(*s, pos, s->length() - pos);
1646  s->swap(tmp);
1647  }
1648  return num_replacements;
1649 }
1650 
1651 int CalculateBase64EscapedLen(int input_len, bool do_padding) {
1652  // Base64 encodes three bytes of input at a time. If the input is not
1653  // divisible by three, we pad as appropriate.
1654  //
1655  // (from http://tools.ietf.org/html/rfc3548)
1656  // Special processing is performed if fewer than 24 bits are available
1657  // at the end of the data being encoded. A full encoding quantum is
1658  // always completed at the end of a quantity. When fewer than 24 input
1659  // bits are available in an input group, zero bits are added (on the
1660  // right) to form an integral number of 6-bit groups. Padding at the
1661  // end of the data is performed using the '=' character. Since all base
1662  // 64 input is an integral number of octets, only the following cases
1663  // can arise:
1664 
1665 
1666  // Base64 encodes each three bytes of input into four bytes of output.
1667  int len = (input_len / 3) * 4;
1668 
1669  if (input_len % 3 == 0) {
1670  // (from http://tools.ietf.org/html/rfc3548)
1671  // (1) the final quantum of encoding input is an integral multiple of 24
1672  // bits; here, the final unit of encoded output will be an integral
1673  // multiple of 4 characters with no "=" padding,
1674  } else if (input_len % 3 == 1) {
1675  // (from http://tools.ietf.org/html/rfc3548)
1676  // (2) the final quantum of encoding input is exactly 8 bits; here, the
1677  // final unit of encoded output will be two characters followed by two
1678  // "=" padding characters, or
1679  len += 2;
1680  if (do_padding) {
1681  len += 2;
1682  }
1683  } else { // (input_len % 3 == 2)
1684  // (from http://tools.ietf.org/html/rfc3548)
1685  // (3) the final quantum of encoding input is exactly 16 bits; here, the
1686  // final unit of encoded output will be three characters followed by one
1687  // "=" padding character.
1688  len += 3;
1689  if (do_padding) {
1690  len += 1;
1691  }
1692  }
1693 
1694  assert(len >= input_len); // make sure we didn't overflow
1695  return len;
1696 }
1697 
1698 // Base64Escape does padding, so this calculation includes padding.
1699 int CalculateBase64EscapedLen(int input_len) {
1700  return CalculateBase64EscapedLen(input_len, true);
1701 }
1702 
1703 // ----------------------------------------------------------------------
1704 // int Base64Unescape() - base64 decoder
1705 // int Base64Escape() - base64 encoder
1706 // int WebSafeBase64Unescape() - Google's variation of base64 decoder
1707 // int WebSafeBase64Escape() - Google's variation of base64 encoder
1708 //
1709 // Check out
1710 // http://tools.ietf.org/html/rfc2045 for formal description, but what we
1711 // care about is that...
1712 // Take the encoded stuff in groups of 4 characters and turn each
1713 // character into a code 0 to 63 thus:
1714 // A-Z map to 0 to 25
1715 // a-z map to 26 to 51
1716 // 0-9 map to 52 to 61
1717 // +(- for WebSafe) maps to 62
1718 // /(_ for WebSafe) maps to 63
1719 // There will be four numbers, all less than 64 which can be represented
1720 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
1721 // Arrange the 6 digit binary numbers into three bytes as such:
1722 // aaaaaabb bbbbcccc ccdddddd
1723 // Equals signs (one or two) are used at the end of the encoded block to
1724 // indicate that the text was not an integer multiple of three bytes long.
1725 // ----------------------------------------------------------------------
1726 
1727 int Base64UnescapeInternal(const char *src_param, int szsrc,
1728  char *dest, int szdest,
1729  const signed char* unbase64) {
1730  static const char kPad64Equals = '=';
1731  static const char kPad64Dot = '.';
1732 
1733  int decode = 0;
1734  int destidx = 0;
1735  int state = 0;
1736  unsigned int ch = 0;
1737  unsigned int temp = 0;
1738 
1739  // If "char" is signed by default, using *src as an array index results in
1740  // accessing negative array elements. Treat the input as a pointer to
1741  // unsigned char to avoid this.
1742  const unsigned char *src = reinterpret_cast<const unsigned char*>(src_param);
1743 
1744  // The GET_INPUT macro gets the next input character, skipping
1745  // over any whitespace, and stopping when we reach the end of the
1746  // string or when we read any non-data character. The arguments are
1747  // an arbitrary identifier (used as a label for goto) and the number
1748  // of data bytes that must remain in the input to avoid aborting the
1749  // loop.
1750 #define GET_INPUT(label, remain) \
1751  label: \
1752  --szsrc; \
1753  ch = *src++; \
1754  decode = unbase64[ch]; \
1755  if (decode < 0) { \
1756  if (ascii_isspace(ch) && szsrc >= remain) \
1757  goto label; \
1758  state = 4 - remain; \
1759  break; \
1760  }
1761 
1762  // if dest is null, we're just checking to see if it's legal input
1763  // rather than producing output. (I suspect this could just be done
1764  // with a regexp...). We duplicate the loop so this test can be
1765  // outside it instead of in every iteration.
1766 
1767  if (dest) {
1768  // This loop consumes 4 input bytes and produces 3 output bytes
1769  // per iteration. We can't know at the start that there is enough
1770  // data left in the string for a full iteration, so the loop may
1771  // break out in the middle; if so 'state' will be set to the
1772  // number of input bytes read.
1773 
1774  while (szsrc >= 4) {
1775  // We'll start by optimistically assuming that the next four
1776  // bytes of the string (src[0..3]) are four good data bytes
1777  // (that is, no nulls, whitespace, padding chars, or illegal
1778  // chars). We need to test src[0..2] for nulls individually
1779  // before constructing temp to preserve the property that we
1780  // never read past a null in the string (no matter how long
1781  // szsrc claims the string is).
1782 
1783  if (!src[0] || !src[1] || !src[2] ||
1784  (temp = ((unsigned(unbase64[src[0]]) << 18) |
1785  (unsigned(unbase64[src[1]]) << 12) |
1786  (unsigned(unbase64[src[2]]) << 6) |
1787  (unsigned(unbase64[src[3]])))) & 0x80000000) {
1788  // Iff any of those four characters was bad (null, illegal,
1789  // whitespace, padding), then temp's high bit will be set
1790  // (because unbase64[] is -1 for all bad characters).
1791  //
1792  // We'll back up and resort to the slower decoder, which knows
1793  // how to handle those cases.
1794 
1795  GET_INPUT(first, 4);
1796  temp = decode;
1797  GET_INPUT(second, 3);
1798  temp = (temp << 6) | decode;
1799  GET_INPUT(third, 2);
1800  temp = (temp << 6) | decode;
1801  GET_INPUT(fourth, 1);
1802  temp = (temp << 6) | decode;
1803  } else {
1804  // We really did have four good data bytes, so advance four
1805  // characters in the string.
1806 
1807  szsrc -= 4;
1808  src += 4;
1809  decode = -1;
1810  ch = '\0';
1811  }
1812 
1813  // temp has 24 bits of input, so write that out as three bytes.
1814 
1815  if (destidx+3 > szdest) return -1;
1816  dest[destidx+2] = temp;
1817  temp >>= 8;
1818  dest[destidx+1] = temp;
1819  temp >>= 8;
1820  dest[destidx] = temp;
1821  destidx += 3;
1822  }
1823  } else {
1824  while (szsrc >= 4) {
1825  if (!src[0] || !src[1] || !src[2] ||
1826  (temp = ((unsigned(unbase64[src[0]]) << 18) |
1827  (unsigned(unbase64[src[1]]) << 12) |
1828  (unsigned(unbase64[src[2]]) << 6) |
1829  (unsigned(unbase64[src[3]])))) & 0x80000000) {
1830  GET_INPUT(first_no_dest, 4);
1831  GET_INPUT(second_no_dest, 3);
1832  GET_INPUT(third_no_dest, 2);
1833  GET_INPUT(fourth_no_dest, 1);
1834  } else {
1835  szsrc -= 4;
1836  src += 4;
1837  decode = -1;
1838  ch = '\0';
1839  }
1840  destidx += 3;
1841  }
1842  }
1843 
1844 #undef GET_INPUT
1845 
1846  // if the loop terminated because we read a bad character, return
1847  // now.
1848  if (decode < 0 && ch != '\0' &&
1849  ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(ch))
1850  return -1;
1851 
1852  if (ch == kPad64Equals || ch == kPad64Dot) {
1853  // if we stopped by hitting an '=' or '.', un-read that character -- we'll
1854  // look at it again when we count to check for the proper number of
1855  // equals signs at the end.
1856  ++szsrc;
1857  --src;
1858  } else {
1859  // This loop consumes 1 input byte per iteration. It's used to
1860  // clean up the 0-3 input bytes remaining when the first, faster
1861  // loop finishes. 'temp' contains the data from 'state' input
1862  // characters read by the first loop.
1863  while (szsrc > 0) {
1864  --szsrc;
1865  ch = *src++;
1866  decode = unbase64[ch];
1867  if (decode < 0) {
1868  if (ascii_isspace(ch)) {
1869  continue;
1870  } else if (ch == '\0') {
1871  break;
1872  } else if (ch == kPad64Equals || ch == kPad64Dot) {
1873  // back up one character; we'll read it again when we check
1874  // for the correct number of pad characters at the end.
1875  ++szsrc;
1876  --src;
1877  break;
1878  } else {
1879  return -1;
1880  }
1881  }
1882 
1883  // Each input character gives us six bits of output.
1884  temp = (temp << 6) | decode;
1885  ++state;
1886  if (state == 4) {
1887  // If we've accumulated 24 bits of output, write that out as
1888  // three bytes.
1889  if (dest) {
1890  if (destidx+3 > szdest) return -1;
1891  dest[destidx+2] = temp;
1892  temp >>= 8;
1893  dest[destidx+1] = temp;
1894  temp >>= 8;
1895  dest[destidx] = temp;
1896  }
1897  destidx += 3;
1898  state = 0;
1899  temp = 0;
1900  }
1901  }
1902  }
1903 
1904  // Process the leftover data contained in 'temp' at the end of the input.
1905  int expected_equals = 0;
1906  switch (state) {
1907  case 0:
1908  // Nothing left over; output is a multiple of 3 bytes.
1909  break;
1910 
1911  case 1:
1912  // Bad input; we have 6 bits left over.
1913  return -1;
1914 
1915  case 2:
1916  // Produce one more output byte from the 12 input bits we have left.
1917  if (dest) {
1918  if (destidx+1 > szdest) return -1;
1919  temp >>= 4;
1920  dest[destidx] = temp;
1921  }
1922  ++destidx;
1923  expected_equals = 2;
1924  break;
1925 
1926  case 3:
1927  // Produce two more output bytes from the 18 input bits we have left.
1928  if (dest) {
1929  if (destidx+2 > szdest) return -1;
1930  temp >>= 2;
1931  dest[destidx+1] = temp;
1932  temp >>= 8;
1933  dest[destidx] = temp;
1934  }
1935  destidx += 2;
1936  expected_equals = 1;
1937  break;
1938 
1939  default:
1940  // state should have no other values at this point.
1941  GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
1942  }
1943 
1944  // The remainder of the string should be all whitespace, mixed with
1945  // exactly 0 equals signs, or exactly 'expected_equals' equals
1946  // signs. (Always accepting 0 equals signs is a google extension
1947  // not covered in the RFC, as is accepting dot as the pad character.)
1948 
1949  int equals = 0;
1950  while (szsrc > 0 && *src) {
1951  if (*src == kPad64Equals || *src == kPad64Dot)
1952  ++equals;
1953  else if (!ascii_isspace(*src))
1954  return -1;
1955  --szsrc;
1956  ++src;
1957  }
1958 
1959  return (equals == 0 || equals == expected_equals) ? destidx : -1;
1960 }
1961 
1962 // The arrays below were generated by the following code
1963 // #include <sys/time.h>
1964 // #include <stdlib.h>
1965 // #include <string.h>
1966 // #include <stdio.h>
1967 // main()
1968 // {
1969 // static const char Base64[] =
1970 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1971 // const char *pos;
1972 // int idx, i, j;
1973 // printf(" ");
1974 // for (i = 0; i < 255; i += 8) {
1975 // for (j = i; j < i + 8; j++) {
1976 // pos = strchr(Base64, j);
1977 // if ((pos == nullptr) || (j == 0))
1978 // idx = -1;
1979 // else
1980 // idx = pos - Base64;
1981 // if (idx == -1)
1982 // printf(" %2d, ", idx);
1983 // else
1984 // printf(" %2d/""*%c*""/,", idx, j);
1985 // }
1986 // printf("\n ");
1987 // }
1988 // }
1989 //
1990 // where the value of "Base64[]" was replaced by one of the base-64 conversion
1991 // tables from the functions below.
1992 static const signed char kUnBase64[] = {
1993  -1, -1, -1, -1, -1, -1, -1, -1,
1994  -1, -1, -1, -1, -1, -1, -1, -1,
1995  -1, -1, -1, -1, -1, -1, -1, -1,
1996  -1, -1, -1, -1, -1, -1, -1, -1,
1997  -1, -1, -1, -1, -1, -1, -1, -1,
1998  -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,
1999  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
2000  60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
2001  -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
2002  7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
2003  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
2004  23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,
2005  -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
2006  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
2007  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
2008  49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
2009  -1, -1, -1, -1, -1, -1, -1, -1,
2010  -1, -1, -1, -1, -1, -1, -1, -1,
2011  -1, -1, -1, -1, -1, -1, -1, -1,
2012  -1, -1, -1, -1, -1, -1, -1, -1,
2013  -1, -1, -1, -1, -1, -1, -1, -1,
2014  -1, -1, -1, -1, -1, -1, -1, -1,
2015  -1, -1, -1, -1, -1, -1, -1, -1,
2016  -1, -1, -1, -1, -1, -1, -1, -1,
2017  -1, -1, -1, -1, -1, -1, -1, -1,
2018  -1, -1, -1, -1, -1, -1, -1, -1,
2019  -1, -1, -1, -1, -1, -1, -1, -1,
2020  -1, -1, -1, -1, -1, -1, -1, -1,
2021  -1, -1, -1, -1, -1, -1, -1, -1,
2022  -1, -1, -1, -1, -1, -1, -1, -1,
2023  -1, -1, -1, -1, -1, -1, -1, -1,
2024  -1, -1, -1, -1, -1, -1, -1, -1
2025 };
2026 static const signed char kUnWebSafeBase64[] = {
2027  -1, -1, -1, -1, -1, -1, -1, -1,
2028  -1, -1, -1, -1, -1, -1, -1, -1,
2029  -1, -1, -1, -1, -1, -1, -1, -1,
2030  -1, -1, -1, -1, -1, -1, -1, -1,
2031  -1, -1, -1, -1, -1, -1, -1, -1,
2032  -1, -1, -1, -1, -1, 62/*-*/, -1, -1,
2033  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
2034  60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
2035  -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
2036  7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
2037  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
2038  23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/,
2039  -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
2040  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
2041  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
2042  49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
2043  -1, -1, -1, -1, -1, -1, -1, -1,
2044  -1, -1, -1, -1, -1, -1, -1, -1,
2045  -1, -1, -1, -1, -1, -1, -1, -1,
2046  -1, -1, -1, -1, -1, -1, -1, -1,
2047  -1, -1, -1, -1, -1, -1, -1, -1,
2048  -1, -1, -1, -1, -1, -1, -1, -1,
2049  -1, -1, -1, -1, -1, -1, -1, -1,
2050  -1, -1, -1, -1, -1, -1, -1, -1,
2051  -1, -1, -1, -1, -1, -1, -1, -1,
2052  -1, -1, -1, -1, -1, -1, -1, -1,
2053  -1, -1, -1, -1, -1, -1, -1, -1,
2054  -1, -1, -1, -1, -1, -1, -1, -1,
2055  -1, -1, -1, -1, -1, -1, -1, -1,
2056  -1, -1, -1, -1, -1, -1, -1, -1,
2057  -1, -1, -1, -1, -1, -1, -1, -1,
2058  -1, -1, -1, -1, -1, -1, -1, -1
2059 };
2060 
2061 int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) {
2062  return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64);
2063 }
2064 
2065 static bool Base64UnescapeInternal(const char* src, int slen, string* dest,
2066  const signed char* unbase64) {
2067  // Determine the size of the output string. Base64 encodes every 3 bytes into
2068  // 4 characters. any leftover chars are added directly for good measure.
2069  // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
2070  const int dest_len = 3 * (slen / 4) + (slen % 4);
2071 
2072  dest->resize(dest_len);
2073 
2074  // We are getting the destination buffer by getting the beginning of the
2075  // string and converting it into a char *.
2076  const int len = Base64UnescapeInternal(src, slen, string_as_array(dest),
2077  dest_len, unbase64);
2078  if (len < 0) {
2079  dest->clear();
2080  return false;
2081  }
2082 
2083  // could be shorter if there was padding
2084  GOOGLE_DCHECK_LE(len, dest_len);
2085  dest->erase(len);
2086 
2087  return true;
2088 }
2089 
2091  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
2092 }
2093 
2095  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
2096 }
2097 
2098 int Base64EscapeInternal(const unsigned char *src, int szsrc,
2099  char *dest, int szdest, const char *base64,
2100  bool do_padding) {
2101  static const char kPad64 = '=';
2102 
2103  if (szsrc <= 0) return 0;
2104 
2105  if (szsrc * 4 > szdest * 3) return 0;
2106 
2107  char *cur_dest = dest;
2108  const unsigned char *cur_src = src;
2109 
2110  char *limit_dest = dest + szdest;
2111  const unsigned char *limit_src = src + szsrc;
2112 
2113  // Three bytes of data encodes to four characters of cyphertext.
2114  // So we can pump through three-byte chunks atomically.
2115  while (cur_src < limit_src - 3) { // keep going as long as we have >= 32 bits
2116  uint32 in = BigEndian::Load32(cur_src) >> 8;
2117 
2118  cur_dest[0] = base64[in >> 18];
2119  in &= 0x3FFFF;
2120  cur_dest[1] = base64[in >> 12];
2121  in &= 0xFFF;
2122  cur_dest[2] = base64[in >> 6];
2123  in &= 0x3F;
2124  cur_dest[3] = base64[in];
2125 
2126  cur_dest += 4;
2127  cur_src += 3;
2128  }
2129  // To save time, we didn't update szdest or szsrc in the loop. So do it now.
2130  szdest = limit_dest - cur_dest;
2131  szsrc = limit_src - cur_src;
2132 
2133  /* now deal with the tail (<=3 bytes) */
2134  switch (szsrc) {
2135  case 0:
2136  // Nothing left; nothing more to do.
2137  break;
2138  case 1: {
2139  // One byte left: this encodes to two characters, and (optionally)
2140  // two pad characters to round out the four-character cypherblock.
2141  if ((szdest -= 2) < 0) return 0;
2142  uint32 in = cur_src[0];
2143  cur_dest[0] = base64[in >> 2];
2144  in &= 0x3;
2145  cur_dest[1] = base64[in << 4];
2146  cur_dest += 2;
2147  if (do_padding) {
2148  if ((szdest -= 2) < 0) return 0;
2149  cur_dest[0] = kPad64;
2150  cur_dest[1] = kPad64;
2151  cur_dest += 2;
2152  }
2153  break;
2154  }
2155  case 2: {
2156  // Two bytes left: this encodes to three characters, and (optionally)
2157  // one pad character to round out the four-character cypherblock.
2158  if ((szdest -= 3) < 0) return 0;
2159  uint32 in = BigEndian::Load16(cur_src);
2160  cur_dest[0] = base64[in >> 10];
2161  in &= 0x3FF;
2162  cur_dest[1] = base64[in >> 4];
2163  in &= 0x00F;
2164  cur_dest[2] = base64[in << 2];
2165  cur_dest += 3;
2166  if (do_padding) {
2167  if ((szdest -= 1) < 0) return 0;
2168  cur_dest[0] = kPad64;
2169  cur_dest += 1;
2170  }
2171  break;
2172  }
2173  case 3: {
2174  // Three bytes left: same as in the big loop above. We can't do this in
2175  // the loop because the loop above always reads 4 bytes, and the fourth
2176  // byte is past the end of the input.
2177  if ((szdest -= 4) < 0) return 0;
2178  uint32 in = (cur_src[0] << 16) + BigEndian::Load16(cur_src + 1);
2179  cur_dest[0] = base64[in >> 18];
2180  in &= 0x3FFFF;
2181  cur_dest[1] = base64[in >> 12];
2182  in &= 0xFFF;
2183  cur_dest[2] = base64[in >> 6];
2184  in &= 0x3F;
2185  cur_dest[3] = base64[in];
2186  cur_dest += 4;
2187  break;
2188  }
2189  default:
2190  // Should not be reached: blocks of 4 bytes are handled
2191  // in the while loop before this switch statement.
2192  GOOGLE_LOG(FATAL) << "Logic problem? szsrc = " << szsrc;
2193  break;
2194  }
2195  return (cur_dest - dest);
2196 }
2197 
2198 static const char kBase64Chars[] =
2199 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
2200 
2201 static const char kWebSafeBase64Chars[] =
2202 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
2203 
2204 int Base64Escape(const unsigned char *src, int szsrc, char *dest, int szdest) {
2205  return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);
2206 }
2207 int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest,
2208  int szdest, bool do_padding) {
2209  return Base64EscapeInternal(src, szsrc, dest, szdest,
2210  kWebSafeBase64Chars, do_padding);
2211 }
2212 
2213 void Base64EscapeInternal(const unsigned char* src, int szsrc,
2214  string* dest, bool do_padding,
2215  const char* base64_chars) {
2216  const int calc_escaped_size =
2217  CalculateBase64EscapedLen(szsrc, do_padding);
2218  dest->resize(calc_escaped_size);
2219  const int escaped_len = Base64EscapeInternal(src, szsrc,
2221  dest->size(),
2222  base64_chars,
2223  do_padding);
2224  GOOGLE_DCHECK_EQ(calc_escaped_size, escaped_len);
2225  dest->erase(escaped_len);
2226 }
2227 
2228 void Base64Escape(const unsigned char *src, int szsrc,
2229  string* dest, bool do_padding) {
2230  Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);
2231 }
2232 
2233 void WebSafeBase64Escape(const unsigned char *src, int szsrc,
2234  string *dest, bool do_padding) {
2235  Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars);
2236 }
2237 
2239  Base64Escape(reinterpret_cast<const unsigned char*>(src.data()),
2240  src.size(), dest, true);
2241 }
2242 
2244  WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
2245  src.size(), dest, false);
2246 }
2247 
2249  WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
2250  src.size(), dest, true);
2251 }
2252 
2253 // Helper to append a Unicode code point to a string as UTF8, without bringing
2254 // in any external dependencies.
2255 int EncodeAsUTF8Char(uint32 code_point, char* output) {
2256  uint32 tmp = 0;
2257  int len = 0;
2258  if (code_point <= 0x7f) {
2259  tmp = code_point;
2260  len = 1;
2261  } else if (code_point <= 0x07ff) {
2262  tmp = 0x0000c080 |
2263  ((code_point & 0x07c0) << 2) |
2264  (code_point & 0x003f);
2265  len = 2;
2266  } else if (code_point <= 0xffff) {
2267  tmp = 0x00e08080 |
2268  ((code_point & 0xf000) << 4) |
2269  ((code_point & 0x0fc0) << 2) |
2270  (code_point & 0x003f);
2271  len = 3;
2272  } else {
2273  // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
2274  // normally only defined up to there as well.
2275  tmp = 0xf0808080 |
2276  ((code_point & 0x1c0000) << 6) |
2277  ((code_point & 0x03f000) << 4) |
2278  ((code_point & 0x000fc0) << 2) |
2279  (code_point & 0x003f);
2280  len = 4;
2281  }
2282  tmp = ghtonl(tmp);
2283  memcpy(output, reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
2284  return len;
2285 }
2286 
2287 // Table of UTF-8 character lengths, based on first byte
2288 static const unsigned char kUTF8LenTbl[256] = {
2289  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2290  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2291  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2292  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2293 
2294  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2295  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
2296  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
2297  3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
2298 };
2299 
2300 // Return length of a single UTF-8 source character
2301 int UTF8FirstLetterNumBytes(const char* src, int len) {
2302  if (len == 0) {
2303  return 0;
2304  }
2305  return kUTF8LenTbl[*reinterpret_cast<const uint8*>(src)];
2306 }
2307 
2308 // ----------------------------------------------------------------------
2309 // CleanStringLineEndings()
2310 // Clean up a multi-line string to conform to Unix line endings.
2311 // Reads from src and appends to dst, so usually dst should be empty.
2312 //
2313 // If there is no line ending at the end of a non-empty string, it can
2314 // be added automatically.
2315 //
2316 // Four different types of input are correctly handled:
2317 //
2318 // - Unix/Linux files: line ending is LF: pass through unchanged
2319 //
2320 // - DOS/Windows files: line ending is CRLF: convert to LF
2321 //
2322 // - Legacy Mac files: line ending is CR: convert to LF
2323 //
2324 // - Garbled files: random line endings: convert gracefully
2325 // lonely CR, lonely LF, CRLF: convert to LF
2326 //
2327 // @param src The multi-line string to convert
2328 // @param dst The converted string is appended to this string
2329 // @param auto_end_last_line Automatically terminate the last line
2330 //
2331 // Limitations:
2332 //
2333 // This does not do the right thing for CRCRLF files created by
2334 // broken programs that do another Unix->DOS conversion on files
2335 // that are already in CRLF format. For this, a two-pass approach
2336 // brute-force would be needed that
2337 //
2338 // (1) determines the presence of LF (first one is ok)
2339 // (2) if yes, removes any CR, else convert every CR to LF
2340 
2341 void CleanStringLineEndings(const string &src, string *dst,
2342  bool auto_end_last_line) {
2343  if (dst->empty()) {
2344  dst->append(src);
2345  CleanStringLineEndings(dst, auto_end_last_line);
2346  } else {
2347  string tmp = src;
2348  CleanStringLineEndings(&tmp, auto_end_last_line);
2349  dst->append(tmp);
2350  }
2351 }
2352 
2353 void CleanStringLineEndings(string *str, bool auto_end_last_line) {
2354  ptrdiff_t output_pos = 0;
2355  bool r_seen = false;
2356  ptrdiff_t len = str->size();
2357 
2358  char *p = &(*str)[0];
2359 
2360  for (ptrdiff_t input_pos = 0; input_pos < len;) {
2361  if (!r_seen && input_pos + 8 < len) {
2362  uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos);
2363  // Loop over groups of 8 bytes at a time until we come across
2364  // a word that has a byte whose value is less than or equal to
2365  // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ).
2366  //
2367  // We use a has_less macro that quickly tests a whole 64-bit
2368  // word to see if any of the bytes has a value < N.
2369  //
2370  // For more details, see:
2371  // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
2372 #define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128)
2373  if (!has_less(v, '\r' + 1)) {
2374 #undef has_less
2375  // No byte in this word has a value that could be a \r or a \n
2376  if (output_pos != input_pos) {
2377  GOOGLE_UNALIGNED_STORE64(p + output_pos, v);
2378  }
2379  input_pos += 8;
2380  output_pos += 8;
2381  continue;
2382  }
2383  }
2384  string::const_reference in = p[input_pos];
2385  if (in == '\r') {
2386  if (r_seen) p[output_pos++] = '\n';
2387  r_seen = true;
2388  } else if (in == '\n') {
2389  if (input_pos != output_pos)
2390  p[output_pos++] = '\n';
2391  else
2392  output_pos++;
2393  r_seen = false;
2394  } else {
2395  if (r_seen) p[output_pos++] = '\n';
2396  r_seen = false;
2397  if (input_pos != output_pos)
2398  p[output_pos++] = in;
2399  else
2400  output_pos++;
2401  }
2402  input_pos++;
2403  }
2404  if (r_seen ||
2405  (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) {
2406  str->resize(output_pos + 1);
2407  str->operator[](output_pos) = '\n';
2408  } else if (output_pos < len) {
2409  str->resize(output_pos);
2410  }
2411 }
2412 
2413 } // namespace protobuf
2414 } // namespace google
google::protobuf::CalculateBase64EscapedLen
int CalculateBase64EscapedLen(int input_len, bool do_padding)
Definition: strutil.cc:1651
google::protobuf::Base64UnescapeInternal
int Base64UnescapeInternal(const char *src_param, int szsrc, char *dest, int szdest, const signed char *unbase64)
Definition: strutil.cc:1727
google::protobuf::WebSafeBase64Unescape
int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest)
Definition: strutil.cc:2061
google::protobuf::safe_parse_negative_int
bool safe_parse_negative_int(const string &text, IntType *value_p)
Definition: strutil.cc:749
google::protobuf::kDoubleToBufferSize
static const int kDoubleToBufferSize
Definition: strutil.h:570
google::protobuf::value
const Descriptor::ReservedRange value
Definition: src/google/protobuf/descriptor.h:1954
google::protobuf::UnescapeCEscapeString
int UnescapeCEscapeString(const string &src, string *dest)
Definition: strutil.cc:468
google::protobuf::SimpleDtoa
string SimpleDtoa(double value)
Definition: strutil.cc:1219
google::protobuf::FastHex32ToBuffer
char * FastHex32ToBuffer(uint32 value, char *buffer)
Definition: strutil.cc:942
GOOGLE_DCHECK_NO_OVERLAP
#define GOOGLE_DCHECK_NO_OVERLAP(dest, src)
Definition: strutil.cc:1577
google::protobuf::safe_uint_internal
bool safe_uint_internal(string text, IntType *value_p)
Definition: strutil.cc:804
end
GLuint GLuint end
Definition: glcorearb.h:2858
google::protobuf::UnescapeCEscapeSequences
int UnescapeCEscapeSequences(const char *source, char *dest)
Definition: strutil.cc:332
g
GLboolean GLboolean g
Definition: glcorearb.h:3228
src
GLenum src
Definition: glcorearb.h:3364
google::protobuf::int64
int64_t int64
Definition: protobuf/src/google/protobuf/stubs/port.h:151
google::protobuf::safe_strto32
bool safe_strto32(const string &str, int32 *value)
Definition: strutil.cc:1364
google::protobuf::kUTF8LenTbl
static const unsigned char kUTF8LenTbl[256]
Definition: strutil.cc:2288
google::protobuf::FastUInt64ToBufferLeft
char * FastUInt64ToBufferLeft(uint64 u64, char *buffer)
Definition: strutil.cc:1076
google::protobuf::FastHexToBuffer
char * FastHexToBuffer(int i, char *buffer)
Definition: strutil.cc:908
google::protobuf::StrCat
string StrCat(const AlphaNum &a, const AlphaNum &b)
Definition: strutil.cc:1480
google::protobuf::StringPiece::data
const char * data() const
Definition: stringpiece.h:247
google::protobuf::safe_parse_positive_int
bool safe_parse_positive_int(string text, IntType *value_p)
Definition: strutil.cc:715
GOOGLE_DCHECK_GE
#define GOOGLE_DCHECK_GE
Definition: logging.h:201
google::protobuf::BigEndian::Load16
static uint16 Load16(const void *p)
Definition: protobuf/src/google/protobuf/stubs/port.h:406
length
GLenum GLuint GLenum GLsizei length
Definition: glcorearb.h:2695
base
Definition: logging.cc:2162
google::protobuf::uint8
uint8_t uint8
Definition: protobuf/src/google/protobuf/stubs/port.h:153
FATAL
const int FATAL
Definition: log_severity.h:60
GOOGLE_DCHECK_LE
#define GOOGLE_DCHECK_LE
Definition: logging.h:199
google::protobuf::BigEndian::Load32
static uint32 Load32(const void *p)
Definition: protobuf/src/google/protobuf/stubs/port.h:414
LOG_STRING
#define LOG_STRING(LEVEL, VECTOR)
Definition: strutil.cc:330
GOOGLE_DCHECK
#define GOOGLE_DCHECK
Definition: logging.h:194
google::protobuf::Append1
static char * Append1(char *out, const AlphaNum &x)
Definition: strutil.cc:1451
s
XmlRpcServer s
google::protobuf::SplitStringToIteratorUsing
static void SplitStringToIteratorUsing(const string &full, const char *delim, ITR &result)
Definition: strutil.cc:196
google::protobuf::safe_strtob
bool safe_strtob(StringPiece str, bool *value)
Definition: strutil.cc:1324
IS_OCTAL_DIGIT
#define IS_OCTAL_DIGIT(c)
Definition: strutil.cc:326
has_less
#define has_less(x, n)
google::protobuf::InternalFastHexToBuffer
char * InternalFastHexToBuffer(uint64 value, char *buffer, int num_byte)
Definition: strutil.cc:921
google::protobuf::uint32
uint32_t uint32
Definition: protobuf/src/google/protobuf/stubs/port.h:155
google::protobuf::safe_strtod
bool safe_strtod(const char *str, double *value)
Definition: strutil.cc:1352
strings
GLsizei const GLchar *const * strings
Definition: glcorearb.h:4046
google::protobuf::CEscape
string CEscape(const string &src)
Definition: strutil.cc:615
google::protobuf::safe_strto64
bool safe_strto64(const string &str, int64 *value)
Definition: strutil.cc:1372
google::protobuf::SimpleItoa
string SimpleItoa(int i)
Definition: strutil.cc:1136
google::protobuf::kFastToBufferSize
static const int kFastToBufferSize
Definition: strutil.h:466
errors
const char * errors
Definition: tokenizer_unittest.cc:841
google::protobuf::io::NoLocaleStrtod
double NoLocaleStrtod(const char *text, char **original_endptr)
Definition: strtod.cc:82
string
GLsizei const GLchar *const * string
Definition: glcorearb.h:3083
errno
int errno
google::protobuf::ascii_tolower
char ascii_tolower(char c)
Definition: strutil.h:94
google::protobuf::kUnBase64
static const signed char kUnBase64[]
Definition: strutil.cc:1992
google::protobuf::WebSafeBase64EscapeWithPadding
void WebSafeBase64EscapeWithPadding(StringPiece src, string *dest)
Definition: strutil.cc:2248
google::protobuf::GlobalReplaceSubstring
int GlobalReplaceSubstring(const string &substring, const string &replacement, string *s)
Definition: strutil.cc:1623
google::protobuf::ReplaceCharacters
void ReplaceCharacters(string *s, const char *remove, char replacewith)
Definition: strutil.cc:103
target
GLenum target
Definition: glcorearb.h:3739
google::protobuf::safe_strtou32
bool safe_strtou32(const string &str, uint32 *value)
Definition: strutil.cc:1368
google::protobuf::strings::AlphaNum::digits
char digits[kFastToBufferSize]
Definition: strutil.h:618
x
GLint GLenum GLint x
Definition: glcorearb.h:2834
google::protobuf::kFastInt32ToBufferOffset
static const int kFastInt32ToBufferOffset
Definition: strutil.cc:865
google::protobuf::SplitStringToIteratorAllowEmpty
static void SplitStringToIteratorAllowEmpty(const StringType &full, const char *delim, int pieces, ITR &result)
Definition: strutil.cc:249
google::protobuf::DelocalizeRadix
void DelocalizeRadix(char *buffer)
Definition: strutil.cc:1235
google::protobuf::FastInt64ToBuffer
char * FastInt64ToBuffer(int64 i, char *buffer)
Definition: strutil.cc:825
google::protobuf::kWebSafeBase64Chars
static const char kWebSafeBase64Chars[]
Definition: strutil.cc:2201
b
GLboolean GLboolean GLboolean b
Definition: glcorearb.h:3228
google::protobuf::strings::AlphaNum::piece_size_
size_t piece_size_
Definition: strutil.h:616
google::protobuf::ghtonl
uint32 ghtonl(uint32 x)
Definition: common.cc:307
google::protobuf::CEscapeInternal
int CEscapeInternal(const char *src, int src_len, char *dest, int dest_len, bool use_hex, bool utf8_safe)
Definition: strutil.cc:499
begin
static size_t begin(const upb_table *t)
Definition: php/ext/google/protobuf/upb.c:4898
google::protobuf::int32
int32_t int32
Definition: protobuf/src/google/protobuf/stubs/port.h:150
google::protobuf::Base64Unescape
bool Base64Unescape(StringPiece src, string *dest)
Definition: strutil.cc:2090
base64
snprintf
int snprintf(char *str, size_t size, const char *format,...)
Definition: port.cc:64
google::protobuf::StripWhitespace
void StripWhitespace(string *str)
Definition: strutil.cc:113
strutil.h
google::protobuf::CEscapeAndAppend
void CEscapeAndAppend(StringPiece src, string *dest)
Definition: strutil.cc:581
mask
GLint GLuint mask
Definition: glcorearb.h:2789
google::protobuf::Append4
static char * Append4(char *out, const AlphaNum &x1, const AlphaNum &x2, const AlphaNum &x3, const AlphaNum &x4)
Definition: strutil.cc:1464
google::protobuf::kUnWebSafeBase64
static const signed char kUnWebSafeBase64[]
Definition: strutil.cc:2026
buffer
GLuint buffer
Definition: glcorearb.h:2939
google::protobuf::isprint
bool isprint(char c)
Definition: strutil.cc:79
google::protobuf::StringPiece
Definition: stringpiece.h:180
start
GLuint start
Definition: glcorearb.h:2858
update_failure_list.str
str
Definition: update_failure_list.py:41
google::protobuf::kBase64Chars
static const char kBase64Chars[]
Definition: strutil.cc:2198
google::protobuf::safe_int_internal
bool safe_int_internal(string text, IntType *value_p)
Definition: strutil.cc:790
p
const char * p
Definition: gmock-matchers_test.cc:3863
GOOGLE_LOG
#define GOOGLE_LOG(LEVEL)
Definition: logging.h:146
google::protobuf::FastInt32ToBuffer
char * FastInt32ToBuffer(int32 i, char *buffer)
Definition: strutil.cc:870
google::protobuf::safe_strtof
bool safe_strtof(const char *str, float *value)
Definition: strutil.cc:1341
google::protobuf::ascii_isspace
bool ascii_isspace(char c)
Definition: strutil.h:77
strtod.h
google::protobuf::JoinStringsIterator
static void JoinStringsIterator(const ITERATOR &start, const ITERATOR &end, const char *delim, string *result)
Definition: strutil.cc:281
google::protobuf::uint64
uint64_t uint64
Definition: protobuf/src/google/protobuf/stubs/port.h:156
google::protobuf::WebSafeBase64Escape
int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest, int szdest, bool do_padding)
Definition: strutil.cc:2207
google::protobuf.internal::StringType
StringType
Definition: generated_message_table_driven_lite.h:53
GOOGLE_COMPILE_ASSERT
#define GOOGLE_COMPILE_ASSERT(expr, msg)
Definition: macros.h:115
google::protobuf::string_as_array
char * string_as_array(string *str)
Definition: stl_util.h:83
buffer
Definition: buffer_processor.h:43
d
d
google::protobuf::FloatToBuffer
char * FloatToBuffer(float value, char *buffer)
Definition: strutil.cc:1380
google::protobuf::isxdigit
bool isxdigit(char c)
Definition: strutil.cc:73
source
GLsizei GLsizei GLchar * source
Definition: glcorearb.h:3072
google::protobuf::strto32_adaptor
int32 strto32_adaptor(const char *nptr, char **endptr, int base)
Definition: strutil.cc:651
google::protobuf::SimpleFtoa
string SimpleFtoa(float value)
Definition: strutil.cc:1224
google::uint64
uint64_t uint64
Definition: sdk/include/aditof/log.h:60
google::protobuf::FastHex64ToBuffer
char * FastHex64ToBuffer(uint64 value, char *buffer)
Definition: strutil.cc:938
google::protobuf::ERROR
static const LogLevel ERROR
Definition: protobuf/src/google/protobuf/testing/googletest.h:70
GOOGLE_CHECK
#define GOOGLE_CHECK(EXPRESSION)
Definition: logging.h:153
google::protobuf::strings::Utf8SafeCEscape
string Utf8SafeCEscape(const string &src)
Definition: strutil.cc:623
google::protobuf::memcasecmp
static int memcasecmp(const char *s1, const char *s2, size_t len)
Definition: strutil.cc:1306
google::protobuf::DoubleToBuffer
char * DoubleToBuffer(double value, char *buffer)
Definition: strutil.cc:1262
i
int i
Definition: gmock-matchers_test.cc:764
google::protobuf::kuint32max
static const uint32 kuint32max
Definition: protobuf/src/google/protobuf/stubs/port.h:163
GET_INPUT
#define GET_INPUT(label, remain)
google::protobuf::CaseEqual
bool CaseEqual(StringPiece s1, StringPiece s2)
Definition: strutil.cc:1319
google::protobuf::FastUInt32ToBufferLeft
char * FastUInt32ToBufferLeft(uint32 u, char *buffer)
Definition: strutil.cc:985
google::protobuf::GOOGLE_UNALIGNED_LOAD64
uint64 GOOGLE_UNALIGNED_LOAD64(const void *p)
Definition: protobuf/src/google/protobuf/stubs/port.h:229
google::protobuf::compiler::csharp::base64_chars
static const char base64_chars[]
Definition: csharp_helpers.cc:437
len
int len
Definition: php/ext/google/protobuf/map.c:206
google::protobuf::kint32max
static const int32 kint32max
Definition: protobuf/src/google/protobuf/stubs/port.h:159
google::protobuf::Base64Escape
int Base64Escape(const unsigned char *src, int szsrc, char *dest, int szdest)
Definition: strutil.cc:2204
google::protobuf::StringReplace
void StringReplace(const string &s, const string &oldsub, const string &newsub, bool replace_all, string *res)
Definition: strutil.cc:148
google::protobuf::SplitStringUsing
void SplitStringUsing(const string &full, const char *delim, std::vector< string > *result)
Definition: strutil.cc:229
v
const GLdouble * v
Definition: glcorearb.h:3106
ch
char ch
Definition: gmock-matchers_test.cc:3871
benchmarks.python.py_benchmark.dest
dest
Definition: py_benchmark.py:13
google::protobuf::safe_parse_sign
bool safe_parse_sign(string *text, bool *negative_ptr)
Definition: strutil.cc:686
google::protobuf::kFloatToBufferSize
static const int kFloatToBufferSize
Definition: strutil.h:571
google::protobuf::hex_digit_to_int
int hex_digit_to_int(char c)
Definition: strutil.h:98
google::protobuf::strings::AlphaNum::AlphaNum
AlphaNum(int i32)
Definition: strutil.h:623
stl_util.h
google::protobuf::StripString
void StripString(string *s, const char *remove, char replacewith)
Definition: strutil.cc:88
google::protobuf::Append2
static char * Append2(char *out, const AlphaNum &x1, const AlphaNum &x2)
Definition: strutil.cc:1456
logging.h
google::protobuf::CleanStringLineEndings
void CleanStringLineEndings(const string &src, string *dst, bool auto_end_last_line)
Definition: strutil.cc:2341
google::protobuf::CEscapedLength
static size_t CEscapedLength(StringPiece src)
Definition: strutil.cc:547
google::protobuf::FastInt64ToBufferLeft
char * FastInt64ToBufferLeft(int64 i, char *buffer)
Definition: strutil.cc:1118
dst
GLenum GLenum dst
Definition: glcorearb.h:3364
first
GLint first
Definition: glcorearb.h:2830
google::protobuf::GOOGLE_UNALIGNED_STORE64
void GOOGLE_UNALIGNED_STORE64(void *p, uint64 v)
Definition: protobuf/src/google/protobuf/stubs/port.h:243
GOOGLE_DCHECK_LT
#define GOOGLE_DCHECK_LT
Definition: logging.h:198
google::protobuf::IsValidFloatChar
static bool IsValidFloatChar(char c)
Definition: strutil.cc:1229
f
GLfloat f
Definition: glcorearb.h:3964
value
GLsizei const GLfloat * value
Definition: glcorearb.h:3093
google::protobuf::UTF8FirstLetterNumBytes
int UTF8FirstLetterNumBytes(const char *src, int len)
Definition: strutil.cc:2301
output
const upb_json_parsermethod const upb_symtab upb_sink * output
Definition: ruby/ext/google/protobuf_c/upb.h:10503
google::protobuf::safe_strtou64
bool safe_strtou64(const string &str, uint64 *value)
Definition: strutil.cc:1376
google::protobuf::Base64EscapeInternal
int Base64EscapeInternal(const unsigned char *src, int szsrc, char *dest, int szdest, const char *base64, bool do_padding)
Definition: strutil.cc:2098
google::protobuf::two_ASCII_digits
static const char two_ASCII_digits[100][2]
Definition: strutil.cc:962
google::protobuf::StringPiece::size
stringpiece_ssize_type size() const
Definition: stringpiece.h:248
google::protobuf::strings::CHexEscape
string CHexEscape(const string &src)
Definition: strutil.cc:632
a
GLboolean GLboolean GLboolean GLboolean a
Definition: glcorearb.h:3228
google::protobuf::FastInt32ToBufferLeft
char * FastInt32ToBufferLeft(int32 i, char *buffer)
Definition: strutil.cc:1067
it
MapIter it
Definition: php/ext/google/protobuf/map.c:205
google
Definition: data_proto2_to_proto3_util.h:11
google::protobuf::SplitStringAllowEmpty
void SplitStringAllowEmpty(const string &full, const char *delim, std::vector< string > *result)
Definition: strutil.cc:268
h
GLfloat GLfloat GLfloat GLfloat h
Definition: glcorearb.h:4147
width
GLint GLsizei width
Definition: glcorearb.h:2768
google::protobuf::strings::AlphaNum::piece_data_
const char * piece_data_
Definition: strutil.h:615
isnan
#define isnan(d)
Definition: cJSON.c:77
google::protobuf::kFastInt64ToBufferOffset
static const int kFastInt64ToBufferOffset
Definition: strutil.cc:823
google::protobuf::EncodeAsUTF8Char
int EncodeAsUTF8Char(uint32 code_point, char *output)
Definition: strutil.cc:2255
google::protobuf::kint32min
static const int32 kint32min
Definition: protobuf/src/google/protobuf/stubs/port.h:160
google::protobuf::strtou32_adaptor
uint32 strtou32_adaptor(const char *nptr, char **endptr, int base)
Definition: strutil.cc:671
google::protobuf::StrAppend
void StrAppend(string *result, const AlphaNum &a)
Definition: strutil.cc:1581
GOOGLE_DCHECK_EQ
#define GOOGLE_DCHECK_EQ
Definition: logging.h:196
google::protobuf::JoinStrings
void JoinStrings(const std::vector< string > &components, const char *delim, string *result)
Definition: strutil.cc:308


libaditof
Author(s):
autogenerated on Wed May 21 2025 02:06:59