abseil-cpp/absl/strings/escaping.cc
Go to the documentation of this file.
1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/escaping.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstdint>
20 #include <cstring>
21 #include <iterator>
22 #include <limits>
23 #include <string>
24 
25 #include "absl/base/internal/endian.h"
26 #include "absl/base/internal/raw_logging.h"
27 #include "absl/base/internal/unaligned_access.h"
28 #include "absl/strings/internal/char_map.h"
29 #include "absl/strings/internal/escaping.h"
30 #include "absl/strings/internal/resize_uninitialized.h"
31 #include "absl/strings/internal/utf8.h"
32 #include "absl/strings/str_cat.h"
33 #include "absl/strings/str_join.h"
34 #include "absl/strings/string_view.h"
35 
36 namespace absl {
38 namespace {
39 
40 // These are used for the leave_nulls_escaped argument to CUnescapeInternal().
41 constexpr bool kUnescapeNulls = false;
42 
// Returns true iff `c` is one of the ASCII octal digits '0' through '7'.
inline bool is_octal_digit(char c) { return c >= '0' && c <= '7'; }
44 
45 inline int hex_digit_to_int(char c) {
46  static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
47  "Character set must be ASCII.");
48  assert(absl::ascii_isxdigit(c));
49  int x = static_cast<unsigned char>(c);
50  if (x > '9') {
51  x += 9;
52  }
53  return x & 0xf;
54 }
55 
56 inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {
57  if (c >= 0xD800 && c <= 0xDFFF) {
58  if (error) {
59  *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
60  src);
61  }
62  return true;
63  }
64  return false;
65 }
66 
67 // ----------------------------------------------------------------------
68 // CUnescapeInternal()
69 // Implements both CUnescape() and CUnescapeForNullTerminatedString().
70 //
71 // Unescapes C escape sequences and is the reverse of CEscape().
72 //
73 // If 'source' is valid, stores the unescaped string and its size in
74 // 'dest' and 'dest_len' respectively, and returns true. Otherwise
75 // returns false and optionally stores the error description in
76 // 'error'. Set 'error' to nullptr to disable error reporting.
77 //
78 // 'dest' should point to a buffer that is at least as big as 'source'.
79 // 'source' and 'dest' may be the same.
80 //
81 // NOTE: any changes to this function must also be reflected in the older
82 // UnescapeCEscapeSequences().
83 // ----------------------------------------------------------------------
84 bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
85  char* dest, ptrdiff_t* dest_len, std::string* error) {
86  char* d = dest;
87  const char* p = source.data();
88  const char* end = p + source.size();
89  const char* last_byte = end - 1;
90 
91  // Small optimization for case where source = dest and there's no escaping
92  while (p == d && p < end && *p != '\\') p++, d++;
93 
94  while (p < end) {
95  if (*p != '\\') {
96  *d++ = *p++;
97  } else {
98  if (++p > last_byte) { // skip past the '\\'
99  if (error) *error = "String cannot end with \\";
100  return false;
101  }
102  switch (*p) {
103  case 'a': *d++ = '\a'; break;
104  case 'b': *d++ = '\b'; break;
105  case 'f': *d++ = '\f'; break;
106  case 'n': *d++ = '\n'; break;
107  case 'r': *d++ = '\r'; break;
108  case 't': *d++ = '\t'; break;
109  case 'v': *d++ = '\v'; break;
110  case '\\': *d++ = '\\'; break;
111  case '?': *d++ = '\?'; break; // \? Who knew?
112  case '\'': *d++ = '\''; break;
113  case '"': *d++ = '\"'; break;
114  case '0':
115  case '1':
116  case '2':
117  case '3':
118  case '4':
119  case '5':
120  case '6':
121  case '7': {
122  // octal digit: 1 to 3 digits
123  const char* octal_start = p;
124  unsigned int ch = *p - '0';
125  if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
126  if (p < last_byte && is_octal_digit(p[1]))
127  ch = ch * 8 + *++p - '0'; // now points at last digit
128  if (ch > 0xff) {
129  if (error) {
130  *error = "Value of \\" +
131  std::string(octal_start, p + 1 - octal_start) +
132  " exceeds 0xff";
133  }
134  return false;
135  }
136  if ((ch == 0) && leave_nulls_escaped) {
137  // Copy the escape sequence for the null character
138  const ptrdiff_t octal_size = p + 1 - octal_start;
139  *d++ = '\\';
140  memmove(d, octal_start, octal_size);
141  d += octal_size;
142  break;
143  }
144  *d++ = ch;
145  break;
146  }
147  case 'x':
148  case 'X': {
149  if (p >= last_byte) {
150  if (error) *error = "String cannot end with \\x";
151  return false;
152  } else if (!absl::ascii_isxdigit(p[1])) {
153  if (error) *error = "\\x cannot be followed by a non-hex digit";
154  return false;
155  }
156  unsigned int ch = 0;
157  const char* hex_start = p;
158  while (p < last_byte && absl::ascii_isxdigit(p[1]))
159  // Arbitrarily many hex digits
160  ch = (ch << 4) + hex_digit_to_int(*++p);
161  if (ch > 0xFF) {
162  if (error) {
163  *error = "Value of \\" +
164  std::string(hex_start, p + 1 - hex_start) +
165  " exceeds 0xff";
166  }
167  return false;
168  }
169  if ((ch == 0) && leave_nulls_escaped) {
170  // Copy the escape sequence for the null character
171  const ptrdiff_t hex_size = p + 1 - hex_start;
172  *d++ = '\\';
173  memmove(d, hex_start, hex_size);
174  d += hex_size;
175  break;
176  }
177  *d++ = ch;
178  break;
179  }
180  case 'u': {
181  // \uhhhh => convert 4 hex digits to UTF-8
182  char32_t rune = 0;
183  const char* hex_start = p;
184  if (p + 4 >= end) {
185  if (error) {
186  *error = "\\u must be followed by 4 hex digits: \\" +
187  std::string(hex_start, p + 1 - hex_start);
188  }
189  return false;
190  }
191  for (int i = 0; i < 4; ++i) {
192  // Look one char ahead.
193  if (absl::ascii_isxdigit(p[1])) {
194  rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
195  } else {
196  if (error) {
197  *error = "\\u must be followed by 4 hex digits: \\" +
198  std::string(hex_start, p + 1 - hex_start);
199  }
200  return false;
201  }
202  }
203  if ((rune == 0) && leave_nulls_escaped) {
204  // Copy the escape sequence for the null character
205  *d++ = '\\';
206  memmove(d, hex_start, 5); // u0000
207  d += 5;
208  break;
209  }
210  if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
211  return false;
212  }
213  d += strings_internal::EncodeUTF8Char(d, rune);
214  break;
215  }
216  case 'U': {
217  // \Uhhhhhhhh => convert 8 hex digits to UTF-8
218  char32_t rune = 0;
219  const char* hex_start = p;
220  if (p + 8 >= end) {
221  if (error) {
222  *error = "\\U must be followed by 8 hex digits: \\" +
223  std::string(hex_start, p + 1 - hex_start);
224  }
225  return false;
226  }
227  for (int i = 0; i < 8; ++i) {
228  // Look one char ahead.
229  if (absl::ascii_isxdigit(p[1])) {
230  // Don't change rune until we're sure this
231  // is within the Unicode limit, but do advance p.
232  uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
233  if (newrune > 0x10FFFF) {
234  if (error) {
235  *error = "Value of \\" +
236  std::string(hex_start, p + 1 - hex_start) +
237  " exceeds Unicode limit (0x10FFFF)";
238  }
239  return false;
240  } else {
241  rune = newrune;
242  }
243  } else {
244  if (error) {
245  *error = "\\U must be followed by 8 hex digits: \\" +
246  std::string(hex_start, p + 1 - hex_start);
247  }
248  return false;
249  }
250  }
251  if ((rune == 0) && leave_nulls_escaped) {
252  // Copy the escape sequence for the null character
253  *d++ = '\\';
254  memmove(d, hex_start, 9); // U00000000
255  d += 9;
256  break;
257  }
258  if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
259  return false;
260  }
261  d += strings_internal::EncodeUTF8Char(d, rune);
262  break;
263  }
264  default: {
265  if (error) *error = std::string("Unknown escape sequence: \\") + *p;
266  return false;
267  }
268  }
269  p++; // read past letter we escaped
270  }
271  }
272  *dest_len = d - dest;
273  return true;
274 }
275 
276 // ----------------------------------------------------------------------
277 // CUnescapeInternal()
278 //
279 // Same as above but uses a std::string for output. 'source' and 'dest'
280 // may be the same.
281 // ----------------------------------------------------------------------
// NOTE(review): listing lines 283-284 are absent from this view, so the rest
// of the parameter list (it must declare `dest` and `error`, both used below)
// and whatever prepares *dest before it is written into are not visible.
// Confirm against the upstream file before editing.
282 bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
285 
 // Receives the number of bytes the char* overload wrote.
286  ptrdiff_t dest_size;
 // Unescape straight into dest's own character storage.
287  if (!CUnescapeInternal(source,
288  leave_nulls_escaped,
289  &(*dest)[0],
290  &dest_size,
291  error)) {
292  return false;
293  }
 // Drop everything past the bytes that were actually produced.
294  dest->erase(dest_size);
295  return true;
296 }
297 
298 // ----------------------------------------------------------------------
299 // CEscape()
300 // CHexEscape()
301 // Utf8SafeCEscape()
302 // Utf8SafeCHexEscape()
303 // Escapes 'src' using C-style escape sequences. This is useful for
304 // preparing query flags. The 'Hex' version uses hexadecimal rather than
305 // octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes.
306 //
307 // Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
308 // ----------------------------------------------------------------------
310  bool utf8_safe) {
312  bool last_hex_escape = false; // true if last output char was \xNN.
313 
314  for (unsigned char c : src) {
315  bool is_hex_escape = false;
316  switch (c) {
317  case '\n': dest.append("\\" "n"); break;
318  case '\r': dest.append("\\" "r"); break;
319  case '\t': dest.append("\\" "t"); break;
320  case '\"': dest.append("\\" "\""); break;
321  case '\'': dest.append("\\" "'"); break;
322  case '\\': dest.append("\\" "\\"); break;
323  default:
324  // Note that if we emit \xNN and the src character after that is a hex
325  // digit then that digit must be escaped too to prevent it being
326  // interpreted as part of the character code by C.
327  if ((!utf8_safe || c < 0x80) &&
328  (!absl::ascii_isprint(c) ||
329  (last_hex_escape && absl::ascii_isxdigit(c)))) {
330  if (use_hex) {
331  dest.append("\\" "x");
332  dest.push_back(numbers_internal::kHexChar[c / 16]);
333  dest.push_back(numbers_internal::kHexChar[c % 16]);
334  is_hex_escape = true;
335  } else {
336  dest.append("\\");
337  dest.push_back(numbers_internal::kHexChar[c / 64]);
338  dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]);
339  dest.push_back(numbers_internal::kHexChar[c % 8]);
340  }
341  } else {
342  dest.push_back(c);
343  break;
344  }
345  }
346  last_hex_escape = is_hex_escape;
347  }
348 
349  return dest;
350 }
351 
352 /* clang-format off */
353 constexpr char c_escaped_len[256] = {
354  4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r
355  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
356  1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", '
357  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9'
358  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O'
359  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\'
360  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o'
361  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL
362  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
363  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
364  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
365  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
366  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
367  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
368  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
369  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
370 };
371 /* clang-format on */
372 
373 // Calculates the length of the C-style escaped version of 'src'.
374 // Assumes that non-printable characters are escaped using octal sequences, and
375 // that UTF-8 bytes are not handled specially.
376 inline size_t CEscapedLength(absl::string_view src) {
377  size_t escaped_len = 0;
378  for (unsigned char c : src) escaped_len += c_escaped_len[c];
379  return escaped_len;
380 }
381 
// NOTE(review): the signature line (listing line 382) is absent from this
// view; the cross-reference index at the end of the listing gives it as
// `void CEscapeAndAppendInternal(absl::string_view src, std::string *dest)`.
// The body appends the octal-style C-escaped form of `src` to `*dest`.
383  size_t escaped_len = CEscapedLength(src);
 // Every c_escaped_len entry is at least 1, so equal lengths mean no byte
 // needs escaping and src can be appended as-is.
384  if (escaped_len == src.size()) {
385  dest->append(src.data(), src.size());
386  return;
387  }
388 
389  size_t cur_dest_len = dest->size();
 // NOTE(review): listing line 390 is absent; only the trailing argument of the
 // call is visible on the next line. Presumably it grows *dest to
 // cur_dest_len + escaped_len, since escaped bytes are written through
 // append_ptr below — confirm against the upstream file.
391  cur_dest_len + escaped_len);
 // Write position: the first byte past dest's previous contents.
392  char* append_ptr = &(*dest)[cur_dest_len];
393 
394  for (unsigned char c : src) {
 // The table value selects the output form: 1 = copy, 2 = backslash plus one
 // character, anything else = backslash plus three octal digits.
395  int char_len = c_escaped_len[c];
396  if (char_len == 1) {
397  *append_ptr++ = c;
398  } else if (char_len == 2) {
399  switch (c) {
400  case '\n':
401  *append_ptr++ = '\\';
402  *append_ptr++ = 'n';
403  break;
404  case '\r':
405  *append_ptr++ = '\\';
406  *append_ptr++ = 'r';
407  break;
408  case '\t':
409  *append_ptr++ = '\\';
410  *append_ptr++ = 't';
411  break;
412  case '\"':
413  *append_ptr++ = '\\';
414  *append_ptr++ = '\"';
415  break;
416  case '\'':
417  *append_ptr++ = '\\';
418  *append_ptr++ = '\'';
419  break;
420  case '\\':
421  *append_ptr++ = '\\';
422  *append_ptr++ = '\\';
423  break;
424  }
425  } else {
 // Three octal digits, most significant first.
426  *append_ptr++ = '\\';
427  *append_ptr++ = '0' + c / 64;
428  *append_ptr++ = '0' + (c % 64) / 8;
429  *append_ptr++ = '0' + c % 8;
430  }
431  }
432 }
433 
// Decodes (or, when `dest` is null, only validates) base64 text.
//
//   src_param, szsrc  input characters and their count.
//   dest, szdest      output buffer and its capacity; if `dest` is null no
//                     bytes are written and only the output size is counted.
//   unbase64          256-entry decode table indexed by input byte; a negative
//                     entry marks a byte that is not a data character.
//   len               on success, receives the number of decoded bytes.
//
// Whitespace in the input is skipped. '=' and '.' are both accepted as pad
// characters, and the input may carry either no padding or exactly the number
// of pad characters its length calls for. Returns false on any other
// non-data character, on a dangling single data character, on a wrong pad
// count, or when the decoded bytes would not fit in `szdest`; `*len` is only
// written when the function returns true.
434 bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
435  size_t szdest, const signed char* unbase64,
436  size_t* len) {
437  static const char kPad64Equals = '=';
438  static const char kPad64Dot = '.';
439 
 // destidx: bytes produced so far.  decode: table value of the last byte read.
 // state: data characters accumulated in the current 4-character group.
 // temp: bit accumulator for the current group.
440  size_t destidx = 0;
441  int decode = 0;
442  int state = 0;
443  unsigned int ch = 0;
444  unsigned int temp = 0;
445 
446  // If "char" is signed by default, using *src as an array index results in
447  // accessing negative array elements. Treat the input as a pointer to
448  // unsigned char to avoid this.
449  const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
450 
451  // The GET_INPUT macro gets the next input character, skipping
452  // over any whitespace, and stopping when we reach the end of the
453  // string or when we read any non-data character. The arguments are
454  // an arbitrary identifier (used as a label for goto) and the number
455  // of data bytes that must remain in the input to avoid aborting the
456  // loop.
457 #define GET_INPUT(label, remain) \
458  label: \
459  --szsrc; \
460  ch = *src++; \
461  decode = unbase64[ch]; \
462  if (decode < 0) { \
463  if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
464  state = 4 - remain; \
465  break; \
466  }
467 
468  // if dest is null, we're just checking to see if it's legal input
469  // rather than producing output. (I suspect this could just be done
470  // with a regexp...). We duplicate the loop so this test can be
471  // outside it instead of in every iteration.
472 
473  if (dest) {
474  // This loop consumes 4 input bytes and produces 3 output bytes
475  // per iteration. We can't know at the start that there is enough
476  // data left in the string for a full iteration, so the loop may
477  // break out in the middle; if so 'state' will be set to the
478  // number of input bytes read.
479 
480  while (szsrc >= 4) {
481  // We'll start by optimistically assuming that the next four
482  // bytes of the string (src[0..3]) are four good data bytes
483  // (that is, no nulls, whitespace, padding chars, or illegal
484  // chars). We need to test src[0..2] for nulls individually
485  // before constructing temp to preserve the property that we
486  // never read past a null in the string (no matter how long
487  // szsrc claims the string is).
488 
489  if (!src[0] || !src[1] || !src[2] ||
490  ((temp = ((unsigned(unbase64[src[0]]) << 18) |
491  (unsigned(unbase64[src[1]]) << 12) |
492  (unsigned(unbase64[src[2]]) << 6) |
493  (unsigned(unbase64[src[3]])))) &
494  0x80000000)) {
495  // Iff any of those four characters was bad (null, illegal,
496  // whitespace, padding), then temp's high bit will be set
497  // (because unbase64[] is -1 for all bad characters).
498  //
499  // We'll back up and resort to the slower decoder, which knows
500  // how to handle those cases.
501 
 // Each GET_INPUT may `break` out of the enclosing while loop.
502  GET_INPUT(first, 4);
503  temp = decode;
504  GET_INPUT(second, 3);
505  temp = (temp << 6) | decode;
506  GET_INPUT(third, 2);
507  temp = (temp << 6) | decode;
508  GET_INPUT(fourth, 1);
509  temp = (temp << 6) | decode;
510  } else {
511  // We really did have four good data bytes, so advance four
512  // characters in the string.
513 
514  szsrc -= 4;
515  src += 4;
516  }
517 
518  // temp has 24 bits of input, so write that out as three bytes.
519 
520  if (destidx + 3 > szdest) return false;
521  dest[destidx + 2] = temp;
522  temp >>= 8;
523  dest[destidx + 1] = temp;
524  temp >>= 8;
525  dest[destidx] = temp;
526  destidx += 3;
527  }
528  } else {
 // Validation-only twin of the loop above: same input handling, but the
 // output is only counted, never stored.
529  while (szsrc >= 4) {
530  if (!src[0] || !src[1] || !src[2] ||
531  ((temp = ((unsigned(unbase64[src[0]]) << 18) |
532  (unsigned(unbase64[src[1]]) << 12) |
533  (unsigned(unbase64[src[2]]) << 6) |
534  (unsigned(unbase64[src[3]])))) &
535  0x80000000)) {
536  GET_INPUT(first_no_dest, 4);
537  GET_INPUT(second_no_dest, 3);
538  GET_INPUT(third_no_dest, 2);
539  GET_INPUT(fourth_no_dest, 1);
540  } else {
541  szsrc -= 4;
542  src += 4;
543  }
544  destidx += 3;
545  }
546  }
547 
548 #undef GET_INPUT
549 
550  // if the loop terminated because we read a bad character, return
551  // now.
 // NOTE(review): listing line 553 is absent from this view, so the final
 // operand of the condition below (and its closing parenthesis) is not
 // visible. Confirm against the upstream file before editing.
552  if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
554  return false;
555 
556  if (ch == kPad64Equals || ch == kPad64Dot) {
557  // if we stopped by hitting an '=' or '.', un-read that character -- we'll
558  // look at it again when we count to check for the proper number of
559  // equals signs at the end.
560  ++szsrc;
561  --src;
562  } else {
563  // This loop consumes 1 input byte per iteration. It's used to
564  // clean up the 0-3 input bytes remaining when the first, faster
565  // loop finishes. 'temp' contains the data from 'state' input
566  // characters read by the first loop.
567  while (szsrc > 0) {
568  --szsrc;
569  ch = *src++;
570  decode = unbase64[ch];
571  if (decode < 0) {
572  if (absl::ascii_isspace(ch)) {
573  continue;
574  } else if (ch == kPad64Equals || ch == kPad64Dot) {
575  // back up one character; we'll read it again when we check
576  // for the correct number of pad characters at the end.
577  ++szsrc;
578  --src;
579  break;
580  } else {
581  return false;
582  }
583  }
584 
585  // Each input character gives us six bits of output.
586  temp = (temp << 6) | decode;
587  ++state;
588  if (state == 4) {
589  // If we've accumulated 24 bits of output, write that out as
590  // three bytes.
591  if (dest) {
592  if (destidx + 3 > szdest) return false;
593  dest[destidx + 2] = temp;
594  temp >>= 8;
595  dest[destidx + 1] = temp;
596  temp >>= 8;
597  dest[destidx] = temp;
598  }
599  destidx += 3;
600  state = 0;
601  temp = 0;
602  }
603  }
604  }
605 
606  // Process the leftover data contained in 'temp' at the end of the input.
607  int expected_equals = 0;
608  switch (state) {
609  case 0:
610  // Nothing left over; output is a multiple of 3 bytes.
611  break;
612 
613  case 1:
614  // Bad input; we have 6 bits left over.
615  return false;
616 
617  case 2:
618  // Produce one more output byte from the 12 input bits we have left.
619  if (dest) {
620  if (destidx + 1 > szdest) return false;
621  temp >>= 4;
622  dest[destidx] = temp;
623  }
624  ++destidx;
625  expected_equals = 2;
626  break;
627 
628  case 3:
629  // Produce two more output bytes from the 18 input bits we have left.
630  if (dest) {
631  if (destidx + 2 > szdest) return false;
632  temp >>= 2;
633  dest[destidx + 1] = temp;
634  temp >>= 8;
635  dest[destidx] = temp;
636  }
637  destidx += 2;
638  expected_equals = 1;
639  break;
640 
641  default:
642  // state should have no other values at this point.
643  ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
644  state);
645  }
646 
647  // The remainder of the string should be all whitespace, mixed with
648  // exactly 0 equals signs, or exactly 'expected_equals' equals
649  // signs. (Always accepting 0 equals signs is an Abseil extension
650  // not covered in the RFC, as is accepting dot as the pad character.)
651 
652  int equals = 0;
653  while (szsrc > 0) {
654  if (*src == kPad64Equals || *src == kPad64Dot)
655  ++equals;
656  else if (!absl::ascii_isspace(*src))
657  return false;
658  --szsrc;
659  ++src;
660  }
661 
 // *len is written only on success.
662  const bool ok = (equals == 0 || equals == expected_equals);
663  if (ok) *len = destidx;
664  return ok;
665 }
666 
667 // The arrays below were generated by the following code
668 // #include <sys/time.h>
669 // #include <stdlib.h>
670 // #include <string.h>
671 // main()
672 // {
673 // static const char Base64[] =
674 // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
675 // char* pos;
676 // int idx, i, j;
677 // printf(" ");
678 // for (i = 0; i < 255; i += 8) {
679 // for (j = i; j < i + 8; j++) {
680 // pos = strchr(Base64, j);
681 // if ((pos == nullptr) || (j == 0))
682 // idx = -1;
683 // else
684 // idx = pos - Base64;
685 // if (idx == -1)
686 // printf(" %2d, ", idx);
687 // else
688 // printf(" %2d/*%c*/,", idx, j);
689 // }
690 // printf("\n ");
691 // }
692 // }
693 //
694 // where the value of "Base64[]" was replaced by one of the base-64 conversion
695 // tables from the functions below.
696 /* clang-format off */
697 constexpr signed char kUnBase64[] = {
698  -1, -1, -1, -1, -1, -1, -1, -1,
699  -1, -1, -1, -1, -1, -1, -1, -1,
700  -1, -1, -1, -1, -1, -1, -1, -1,
701  -1, -1, -1, -1, -1, -1, -1, -1,
702  -1, -1, -1, -1, -1, -1, -1, -1,
703  -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,
704  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
705  60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
706  -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
707  07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
708  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
709  23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,
710  -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
711  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
712  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
713  49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
714  -1, -1, -1, -1, -1, -1, -1, -1,
715  -1, -1, -1, -1, -1, -1, -1, -1,
716  -1, -1, -1, -1, -1, -1, -1, -1,
717  -1, -1, -1, -1, -1, -1, -1, -1,
718  -1, -1, -1, -1, -1, -1, -1, -1,
719  -1, -1, -1, -1, -1, -1, -1, -1,
720  -1, -1, -1, -1, -1, -1, -1, -1,
721  -1, -1, -1, -1, -1, -1, -1, -1,
722  -1, -1, -1, -1, -1, -1, -1, -1,
723  -1, -1, -1, -1, -1, -1, -1, -1,
724  -1, -1, -1, -1, -1, -1, -1, -1,
725  -1, -1, -1, -1, -1, -1, -1, -1,
726  -1, -1, -1, -1, -1, -1, -1, -1,
727  -1, -1, -1, -1, -1, -1, -1, -1,
728  -1, -1, -1, -1, -1, -1, -1, -1,
729  -1, -1, -1, -1, -1, -1, -1, -1
730 };
731 
732 constexpr signed char kUnWebSafeBase64[] = {
733  -1, -1, -1, -1, -1, -1, -1, -1,
734  -1, -1, -1, -1, -1, -1, -1, -1,
735  -1, -1, -1, -1, -1, -1, -1, -1,
736  -1, -1, -1, -1, -1, -1, -1, -1,
737  -1, -1, -1, -1, -1, -1, -1, -1,
738  -1, -1, -1, -1, -1, 62/*-*/, -1, -1,
739  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
740  60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
741  -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
742  07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
743  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
744  23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/,
745  -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
746  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
747  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
748  49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
749  -1, -1, -1, -1, -1, -1, -1, -1,
750  -1, -1, -1, -1, -1, -1, -1, -1,
751  -1, -1, -1, -1, -1, -1, -1, -1,
752  -1, -1, -1, -1, -1, -1, -1, -1,
753  -1, -1, -1, -1, -1, -1, -1, -1,
754  -1, -1, -1, -1, -1, -1, -1, -1,
755  -1, -1, -1, -1, -1, -1, -1, -1,
756  -1, -1, -1, -1, -1, -1, -1, -1,
757  -1, -1, -1, -1, -1, -1, -1, -1,
758  -1, -1, -1, -1, -1, -1, -1, -1,
759  -1, -1, -1, -1, -1, -1, -1, -1,
760  -1, -1, -1, -1, -1, -1, -1, -1,
761  -1, -1, -1, -1, -1, -1, -1, -1,
762  -1, -1, -1, -1, -1, -1, -1, -1,
763  -1, -1, -1, -1, -1, -1, -1, -1,
764  -1, -1, -1, -1, -1, -1, -1, -1
765 };
766 /* clang-format on */
767 
768 constexpr char kWebSafeBase64Chars[] =
769  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
770 
// String-output wrapper around the char* decoder: decodes the `slen`
// characters at `src` with decode table `unbase64` into `*dest`. Returns true
// on success; on failure `*dest` is cleared and false is returned.
771 template <typename String>
772 bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
773  const signed char* unbase64) {
774  // Determine the size of the output string. Base64 encodes every 3 bytes into
775  // 4 characters. any leftover chars are added directly for good measure.
776  // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
777  const size_t dest_len = 3 * (slen / 4) + (slen % 4);
778 
 // NOTE(review): listing line 779 is absent from this view. Presumably it
 // sizes *dest to dest_len, since dest_len is passed below as the capacity of
 // &(*dest)[0] — confirm against the upstream file.
780 
781  // We are getting the destination buffer by getting the beginning of the
782  // string and converting it into a char *.
783  size_t len;
784  const bool ok =
785  Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
786  if (!ok) {
787  dest->clear();
788  return false;
789  }
790 
791  // could be shorter if there was padding
792  assert(len <= dest_len);
793  dest->erase(len);
794 
795  return true;
796 }
797 
798 /* clang-format off */
799 constexpr char kHexValueLenient[256] = {
800  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
801  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
802  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
803  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9'
804  0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F'
805  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
806  0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f'
807  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
808  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
809  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
810  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
811  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
812  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
813  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
814  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
815  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
816 };
817 
818 /* clang-format on */
819 
820 // This is a templated function so that T can be either a char*
821 // or a string. This works because we use the [] operator to access
822 // individual characters at a time.
823 template <typename T>
824 void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) {
825  for (int i = 0; i < num; i++) {
826  to[i] = (kHexValueLenient[from[i * 2] & 0xFF] << 4) +
827  (kHexValueLenient[from[i * 2 + 1] & 0xFF]);
828  }
829 }
830 
831 // This is a templated function so that T can be either a char* or a
832 // std::string.
833 template <typename T>
834 void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
835  auto dest_ptr = &dest[0];
836  for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
837  const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
838  std::copy(hex_p, hex_p + 2, dest_ptr);
839  }
840 }
841 
842 } // namespace
843 
844 // ----------------------------------------------------------------------
845 // CUnescape()
846 //
847 // See CUnescapeInternal() for implementation details.
848 // ----------------------------------------------------------------------
850  std::string* error) {
851  return CUnescapeInternal(source, kUnescapeNulls, dest, error);
852 }
853 
// NOTE(review): listing lines 854-856 are absent from this view. The
// cross-reference index at the end of the listing places
// `std::string CEscape(absl::string_view src)` at line 854; the statements
// that declare and fill `dest` are not visible — confirm upstream.
857  return dest;
858 }
859 
// NOTE(review): the signature line (listing line 860) is absent from this
// view; presumably this is CHexEscape() — confirm upstream. The body escapes
// `src` with use_hex = true and utf8_safe = false.
861  return CEscapeInternal(src, true, false);
862 }
863 
865  return CEscapeInternal(src, false, true);
866 }
867 
869  return CEscapeInternal(src, true, true);
870 }
871 
872 // ----------------------------------------------------------------------
873 // Base64Unescape() - base64 decoder
874 // Base64Escape() - base64 encoder
875 // WebSafeBase64Unescape() - Google's variation of base64 decoder
876 // WebSafeBase64Escape() - Google's variation of base64 encoder
877 //
878 // Check out
879 // http://tools.ietf.org/html/rfc2045 for formal description, but what we
880 // care about is that...
881 // Take the encoded stuff in groups of 4 characters and turn each
882 // character into a code 0 to 63 thus:
883 // A-Z map to 0 to 25
884 // a-z map to 26 to 51
885 // 0-9 map to 52 to 61
886 // +(- for WebSafe) maps to 62
887 // /(_ for WebSafe) maps to 63
888 // There will be four numbers, all less than 64 which can be represented
889 // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
890 // Arrange the 6 digit binary numbers into three bytes as such:
891 // aaaaaabb bbbbcccc ccdddddd
892 // Equals signs (one or two) are used at the end of the encoded block to
893 // indicate that the text was not an integer multiple of three bytes long.
894 // ----------------------------------------------------------------------
895 
// NOTE(review): the signature line (listing line 896) is absent from this
// view; presumably this is Base64Unescape() — confirm upstream. The body
// decodes `src` into `dest` with the standard-alphabet table kUnBase64.
897  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
898 }
899 
// NOTE(review): the signature line (listing line 900) is absent from this
// view; presumably this is WebSafeBase64Unescape() — confirm upstream. The
// body decodes `src` into `dest` with the '-'/'_' table kUnWebSafeBase64.
901  return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
902 }
903 
// NOTE(review): listing lines 904-905 and 907 are absent from this view, so
// the signature, the callee, and the trailing arguments of this call are not
// visible. Only the source bytes, their length, and `dest` can be seen being
// passed — confirm upstream.
906  reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
908 }
909 
// NOTE(review): listing lines 910-911 are absent from this view, so the
// signature and the callee are not visible. The visible arguments pass the
// source bytes, their length, `dest`, the literal `false`, and the web-safe
// alphabet kWebSafeBase64Chars — confirm the callee and the meaning of
// `false` upstream.
912  reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
913  false, kWebSafeBase64Chars);
914 }
915 
// NOTE(review): listing lines 916-918 and 920 are absent from this view, so
// the signature, the declaration of `dest`, the callee, and the trailing
// arguments are not visible. This appears to be a variant that returns the
// encoded string by value — confirm upstream.
919  reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
921  return dest;
922 }
923 
// NOTE(review): listing lines 924-926 are absent from this view, so the
// signature, the declaration of `dest`, and the callee are not visible. The
// visible arguments pass the source bytes, their length, `&dest`, the literal
// `false`, and kWebSafeBase64Chars; the filled string is returned by value.
927  reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
928  false, kWebSafeBase64Chars);
929  return dest;
930 }
931 
// NOTE(review): listing lines 932-933 and 935 are absent from this view, so
// the signature and the declaration/sizing of `result` are not visible —
// confirm upstream.
// Two hex characters make one byte; integer division drops a trailing odd
// character.
934  const auto num = from.size() / 2;
936  absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
937  return result;
938 }
939 
// NOTE(review): listing lines 940-942 are absent from this view, so the
// signature and the declaration/sizing of `result` are not visible — confirm
// upstream. The visible call hex-encodes all from.size() bytes of `from` into
// `result`, which is then returned.
943  absl::BytesToHexStringInternal<std::string&>(
944  reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
945  return result;
946 }
947 
949 } // namespace absl
absl::Utf8SafeCHexEscape
std::string Utf8SafeCHexEscape(absl::string_view src)
Definition: abseil-cpp/absl/strings/escaping.cc:868
_gevent_test_main.result
result
Definition: _gevent_test_main.py:96
absl::ABSL_NAMESPACE_BEGIN::kWebSafeBase64Chars
constexpr char kWebSafeBase64Chars[]
Definition: abseil-cpp/absl/strings/escaping.cc:768
absl::ABSL_NAMESPACE_BEGIN::CEscapeAndAppendInternal
void CEscapeAndAppendInternal(absl::string_view src, std::string *dest)
Definition: abseil-cpp/absl/strings/escaping.cc:382
fix_build_deps.temp
temp
Definition: fix_build_deps.py:488
absl::StrCat
std::string StrCat(const AlphaNum &a, const AlphaNum &b)
Definition: abseil-cpp/absl/strings/str_cat.cc:98
absl::strings_internal::kBase64Chars
const ABSL_CONST_INIT char kBase64Chars[]
Definition: abseil-cpp/absl/strings/internal/escaping.cc:24
absl::ABSL_NAMESPACE_BEGIN::Base64UnescapeInternal
bool Base64UnescapeInternal(const char *src, size_t slen, String *dest, const signed char *unbase64)
Definition: abseil-cpp/absl/strings/escaping.cc:772
absl::ABSL_NAMESPACE_BEGIN::BytesToHexStringInternal
void BytesToHexStringInternal(const unsigned char *src, T dest, ptrdiff_t num)
Definition: abseil-cpp/absl/strings/escaping.cc:834
absl::ABSL_NAMESPACE_BEGIN::CEscapeInternal
std::string CEscapeInternal(absl::string_view src, bool use_hex, bool utf8_safe)
Definition: abseil-cpp/absl/strings/escaping.cc:309
copy
static int copy(grpc_slice_buffer *input, grpc_slice_buffer *output)
Definition: message_compress.cc:145
absl::string_view
Definition: abseil-cpp/absl/strings/string_view.h:167
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
error
grpc_error_handle error
Definition: retry_filter.cc:499
absl::ABSL_NAMESPACE_BEGIN::kUnWebSafeBase64
constexpr signed char kUnWebSafeBase64[]
Definition: abseil-cpp/absl/strings/escaping.cc:732
GET_INPUT
#define GET_INPUT(label, remain)
absl::ABSL_NAMESPACE_BEGIN::kUnBase64
constexpr signed char kUnBase64[]
Definition: abseil-cpp/absl/strings/escaping.cc:697
absl::strings_internal::EncodeUTF8Char
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char)
Definition: abseil-cpp/absl/strings/internal/utf8.cc:23
to
size_t to
Definition: abseil-cpp/absl/container/internal/layout_test.cc:1385
absl::CEscape
std::string CEscape(absl::string_view src)
Definition: abseil-cpp/absl/strings/escaping.cc:854
second
StrT second
Definition: cxa_demangle.cpp:4885
absl::Utf8SafeCEscape
std::string Utf8SafeCEscape(absl::string_view src)
Definition: abseil-cpp/absl/strings/escaping.cc:864
ABSL_NAMESPACE_END
#define ABSL_NAMESPACE_END
Definition: third_party/abseil-cpp/absl/base/config.h:171
T
#define T(upbtypeconst, upbtype, ctype, default_value)
absl::ascii_isspace
bool ascii_isspace(unsigned char c)
Definition: abseil-cpp/absl/strings/ascii.h:95
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
absl::ABSL_NAMESPACE_BEGIN::IsSurrogate
bool IsSurrogate(char32_t c, absl::string_view src, std::string *error)
Definition: abseil-cpp/absl/strings/escaping.cc:56
absl::CUnescape
bool CUnescape(absl::string_view source, std::string *dest, std::string *error)
Definition: abseil-cpp/absl/strings/escaping.cc:849
from
size_t from
Definition: abseil-cpp/absl/container/internal/layout_test.cc:1384
ABSL_NAMESPACE_BEGIN
#define ABSL_NAMESPACE_BEGIN
Definition: third_party/abseil-cpp/absl/base/config.h:170
absl::BytesToHexString
std::string BytesToHexString(absl::string_view from)
Definition: abseil-cpp/absl/strings/escaping.cc:940
end
char * end
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1008
absl::string_view::size
constexpr size_type size() const noexcept
Definition: abseil-cpp/absl/strings/string_view.h:277
absl::strings_internal::Base64EscapeInternal
size_t Base64EscapeInternal(const unsigned char *src, size_t szsrc, char *dest, size_t szdest, const char *base64, bool do_padding)
Definition: abseil-cpp/absl/strings/internal/escaping.cc:73
absl::ABSL_NAMESPACE_BEGIN::c_escaped_len
constexpr char c_escaped_len[256]
Definition: abseil-cpp/absl/strings/escaping.cc:353
absl::ascii_isprint
bool ascii_isprint(unsigned char c)
Definition: abseil-cpp/absl/strings/ascii.h:137
absl::HexStringToBytes
std::string HexStringToBytes(absl::string_view from)
Definition: abseil-cpp/absl/strings/escaping.cc:932
x
int x
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3610
absl::WebSafeBase64Escape
void WebSafeBase64Escape(absl::string_view src, std::string *dest)
Definition: abseil-cpp/absl/strings/escaping.cc:910
absl::WebSafeBase64Unescape
bool WebSafeBase64Unescape(absl::string_view src, std::string *dest)
Definition: abseil-cpp/absl/strings/escaping.cc:900
absl::CHexEscape
std::string CHexEscape(absl::string_view src)
Definition: abseil-cpp/absl/strings/escaping.cc:860
absl::ABSL_NAMESPACE_BEGIN::kHexValueLenient
constexpr char kHexValueLenient[256]
Definition: abseil-cpp/absl/strings/escaping.cc:799
tests.qps.qps_worker.dest
dest
Definition: qps_worker.py:45
absl::ABSL_NAMESPACE_BEGIN::kUnescapeNulls
constexpr bool kUnescapeNulls
Definition: abseil-cpp/absl/strings/escaping.cc:41
FATAL
#define FATAL(msg)
Definition: task.h:88
absl::ABSL_NAMESPACE_BEGIN::CEscapedLength
size_t CEscapedLength(absl::string_view src)
Definition: abseil-cpp/absl/strings/escaping.cc:376
absl::ABSL_NAMESPACE_BEGIN::is_octal_digit
bool is_octal_digit(char c)
Definition: abseil-cpp/absl/strings/escaping.cc:43
absl::Base64Unescape
bool Base64Unescape(absl::string_view src, std::string *dest)
Definition: abseil-cpp/absl/strings/escaping.cc:896
absl::strings_internal::STLStringResizeUninitialized
void STLStringResizeUninitialized(string_type *s, size_t new_size)
Definition: abseil-cpp/absl/strings/internal/resize_uninitialized.h:67
first
StrT first
Definition: cxa_demangle.cpp:4884
grpc._common.decode
def decode(b)
Definition: grpc/_common.py:75
xds_manager.num
num
Definition: xds_manager.py:56
absl::ABSL_NAMESPACE_BEGIN::HexStringToBytesInternal
void HexStringToBytesInternal(const char *from, T to, ptrdiff_t num)
Definition: abseil-cpp/absl/strings/escaping.cc:824
ok
bool ok
Definition: async_end2end_test.cc:197
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
absl::numbers_internal::kHexChar
ABSL_CONST_INIT const ABSL_DLL char kHexChar[]
Definition: abseil-cpp/absl/strings/numbers.cc:1027
absl::numbers_internal::kHexTable
ABSL_CONST_INIT const ABSL_DLL char kHexTable[513]
Definition: abseil-cpp/absl/strings/numbers.cc:1030
absl::Base64Escape
void Base64Escape(absl::string_view src, std::string *dest)
Definition: abseil-cpp/absl/strings/escaping.cc:904
absl
Definition: abseil-cpp/absl/algorithm/algorithm.h:31
ch
char ch
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3621
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
absl::ascii_isxdigit
bool ascii_isxdigit(unsigned char c)
Definition: abseil-cpp/absl/strings/ascii.h:124
absl::string_view::data
constexpr const_pointer data() const noexcept
Definition: abseil-cpp/absl/strings/string_view.h:336
absl::ABSL_NAMESPACE_BEGIN::CUnescapeInternal
bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, std::string *dest, std::string *error)
Definition: abseil-cpp/absl/strings/escaping.cc:282
ABSL_RAW_LOG
#define ABSL_RAW_LOG(severity,...)
Definition: abseil-cpp/absl/base/internal/raw_logging.h:44
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
state
static struct rpc_state state
Definition: bad_server_response_test.cc:87
absl::ABSL_NAMESPACE_BEGIN::hex_digit_to_int
int hex_digit_to_int(char c)
Definition: abseil-cpp/absl/strings/escaping.cc:45


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:19