// Lowercase hexadecimal digits indexed by nibble value (0..15). The escaping
// code in this file also indexes it with values 0..7 to emit octal digits.
constexpr char kHexChar[] = "0123456789abcdef";
// Two-character lowercase hex spelling of every byte value, concatenated:
// the spelling of byte `b` lives at kHexTable[2 * b] and kHexTable[2 * b + 1].
// 256 entries * 2 characters + the terminating NUL = 513 bytes.
constexpr char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
// Flag forwarded by the public CUnescape() wrapper as the second argument of
// CUnescapeInternal().
// NOTE(review): presumably this binds to the `leave_nulls_escaped` parameter,
// so `false` means escaped NULs are decoded to real NUL bytes -- confirm
// against the CUnescapeInternal signature.
constexpr bool kUnescapeNulls = false;
// Returns true iff `c` is one of the octal digits '0' through '7'.
inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); }
64 inline int hex_digit_to_int(
char c) {
65 static_assert(
'0' == 0x30 &&
'A' == 0x41 &&
'a' == 0x61,
66 "Character set must be ASCII.");
68 int x =
static_cast<unsigned char>(c);
76 if (c >= 0xD800 && c <= 0xDFFF) {
78 *error =
absl::StrCat(
"invalid surrogate character (0xD800-DFFF): \\",
104 char* dest, ptrdiff_t* dest_len, std::string* error) {
106 const char* p = source.
data();
107 const char*
end = p + source.
size();
108 const char* last_byte = end - 1;
111 while (p == d && p < end && *p !=
'\\') p++, d++;
117 if (++p > last_byte) {
118 if (error) *error =
"String cannot end with \\";
122 case 'a': *d++ =
'\a';
break;
123 case 'b': *d++ =
'\b';
break;
124 case 'f': *d++ =
'\f';
break;
125 case 'n': *d++ =
'\n';
break;
126 case 'r': *d++ =
'\r';
break;
127 case 't': *d++ =
'\t';
break;
128 case 'v': *d++ =
'\v';
break;
129 case '\\': *d++ =
'\\';
break;
130 case '?': *d++ =
'\?';
break;
131 case '\'': *d++ =
'\'';
break;
132 case '"': *d++ =
'\"';
break;
142 const char* octal_start = p;
143 unsigned int ch = *p -
'0';
144 if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p -
'0';
145 if (p < last_byte && is_octal_digit(p[1]))
146 ch = ch * 8 + *++p -
'0';
149 *error =
"Value of \\" +
150 std::string(octal_start, p + 1 - octal_start) +
155 if ((ch == 0) && leave_nulls_escaped) {
157 const ptrdiff_t octal_size = p + 1 - octal_start;
159 memcpy(d, octal_start, octal_size);
168 if (p >= last_byte) {
169 if (error) *error =
"String cannot end with \\x";
172 if (error) *error =
"\\x cannot be followed by a non-hex digit";
176 const char* hex_start = p;
179 ch = (ch << 4) + hex_digit_to_int(*++p);
182 *error =
"Value of \\" +
183 std::string(hex_start, p + 1 - hex_start) +
188 if ((ch == 0) && leave_nulls_escaped) {
190 const ptrdiff_t hex_size = p + 1 - hex_start;
192 memcpy(d, hex_start, hex_size);
202 const char* hex_start = p;
205 *error =
"\\u must be followed by 4 hex digits: \\" +
206 std::string(hex_start, p + 1 - hex_start);
210 for (
int i = 0;
i < 4; ++
i) {
213 rune = (rune << 4) + hex_digit_to_int(*++p);
216 *error =
"\\u must be followed by 4 hex digits: \\" +
217 std::string(hex_start, p + 1 - hex_start);
222 if ((rune == 0) && leave_nulls_escaped) {
225 memcpy(d, hex_start, 5);
238 const char* hex_start = p;
241 *error =
"\\U must be followed by 8 hex digits: \\" +
242 std::string(hex_start, p + 1 - hex_start);
246 for (
int i = 0;
i < 8; ++
i) {
251 uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
252 if (newrune > 0x10FFFF) {
254 *error =
"Value of \\" +
255 std::string(hex_start, p + 1 - hex_start) +
256 " exceeds Unicode limit (0x10FFFF)";
264 *error =
"\\U must be followed by 8 hex digits: \\" +
265 std::string(hex_start, p + 1 - hex_start);
270 if ((rune == 0) && leave_nulls_escaped) {
273 memcpy(d, hex_start, 9);
284 if (error) *error = std::string(
"Unknown escape sequence: \\") + *p;
291 *dest_len = d - dest;
302 std::string* dest, std::string* error) {
306 if (!CUnescapeInternal(source,
313 dest->erase(dest_size);
331 bool last_hex_escape =
false;
333 for (
unsigned char c : src) {
334 bool is_hex_escape =
false;
336 case '\n': dest.append(
"\\" "n");
break;
337 case '\r': dest.append(
"\\" "r");
break;
338 case '\t': dest.append(
"\\" "t");
break;
339 case '\"': dest.append(
"\\" "\"");
break;
340 case '\'': dest.append(
"\\" "'");
break;
341 case '\\': dest.append(
"\\" "\\");
break;
346 if ((!utf8_safe || c < 0x80) &&
350 dest.append(
"\\" "x");
351 dest.push_back(kHexChar[c / 16]);
352 dest.push_back(kHexChar[c % 16]);
353 is_hex_escape =
true;
356 dest.push_back(kHexChar[c / 64]);
357 dest.push_back(kHexChar[(c % 64) / 8]);
358 dest.push_back(kHexChar[c % 8]);
365 last_hex_escape = is_hex_escape;
// Number of output characters each input byte occupies once C-escaped by
// CEscapeAndAppendInternal():
//   1 - the byte is copied through unchanged,
//   2 - a backslash followed by one character (\t \n \r \" \' \\),
//   4 - a backslash followed by three octal digits.
// Indexed by the byte value interpreted as unsigned char.
constexpr char c_escaped_len[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // 0x00: \t, \n, \r
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0x10
    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x20: ", '
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x30
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x40
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 0x50: backslash
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x60
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 0x70: DEL
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0x80
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0x90
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0xA0
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0xB0
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0xC0
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0xD0
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0xE0
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // 0xF0
};
396 size_t escaped_len = 0;
397 for (
unsigned char c : src) escaped_len += c_escaped_len[c];
402 size_t escaped_len = CEscapedLength(src);
403 if (escaped_len == src.
size()) {
404 dest->append(src.
data(), src.
size());
408 size_t cur_dest_len = dest->size();
410 cur_dest_len + escaped_len);
411 char* append_ptr = &(*dest)[cur_dest_len];
413 for (
unsigned char c : src) {
414 int char_len = c_escaped_len[c];
417 }
else if (char_len == 2) {
420 *append_ptr++ =
'\\';
424 *append_ptr++ =
'\\';
428 *append_ptr++ =
'\\';
432 *append_ptr++ =
'\\';
433 *append_ptr++ =
'\"';
436 *append_ptr++ =
'\\';
437 *append_ptr++ =
'\'';
440 *append_ptr++ =
'\\';
441 *append_ptr++ =
'\\';
445 *append_ptr++ =
'\\';
446 *append_ptr++ =
'0' + c / 64;
447 *append_ptr++ =
'0' + (c % 64) / 8;
448 *append_ptr++ =
'0' + c % 8;
453 bool Base64UnescapeInternal(
const char* src_param,
size_t szsrc,
char* dest,
454 size_t szdest,
const signed char* unbase64,
456 static const char kPad64Equals =
'=';
457 static const char kPad64Dot =
'.';
463 unsigned int temp = 0;
468 const unsigned char* src =
reinterpret_cast<const unsigned char*
>(src_param);
476 #define GET_INPUT(label, remain) \ 480 decode = unbase64[ch]; \ 482 if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \ 483 state = 4 - remain; \ 508 if (!src[0] || !src[1] || !src[2] ||
509 ((temp = ((
unsigned(unbase64[src[0]]) << 18) |
510 (
unsigned(unbase64[src[1]]) << 12) |
511 (
unsigned(unbase64[src[2]]) << 6) |
512 (
unsigned(unbase64[src[3]])))) &
524 temp = (temp << 6) | decode;
526 temp = (temp << 6) | decode;
528 temp = (temp << 6) | decode;
539 if (destidx + 3 > szdest)
return false;
540 dest[destidx + 2] = temp;
542 dest[destidx + 1] = temp;
544 dest[destidx] = temp;
549 if (!src[0] || !src[1] || !src[2] ||
550 ((temp = ((
unsigned(unbase64[src[0]]) << 18) |
551 (
unsigned(unbase64[src[1]]) << 12) |
552 (
unsigned(unbase64[src[2]]) << 6) |
553 (
unsigned(unbase64[src[3]])))) &
571 if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
575 if (ch == kPad64Equals || ch == kPad64Dot) {
589 decode = unbase64[ch];
593 }
else if (ch == kPad64Equals || ch == kPad64Dot) {
605 temp = (temp << 6) | decode;
611 if (destidx + 3 > szdest)
return false;
612 dest[destidx + 2] = temp;
614 dest[destidx + 1] = temp;
616 dest[destidx] = temp;
626 int expected_equals = 0;
639 if (destidx + 1 > szdest)
return false;
641 dest[destidx] = temp;
650 if (destidx + 2 > szdest)
return false;
652 dest[destidx + 1] = temp;
654 dest[destidx] = temp;
662 ABSL_RAW_LOG(FATAL,
"This can't happen; base64 decoder state = %d",
673 if (*src == kPad64Equals || *src == kPad64Dot)
681 const bool ok = (equals == 0 || equals == expected_equals);
682 if (ok) *len = destidx;
// Decoding table for the standard Base64 alphabet (A-Z, a-z, 0-9, '+', '/').
// Indexed by the input byte as unsigned char; holds the 6-bit value of a
// valid alphabet character and -1 for every other byte.
constexpr signed char kUnBase64[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x08
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x18
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x20
    -1, -1, -1, 62, -1, -1, -1, 63,  // 0x28: '+' and '/'
    52, 53, 54, 55, 56, 57, 58, 59,  // 0x30: '0'..'7'
    60, 61, -1, -1, -1, -1, -1, -1,  // 0x38: '8', '9'
    -1, 0,  1,  2,  3,  4,  5,  6,   // 0x40: 'A'..'G'
    7,  8,  9,  10, 11, 12, 13, 14,  // 0x48: 'H'..'O'
    15, 16, 17, 18, 19, 20, 21, 22,  // 0x50: 'P'..'W'
    23, 24, 25, -1, -1, -1, -1, -1,  // 0x58: 'X'..'Z'
    -1, 26, 27, 28, 29, 30, 31, 32,  // 0x60: 'a'..'g'
    33, 34, 35, 36, 37, 38, 39, 40,  // 0x68: 'h'..'o'
    41, 42, 43, 44, 45, 46, 47, 48,  // 0x70: 'p'..'w'
    49, 50, 51, -1, -1, -1, -1, -1,  // 0x78: 'x'..'z'
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x88
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x98
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xF0
    -1, -1, -1, -1, -1, -1, -1, -1   // 0xF8
};
// The decoder indexes this table with arbitrary byte values, so it must cover
// the whole unsigned char range.
static_assert(sizeof(kUnBase64) == 256, "kUnBase64 must have 256 entries");
// Decoding table for the web-safe Base64 alphabet (A-Z, a-z, 0-9, '-', '_').
// Indexed by the input byte as unsigned char; holds the 6-bit value of a
// valid alphabet character and -1 for every other byte.
constexpr signed char kUnWebSafeBase64[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x08
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x18
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x20
    -1, -1, -1, -1, -1, 62, -1, -1,  // 0x28: '-'
    52, 53, 54, 55, 56, 57, 58, 59,  // 0x30: '0'..'7'
    60, 61, -1, -1, -1, -1, -1, -1,  // 0x38: '8', '9'
    -1, 0,  1,  2,  3,  4,  5,  6,   // 0x40: 'A'..'G'
    7,  8,  9,  10, 11, 12, 13, 14,  // 0x48: 'H'..'O'
    15, 16, 17, 18, 19, 20, 21, 22,  // 0x50: 'P'..'W'
    23, 24, 25, -1, -1, -1, -1, 63,  // 0x58: 'X'..'Z', '_'
    -1, 26, 27, 28, 29, 30, 31, 32,  // 0x60: 'a'..'g'
    33, 34, 35, 36, 37, 38, 39, 40,  // 0x68: 'h'..'o'
    41, 42, 43, 44, 45, 46, 47, 48,  // 0x70: 'p'..'w'
    49, 50, 51, -1, -1, -1, -1, -1,  // 0x78: 'x'..'z'
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x88
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x98
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xF0
    -1, -1, -1, -1, -1, -1, -1, -1   // 0xF8
};
// The decoder indexes this table with arbitrary byte values, so it must cover
// the whole unsigned char range.
static_assert(sizeof(kUnWebSafeBase64) == 256,
              "kUnWebSafeBase64 must have 256 entries");
787 size_t CalculateBase64EscapedLenInternal(
size_t input_len,
bool do_padding) {
802 size_t len = (input_len / 3) * 4;
804 if (input_len % 3 == 0) {
809 }
else if (input_len % 3 == 1) {
829 assert(len >= input_len);
833 size_t Base64EscapeInternal(
const unsigned char* src,
size_t szsrc,
char* dest,
834 size_t szdest,
const char* base64,
836 static const char kPad64 =
'=';
838 if (szsrc * 4 > szdest * 3)
return 0;
840 char* cur_dest = dest;
841 const unsigned char* cur_src = src;
843 char*
const limit_dest = dest + szdest;
844 const unsigned char*
const limit_src = src + szsrc;
849 while (cur_src < limit_src - 3) {
852 cur_dest[0] = base64[in >> 18];
854 cur_dest[1] = base64[in >> 12];
856 cur_dest[2] = base64[in >> 6];
858 cur_dest[3] = base64[
in];
865 szdest = limit_dest - cur_dest;
866 szsrc = limit_src - cur_src;
876 if (szdest < 2)
return 0;
877 uint32_t
in = cur_src[0];
878 cur_dest[0] = base64[in >> 2];
880 cur_dest[1] = base64[in << 4];
884 if (szdest < 2)
return 0;
885 cur_dest[0] = kPad64;
886 cur_dest[1] = kPad64;
895 if (szdest < 3)
return 0;
897 cur_dest[0] = base64[in >> 10];
899 cur_dest[1] = base64[in >> 4];
901 cur_dest[2] = base64[in << 2];
905 if (szdest < 1)
return 0;
906 cur_dest[0] = kPad64;
916 if (szdest < 4)
return 0;
918 cur_dest[0] = base64[in >> 18];
920 cur_dest[1] = base64[in >> 12];
922 cur_dest[2] = base64[in >> 6];
924 cur_dest[3] = base64[
in];
932 ABSL_RAW_LOG(FATAL,
"Logic problem? szsrc = %zu", szsrc);
935 return (cur_dest - dest);
// Standard Base64 alphabet: the character at index v encodes the 6-bit
// value v. Values 62 and 63 are '+' and '/'.
constexpr char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Web-safe Base64 alphabet: the character at index v encodes the 6-bit
// value v. Values 62 and 63 are '-' and '_'.
constexpr char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
944 template <
typename String>
945 void Base64EscapeInternal(
const unsigned char* src,
size_t szsrc, String* dest,
946 bool do_padding,
const char* base64_chars) {
947 const size_t calc_escaped_size =
948 CalculateBase64EscapedLenInternal(szsrc, do_padding);
951 const size_t escaped_len = Base64EscapeInternal(
952 src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
953 assert(calc_escaped_size == escaped_len);
954 dest->erase(escaped_len);
957 template <
typename String>
958 bool Base64UnescapeInternal(
const char* src,
size_t slen, String* dest,
959 const signed char* unbase64) {
963 const size_t dest_len = 3 * (slen / 4) + (slen % 4);
971 Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
978 assert(len <= dest_len);
// Numeric value of each hexadecimal digit character, indexed by the character
// as unsigned char. '0'-'9' map to 0-9 and 'a'-'f' / 'A'-'F' map to 10-15.
// Every other byte maps to 0, so an invalid digit cannot be told apart from
// '0' through this table alone.
constexpr char kHexValue[256] = {
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x20
    0, 1,  2,  3,  4,  5,  6,  7, 8, 9, 0, 0, 0, 0, 0, 0,  // 0x30: '0'..'9'
    0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x40: 'A'..'F'
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x50
    0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x60: 'a'..'f'
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x70
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xA0
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xB0
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xC0
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xD0
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xE0
    0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0   // 0xF0
};
1008 template <
typename T>
1009 void HexStringToBytesInternal(
const char*
from, T
to, ptrdiff_t num) {
1010 for (
int i = 0;
i < num;
i++) {
1011 to[
i] = (kHexValue[from[
i * 2] & 0xFF] << 4) +
1012 (kHexValue[from[
i * 2 + 1] & 0xFF]);
1018 template <
typename T>
1019 void BytesToHexStringInternal(
const unsigned char* src, T dest, ptrdiff_t num) {
1020 auto dest_ptr = &dest[0];
1021 for (
auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
1022 const char* hex_p = &kHexTable[*src_ptr * 2];
1023 std::copy(hex_p, hex_p + 2, dest_ptr);
1035 std::string* error) {
1036 return CUnescapeInternal(source, kUnescapeNulls, dest, error);
1041 CEscapeAndAppendInternal(src, &dest);
1046 return CEscapeInternal(src,
true,
false);
1050 return CEscapeInternal(src,
false,
true);
1054 return CEscapeInternal(src,
true,
true);
1082 return Base64UnescapeInternal(src.
data(), src.
size(), dest, kUnBase64);
1086 return Base64UnescapeInternal(src.
data(), src.
size(), dest, kUnWebSafeBase64);
1090 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.
data()),
1091 src.
size(), dest,
true, kBase64Chars);
1095 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.
data()),
1096 src.
size(), dest,
false, kWebSafeBase64Chars);
1101 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.
data()),
1102 src.
size(), &dest,
true, kBase64Chars);
1108 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.
data()),
1109 src.
size(), &dest,
false, kWebSafeBase64Chars);
1115 const auto num = from.
size() / 2;
1117 absl::HexStringToBytesInternal<std::string&>(from.
data(), result, num);
1124 absl::BytesToHexStringInternal<std::string&>(
1125 reinterpret_cast<const unsigned char*
>(from.
data()), result, from.
size());
std::string Utf8SafeCEscape(absl::string_view src)
std::string CEscape(absl::string_view src)
#define ABSL_RAW_LOG(severity,...)
uint16_t Load16(const void *p)
std::string StrCat(const AlphaNum &a, const AlphaNum &b)
std::string Utf8SafeCHexEscape(absl::string_view src)
bool ascii_isprint(unsigned char c)
bool ascii_isspace(unsigned char c)
std::string BytesToHexString(absl::string_view from)
bool WebSafeBase64Unescape(absl::string_view src, std::string *dest)
size_t EncodeUTF8Char(char *buffer, char32_t utf8_char)
constexpr size_type size() const noexcept
std::string CHexEscape(absl::string_view src)
void WebSafeBase64Escape(absl::string_view src, std::string *dest)
uint32_t Load32(const void *p)
void STLStringResizeUninitialized(string_type *s, size_t new_size)
void Base64Escape(absl::string_view src, std::string *dest)
std::string HexStringToBytes(absl::string_view from)
bool Base64Unescape(absl::string_view src, std::string *dest)
bool CUnescape(absl::string_view source, std::string *dest, std::string *error)
bool ascii_isxdigit(unsigned char c)
constexpr const_pointer data() const noexcept
#define GET_INPUT(label, remain)