00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "absl/strings/escaping.h"
00016
00017 #include <algorithm>
00018 #include <cassert>
00019 #include <cstdint>
00020 #include <cstring>
00021 #include <iterator>
00022 #include <limits>
00023 #include <string>
00024
00025 #include "absl/base/internal/endian.h"
00026 #include "absl/base/internal/raw_logging.h"
00027 #include "absl/base/internal/unaligned_access.h"
00028 #include "absl/strings/internal/char_map.h"
00029 #include "absl/strings/internal/resize_uninitialized.h"
00030 #include "absl/strings/internal/utf8.h"
00031 #include "absl/strings/str_cat.h"
00032 #include "absl/strings/str_join.h"
00033 #include "absl/strings/string_view.h"
00034
00035 namespace absl {
00036 namespace {
00037
00038
// Lowercase hexadecimal digits, indexed by nibble value (0..15).
constexpr char kHexChar[] =
    "0123456789"
    "abcdef";

// Two-character lowercase hex spelling of every byte value: the spelling of
// byte `b` occupies kHexTable[2 * b] and kHexTable[2 * b + 1].  The array has
// 513 elements: 256 * 2 characters plus the terminating NUL.
constexpr char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";

// Value passed by CUnescape() as the `leave_nulls_escaped` argument of
// CUnescapeInternal().
constexpr bool kUnescapeNulls = false;
00061
// Returns true iff `c` is one of the ASCII octal digits '0' through '7'.
inline bool is_octal_digit(char c) { return c >= '0' && c <= '7'; }
00063
00064 inline int hex_digit_to_int(char c) {
00065 static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
00066 "Character set must be ASCII.");
00067 assert(absl::ascii_isxdigit(c));
00068 int x = static_cast<unsigned char>(c);
00069 if (x > '9') {
00070 x += 9;
00071 }
00072 return x & 0xf;
00073 }
00074
00075 inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {
00076 if (c >= 0xD800 && c <= 0xDFFF) {
00077 if (error) {
00078 *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
00079 src);
00080 }
00081 return true;
00082 }
00083 return false;
00084 }
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103 bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
00104 char* dest, ptrdiff_t* dest_len, std::string* error) {
00105 char* d = dest;
00106 const char* p = source.data();
00107 const char* end = p + source.size();
00108 const char* last_byte = end - 1;
00109
00110
00111 while (p == d && p < end && *p != '\\') p++, d++;
00112
00113 while (p < end) {
00114 if (*p != '\\') {
00115 *d++ = *p++;
00116 } else {
00117 if (++p > last_byte) {
00118 if (error) *error = "String cannot end with \\";
00119 return false;
00120 }
00121 switch (*p) {
00122 case 'a': *d++ = '\a'; break;
00123 case 'b': *d++ = '\b'; break;
00124 case 'f': *d++ = '\f'; break;
00125 case 'n': *d++ = '\n'; break;
00126 case 'r': *d++ = '\r'; break;
00127 case 't': *d++ = '\t'; break;
00128 case 'v': *d++ = '\v'; break;
00129 case '\\': *d++ = '\\'; break;
00130 case '?': *d++ = '\?'; break;
00131 case '\'': *d++ = '\''; break;
00132 case '"': *d++ = '\"'; break;
00133 case '0':
00134 case '1':
00135 case '2':
00136 case '3':
00137 case '4':
00138 case '5':
00139 case '6':
00140 case '7': {
00141
00142 const char* octal_start = p;
00143 unsigned int ch = *p - '0';
00144 if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
00145 if (p < last_byte && is_octal_digit(p[1]))
00146 ch = ch * 8 + *++p - '0';
00147 if (ch > 0xff) {
00148 if (error) {
00149 *error = "Value of \\" +
00150 std::string(octal_start, p + 1 - octal_start) +
00151 " exceeds 0xff";
00152 }
00153 return false;
00154 }
00155 if ((ch == 0) && leave_nulls_escaped) {
00156
00157 const ptrdiff_t octal_size = p + 1 - octal_start;
00158 *d++ = '\\';
00159 memcpy(d, octal_start, octal_size);
00160 d += octal_size;
00161 break;
00162 }
00163 *d++ = ch;
00164 break;
00165 }
00166 case 'x':
00167 case 'X': {
00168 if (p >= last_byte) {
00169 if (error) *error = "String cannot end with \\x";
00170 return false;
00171 } else if (!absl::ascii_isxdigit(p[1])) {
00172 if (error) *error = "\\x cannot be followed by a non-hex digit";
00173 return false;
00174 }
00175 unsigned int ch = 0;
00176 const char* hex_start = p;
00177 while (p < last_byte && absl::ascii_isxdigit(p[1]))
00178
00179 ch = (ch << 4) + hex_digit_to_int(*++p);
00180 if (ch > 0xFF) {
00181 if (error) {
00182 *error = "Value of \\" +
00183 std::string(hex_start, p + 1 - hex_start) +
00184 " exceeds 0xff";
00185 }
00186 return false;
00187 }
00188 if ((ch == 0) && leave_nulls_escaped) {
00189
00190 const ptrdiff_t hex_size = p + 1 - hex_start;
00191 *d++ = '\\';
00192 memcpy(d, hex_start, hex_size);
00193 d += hex_size;
00194 break;
00195 }
00196 *d++ = ch;
00197 break;
00198 }
00199 case 'u': {
00200
00201 char32_t rune = 0;
00202 const char* hex_start = p;
00203 if (p + 4 >= end) {
00204 if (error) {
00205 *error = "\\u must be followed by 4 hex digits: \\" +
00206 std::string(hex_start, p + 1 - hex_start);
00207 }
00208 return false;
00209 }
00210 for (int i = 0; i < 4; ++i) {
00211
00212 if (absl::ascii_isxdigit(p[1])) {
00213 rune = (rune << 4) + hex_digit_to_int(*++p);
00214 } else {
00215 if (error) {
00216 *error = "\\u must be followed by 4 hex digits: \\" +
00217 std::string(hex_start, p + 1 - hex_start);
00218 }
00219 return false;
00220 }
00221 }
00222 if ((rune == 0) && leave_nulls_escaped) {
00223
00224 *d++ = '\\';
00225 memcpy(d, hex_start, 5);
00226 d += 5;
00227 break;
00228 }
00229 if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
00230 return false;
00231 }
00232 d += strings_internal::EncodeUTF8Char(d, rune);
00233 break;
00234 }
00235 case 'U': {
00236
00237 char32_t rune = 0;
00238 const char* hex_start = p;
00239 if (p + 8 >= end) {
00240 if (error) {
00241 *error = "\\U must be followed by 8 hex digits: \\" +
00242 std::string(hex_start, p + 1 - hex_start);
00243 }
00244 return false;
00245 }
00246 for (int i = 0; i < 8; ++i) {
00247
00248 if (absl::ascii_isxdigit(p[1])) {
00249
00250
00251 uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
00252 if (newrune > 0x10FFFF) {
00253 if (error) {
00254 *error = "Value of \\" +
00255 std::string(hex_start, p + 1 - hex_start) +
00256 " exceeds Unicode limit (0x10FFFF)";
00257 }
00258 return false;
00259 } else {
00260 rune = newrune;
00261 }
00262 } else {
00263 if (error) {
00264 *error = "\\U must be followed by 8 hex digits: \\" +
00265 std::string(hex_start, p + 1 - hex_start);
00266 }
00267 return false;
00268 }
00269 }
00270 if ((rune == 0) && leave_nulls_escaped) {
00271
00272 *d++ = '\\';
00273 memcpy(d, hex_start, 9);
00274 d += 9;
00275 break;
00276 }
00277 if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
00278 return false;
00279 }
00280 d += strings_internal::EncodeUTF8Char(d, rune);
00281 break;
00282 }
00283 default: {
00284 if (error) *error = std::string("Unknown escape sequence: \\") + *p;
00285 return false;
00286 }
00287 }
00288 p++;
00289 }
00290 }
00291 *dest_len = d - dest;
00292 return true;
00293 }
00294
00295
00296
00297
00298
00299
00300
00301 bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
00302 std::string* dest, std::string* error) {
00303 strings_internal::STLStringResizeUninitialized(dest, source.size());
00304
00305 ptrdiff_t dest_size;
00306 if (!CUnescapeInternal(source,
00307 leave_nulls_escaped,
00308 &(*dest)[0],
00309 &dest_size,
00310 error)) {
00311 return false;
00312 }
00313 dest->erase(dest_size);
00314 return true;
00315 }
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328 std::string CEscapeInternal(absl::string_view src, bool use_hex,
00329 bool utf8_safe) {
00330 std::string dest;
00331 bool last_hex_escape = false;
00332
00333 for (unsigned char c : src) {
00334 bool is_hex_escape = false;
00335 switch (c) {
00336 case '\n': dest.append("\\" "n"); break;
00337 case '\r': dest.append("\\" "r"); break;
00338 case '\t': dest.append("\\" "t"); break;
00339 case '\"': dest.append("\\" "\""); break;
00340 case '\'': dest.append("\\" "'"); break;
00341 case '\\': dest.append("\\" "\\"); break;
00342 default:
00343
00344
00345
00346 if ((!utf8_safe || c < 0x80) &&
00347 (!absl::ascii_isprint(c) ||
00348 (last_hex_escape && absl::ascii_isxdigit(c)))) {
00349 if (use_hex) {
00350 dest.append("\\" "x");
00351 dest.push_back(kHexChar[c / 16]);
00352 dest.push_back(kHexChar[c % 16]);
00353 is_hex_escape = true;
00354 } else {
00355 dest.append("\\");
00356 dest.push_back(kHexChar[c / 64]);
00357 dest.push_back(kHexChar[(c % 64) / 8]);
00358 dest.push_back(kHexChar[c % 8]);
00359 }
00360 } else {
00361 dest.push_back(c);
00362 break;
00363 }
00364 }
00365 last_hex_escape = is_hex_escape;
00366 }
00367
00368 return dest;
00369 }
00370
00371
// Number of output characters CEscapeAndAppendInternal() produces for each
// input byte value:
//   1 - copied unchanged,
//   2 - two-character escape (\t \n \r \" \' \\),
//   4 - backslash plus three octal digits.
constexpr char c_escaped_len[256] = {
    // 0x00 - 0x0F ('\t' = 0x09, '\n' = 0x0A, '\r' = 0x0D)
    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,
    // 0x10 - 0x1F
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    // 0x20 - 0x2F ('"' = 0x22, '\'' = 0x27)
    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x30 - 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x40 - 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x50 - 0x5F ('\\' = 0x5C)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,
    // 0x60 - 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70 - 0x7F (DEL = 0x7F)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,
    // 0x80 - 0xFF
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
00390
00391
00392
00393
00394
00395 inline size_t CEscapedLength(absl::string_view src) {
00396 size_t escaped_len = 0;
00397 for (unsigned char c : src) escaped_len += c_escaped_len[c];
00398 return escaped_len;
00399 }
00400
00401 void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
00402 size_t escaped_len = CEscapedLength(src);
00403 if (escaped_len == src.size()) {
00404 dest->append(src.data(), src.size());
00405 return;
00406 }
00407
00408 size_t cur_dest_len = dest->size();
00409 strings_internal::STLStringResizeUninitialized(dest,
00410 cur_dest_len + escaped_len);
00411 char* append_ptr = &(*dest)[cur_dest_len];
00412
00413 for (unsigned char c : src) {
00414 int char_len = c_escaped_len[c];
00415 if (char_len == 1) {
00416 *append_ptr++ = c;
00417 } else if (char_len == 2) {
00418 switch (c) {
00419 case '\n':
00420 *append_ptr++ = '\\';
00421 *append_ptr++ = 'n';
00422 break;
00423 case '\r':
00424 *append_ptr++ = '\\';
00425 *append_ptr++ = 'r';
00426 break;
00427 case '\t':
00428 *append_ptr++ = '\\';
00429 *append_ptr++ = 't';
00430 break;
00431 case '\"':
00432 *append_ptr++ = '\\';
00433 *append_ptr++ = '\"';
00434 break;
00435 case '\'':
00436 *append_ptr++ = '\\';
00437 *append_ptr++ = '\'';
00438 break;
00439 case '\\':
00440 *append_ptr++ = '\\';
00441 *append_ptr++ = '\\';
00442 break;
00443 }
00444 } else {
00445 *append_ptr++ = '\\';
00446 *append_ptr++ = '0' + c / 64;
00447 *append_ptr++ = '0' + (c % 64) / 8;
00448 *append_ptr++ = '0' + c % 8;
00449 }
00450 }
00451 }
00452
00453 bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
00454 size_t szdest, const signed char* unbase64,
00455 size_t* len) {
00456 static const char kPad64Equals = '=';
00457 static const char kPad64Dot = '.';
00458
00459 size_t destidx = 0;
00460 int decode = 0;
00461 int state = 0;
00462 unsigned int ch = 0;
00463 unsigned int temp = 0;
00464
00465
00466
00467
00468 const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
00469
00470
00471
00472
00473
00474
00475
00476 #define GET_INPUT(label, remain) \
00477 label: \
00478 --szsrc; \
00479 ch = *src++; \
00480 decode = unbase64[ch]; \
00481 if (decode < 0) { \
00482 if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
00483 state = 4 - remain; \
00484 break; \
00485 }
00486
00487
00488
00489
00490
00491
00492 if (dest) {
00493
00494
00495
00496
00497
00498
00499 while (szsrc >= 4) {
00500
00501
00502
00503
00504
00505
00506
00507
00508 if (!src[0] || !src[1] || !src[2] ||
00509 ((temp = ((unsigned(unbase64[src[0]]) << 18) |
00510 (unsigned(unbase64[src[1]]) << 12) |
00511 (unsigned(unbase64[src[2]]) << 6) |
00512 (unsigned(unbase64[src[3]])))) &
00513 0x80000000)) {
00514
00515
00516
00517
00518
00519
00520
00521 GET_INPUT(first, 4);
00522 temp = decode;
00523 GET_INPUT(second, 3);
00524 temp = (temp << 6) | decode;
00525 GET_INPUT(third, 2);
00526 temp = (temp << 6) | decode;
00527 GET_INPUT(fourth, 1);
00528 temp = (temp << 6) | decode;
00529 } else {
00530
00531
00532
00533 szsrc -= 4;
00534 src += 4;
00535 }
00536
00537
00538
00539 if (destidx + 3 > szdest) return false;
00540 dest[destidx + 2] = temp;
00541 temp >>= 8;
00542 dest[destidx + 1] = temp;
00543 temp >>= 8;
00544 dest[destidx] = temp;
00545 destidx += 3;
00546 }
00547 } else {
00548 while (szsrc >= 4) {
00549 if (!src[0] || !src[1] || !src[2] ||
00550 ((temp = ((unsigned(unbase64[src[0]]) << 18) |
00551 (unsigned(unbase64[src[1]]) << 12) |
00552 (unsigned(unbase64[src[2]]) << 6) |
00553 (unsigned(unbase64[src[3]])))) &
00554 0x80000000)) {
00555 GET_INPUT(first_no_dest, 4);
00556 GET_INPUT(second_no_dest, 3);
00557 GET_INPUT(third_no_dest, 2);
00558 GET_INPUT(fourth_no_dest, 1);
00559 } else {
00560 szsrc -= 4;
00561 src += 4;
00562 }
00563 destidx += 3;
00564 }
00565 }
00566
00567 #undef GET_INPUT
00568
00569
00570
00571 if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
00572 !absl::ascii_isspace(ch))
00573 return false;
00574
00575 if (ch == kPad64Equals || ch == kPad64Dot) {
00576
00577
00578
00579 ++szsrc;
00580 --src;
00581 } else {
00582
00583
00584
00585
00586 while (szsrc > 0) {
00587 --szsrc;
00588 ch = *src++;
00589 decode = unbase64[ch];
00590 if (decode < 0) {
00591 if (absl::ascii_isspace(ch)) {
00592 continue;
00593 } else if (ch == kPad64Equals || ch == kPad64Dot) {
00594
00595
00596 ++szsrc;
00597 --src;
00598 break;
00599 } else {
00600 return false;
00601 }
00602 }
00603
00604
00605 temp = (temp << 6) | decode;
00606 ++state;
00607 if (state == 4) {
00608
00609
00610 if (dest) {
00611 if (destidx + 3 > szdest) return false;
00612 dest[destidx + 2] = temp;
00613 temp >>= 8;
00614 dest[destidx + 1] = temp;
00615 temp >>= 8;
00616 dest[destidx] = temp;
00617 }
00618 destidx += 3;
00619 state = 0;
00620 temp = 0;
00621 }
00622 }
00623 }
00624
00625
00626 int expected_equals = 0;
00627 switch (state) {
00628 case 0:
00629
00630 break;
00631
00632 case 1:
00633
00634 return false;
00635
00636 case 2:
00637
00638 if (dest) {
00639 if (destidx + 1 > szdest) return false;
00640 temp >>= 4;
00641 dest[destidx] = temp;
00642 }
00643 ++destidx;
00644 expected_equals = 2;
00645 break;
00646
00647 case 3:
00648
00649 if (dest) {
00650 if (destidx + 2 > szdest) return false;
00651 temp >>= 2;
00652 dest[destidx + 1] = temp;
00653 temp >>= 8;
00654 dest[destidx] = temp;
00655 }
00656 destidx += 2;
00657 expected_equals = 1;
00658 break;
00659
00660 default:
00661
00662 ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
00663 state);
00664 }
00665
00666
00667
00668
00669
00670
00671 int equals = 0;
00672 while (szsrc > 0) {
00673 if (*src == kPad64Equals || *src == kPad64Dot)
00674 ++equals;
00675 else if (!absl::ascii_isspace(*src))
00676 return false;
00677 --szsrc;
00678 ++src;
00679 }
00680
00681 const bool ok = (equals == 0 || equals == expected_equals);
00682 if (ok) *len = destidx;
00683 return ok;
00684 }
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
// Reverse lookup table for the standard base64 alphabet
// ("A-Z", "a-z", "0-9", '+', '/'): maps a byte value to its 6-bit value, or
// to -1 if the byte is not part of the alphabet.
constexpr signed char kUnBase64[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x08
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x18
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x20
    -1, -1, -1, 62, -1, -1, -1, 63,  // 0x28: '+' and '/'
    52, 53, 54, 55, 56, 57, 58, 59,  // 0x30: '0'-'7'
    60, 61, -1, -1, -1, -1, -1, -1,  // 0x38: '8'-'9'
    -1, 0,  1,  2,  3,  4,  5,  6,   // 0x40: 'A'-'G'
    7,  8,  9,  10, 11, 12, 13, 14,  // 0x48: 'H'-'O'
    15, 16, 17, 18, 19, 20, 21, 22,  // 0x50: 'P'-'W'
    23, 24, 25, -1, -1, -1, -1, -1,  // 0x58: 'X'-'Z'
    -1, 26, 27, 28, 29, 30, 31, 32,  // 0x60: 'a'-'g'
    33, 34, 35, 36, 37, 38, 39, 40,  // 0x68: 'h'-'o'
    41, 42, 43, 44, 45, 46, 47, 48,  // 0x70: 'p'-'w'
    49, 50, 51, -1, -1, -1, -1, -1,  // 0x78: 'x'-'z'
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x88
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x98
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xF0
    -1, -1, -1, -1, -1, -1, -1, -1   // 0xF8
};
00750
// Reverse lookup table for the web-safe base64 alphabet
// ("A-Z", "a-z", "0-9", '-', '_'): maps a byte value to its 6-bit value, or
// to -1 if the byte is not part of the alphabet.
constexpr signed char kUnWebSafeBase64[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x08
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x18
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x20
    -1, -1, -1, -1, -1, 62, -1, -1,  // 0x28: '-'
    52, 53, 54, 55, 56, 57, 58, 59,  // 0x30: '0'-'7'
    60, 61, -1, -1, -1, -1, -1, -1,  // 0x38: '8'-'9'
    -1, 0,  1,  2,  3,  4,  5,  6,   // 0x40: 'A'-'G'
    7,  8,  9,  10, 11, 12, 13, 14,  // 0x48: 'H'-'O'
    15, 16, 17, 18, 19, 20, 21, 22,  // 0x50: 'P'-'W'
    23, 24, 25, -1, -1, -1, -1, 63,  // 0x58: 'X'-'Z', '_'
    -1, 26, 27, 28, 29, 30, 31, 32,  // 0x60: 'a'-'g'
    33, 34, 35, 36, 37, 38, 39, 40,  // 0x68: 'h'-'o'
    41, 42, 43, 44, 45, 46, 47, 48,  // 0x70: 'p'-'w'
    49, 50, 51, -1, -1, -1, -1, -1,  // 0x78: 'x'-'z'
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x88
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0x98
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xA8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xB8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xC8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xD8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xE8
    -1, -1, -1, -1, -1, -1, -1, -1,  // 0xF0
    -1, -1, -1, -1, -1, -1, -1, -1   // 0xF8
};
00785
00786
// Returns the number of characters needed to base64-encode `input_len` bytes.
//
// Every complete group of three input bytes becomes four output characters.
// A trailing group of one byte becomes two characters and a trailing group of
// two bytes becomes three; with `do_padding` either case is filled out to
// four characters.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t full_groups = input_len / 3;
  const size_t leftover = input_len % 3;

  size_t len = full_groups * 4;
  switch (leftover) {
    case 1:
      // 8 bits -> two characters (12 bits), plus two padding characters.
      len += do_padding ? 4 : 2;
      break;
    case 2:
      // 16 bits -> three characters (18 bits), plus one padding character.
      len += do_padding ? 4 : 3;
      break;
    default:
      // Input is a whole number of groups; nothing extra is emitted.
      break;
  }

  // Debug-only guard against wrap-around of the size computation.
  assert(len >= input_len);
  return len;
}
00832
00833 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
00834 size_t szdest, const char* base64,
00835 bool do_padding) {
00836 static const char kPad64 = '=';
00837
00838 if (szsrc * 4 > szdest * 3) return 0;
00839
00840 char* cur_dest = dest;
00841 const unsigned char* cur_src = src;
00842
00843 char* const limit_dest = dest + szdest;
00844 const unsigned char* const limit_src = src + szsrc;
00845
00846
00847
00848 if (szsrc >= 3) {
00849 while (cur_src < limit_src - 3) {
00850 uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
00851
00852 cur_dest[0] = base64[in >> 18];
00853 in &= 0x3FFFF;
00854 cur_dest[1] = base64[in >> 12];
00855 in &= 0xFFF;
00856 cur_dest[2] = base64[in >> 6];
00857 in &= 0x3F;
00858 cur_dest[3] = base64[in];
00859
00860 cur_dest += 4;
00861 cur_src += 3;
00862 }
00863 }
00864
00865 szdest = limit_dest - cur_dest;
00866 szsrc = limit_src - cur_src;
00867
00868
00869 switch (szsrc) {
00870 case 0:
00871
00872 break;
00873 case 1: {
00874
00875
00876 if (szdest < 2) return 0;
00877 uint32_t in = cur_src[0];
00878 cur_dest[0] = base64[in >> 2];
00879 in &= 0x3;
00880 cur_dest[1] = base64[in << 4];
00881 cur_dest += 2;
00882 szdest -= 2;
00883 if (do_padding) {
00884 if (szdest < 2) return 0;
00885 cur_dest[0] = kPad64;
00886 cur_dest[1] = kPad64;
00887 cur_dest += 2;
00888 szdest -= 2;
00889 }
00890 break;
00891 }
00892 case 2: {
00893
00894
00895 if (szdest < 3) return 0;
00896 uint32_t in = absl::big_endian::Load16(cur_src);
00897 cur_dest[0] = base64[in >> 10];
00898 in &= 0x3FF;
00899 cur_dest[1] = base64[in >> 4];
00900 in &= 0x00F;
00901 cur_dest[2] = base64[in << 2];
00902 cur_dest += 3;
00903 szdest -= 3;
00904 if (do_padding) {
00905 if (szdest < 1) return 0;
00906 cur_dest[0] = kPad64;
00907 cur_dest += 1;
00908 szdest -= 1;
00909 }
00910 break;
00911 }
00912 case 3: {
00913
00914
00915
00916 if (szdest < 4) return 0;
00917 uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
00918 cur_dest[0] = base64[in >> 18];
00919 in &= 0x3FFFF;
00920 cur_dest[1] = base64[in >> 12];
00921 in &= 0xFFF;
00922 cur_dest[2] = base64[in >> 6];
00923 in &= 0x3F;
00924 cur_dest[3] = base64[in];
00925 cur_dest += 4;
00926 szdest -= 4;
00927 break;
00928 }
00929 default:
00930
00931
00932 ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
00933 break;
00934 }
00935 return (cur_dest - dest);
00936 }
00937
// Standard base64 alphabet, indexed by 6-bit value; 62 -> '+', 63 -> '/'.
constexpr char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

// Web-safe base64 alphabet, indexed by 6-bit value; 62 -> '-', 63 -> '_'.
constexpr char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-_";
00943
00944 template <typename String>
00945 void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
00946 bool do_padding, const char* base64_chars) {
00947 const size_t calc_escaped_size =
00948 CalculateBase64EscapedLenInternal(szsrc, do_padding);
00949 strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size);
00950
00951 const size_t escaped_len = Base64EscapeInternal(
00952 src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
00953 assert(calc_escaped_size == escaped_len);
00954 dest->erase(escaped_len);
00955 }
00956
00957 template <typename String>
00958 bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
00959 const signed char* unbase64) {
00960
00961
00962
00963 const size_t dest_len = 3 * (slen / 4) + (slen % 4);
00964
00965 strings_internal::STLStringResizeUninitialized(dest, dest_len);
00966
00967
00968
00969 size_t len;
00970 const bool ok =
00971 Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
00972 if (!ok) {
00973 dest->clear();
00974 return false;
00975 }
00976
00977
00978 assert(len <= dest_len);
00979 dest->erase(len);
00980
00981 return true;
00982 }
00983
00984
// Numeric value of each byte interpreted as a hexadecimal digit.  '0'-'9',
// 'A'-'F' and 'a'-'f' map to 0..15; every other byte maps to 0.
constexpr char kHexValue[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x00
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x10
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x20
    0,  1,  2,  3,  4,  5,  6,  7,  8, 9, 0, 0, 0, 0, 0, 0,  // 0x30: '0'-'9'
    0,  10, 11, 12, 13, 14, 15, 0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x40: 'A'-'F'
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x50
    0,  10, 11, 12, 13, 14, 15, 0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x60: 'a'-'f'
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x70
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x80
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0x90
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0xA0
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0xB0
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0xC0
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0xD0
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0,  // 0xE0
    0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0   // 0xF0
};

// Converts `2 * num` hex characters starting at `from` into `num` bytes
// stored in `to[0 .. num-1]`.  `to` only needs to support `to[i] = value`
// (e.g. `char*` or `std::string&`).  The input is not validated: characters
// that are not hex digits contribute the value 0.
template <typename T>
void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) {
  // The index has the same type as `num`, so `i * 2` cannot overflow a
  // narrower type on very large inputs.
  for (ptrdiff_t i = 0; i < num; i++) {
    to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) +
            (kHexValue[from[i * 2 + 1] & 0xFF]);
  }
}
01015
01016
01017
01018 template <typename T>
01019 void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
01020 auto dest_ptr = &dest[0];
01021 for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
01022 const char* hex_p = &kHexTable[*src_ptr * 2];
01023 std::copy(hex_p, hex_p + 2, dest_ptr);
01024 }
01025 }
01026
01027 }
01028
01029
01030
01031
01032
01033
01034 bool CUnescape(absl::string_view source, std::string* dest,
01035 std::string* error) {
01036 return CUnescapeInternal(source, kUnescapeNulls, dest, error);
01037 }
01038
01039 std::string CEscape(absl::string_view src) {
01040 std::string dest;
01041 CEscapeAndAppendInternal(src, &dest);
01042 return dest;
01043 }
01044
01045 std::string CHexEscape(absl::string_view src) {
01046 return CEscapeInternal(src, true, false);
01047 }
01048
01049 std::string Utf8SafeCEscape(absl::string_view src) {
01050 return CEscapeInternal(src, false, true);
01051 }
01052
01053 std::string Utf8SafeCHexEscape(absl::string_view src) {
01054 return CEscapeInternal(src, true, true);
01055 }
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081 bool Base64Unescape(absl::string_view src, std::string* dest) {
01082 return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
01083 }
01084
01085 bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) {
01086 return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
01087 }
01088
01089 void Base64Escape(absl::string_view src, std::string* dest) {
01090 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
01091 src.size(), dest, true, kBase64Chars);
01092 }
01093
01094 void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
01095 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
01096 src.size(), dest, false, kWebSafeBase64Chars);
01097 }
01098
01099 std::string Base64Escape(absl::string_view src) {
01100 std::string dest;
01101 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
01102 src.size(), &dest, true, kBase64Chars);
01103 return dest;
01104 }
01105
01106 std::string WebSafeBase64Escape(absl::string_view src) {
01107 std::string dest;
01108 Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
01109 src.size(), &dest, false, kWebSafeBase64Chars);
01110 return dest;
01111 }
01112
01113 std::string HexStringToBytes(absl::string_view from) {
01114 std::string result;
01115 const auto num = from.size() / 2;
01116 strings_internal::STLStringResizeUninitialized(&result, num);
01117 absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
01118 return result;
01119 }
01120
01121 std::string BytesToHexString(absl::string_view from) {
01122 std::string result;
01123 strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
01124 absl::BytesToHexStringInternal<std::string&>(
01125 reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
01126 return result;
01127 }
01128
01129 }