// Lowercase hexadecimal digits, indexed by nibble value (0..15).
// Used to render the four hex digits of a "\uXXXX" escape sequence.
static const char kHex[] = "0123456789abcdef";
// Escape sequences for code points below 0xa0, indexed directly by code
// point value (the lookup is `kCommonEscapes[cp]` for `cp < 0xa0`).
//
// An empty string means the code point has no entry in this table.
// Every entry must sit at the index equal to its code point: a single
// missing initializer silently shifts all following entries.
//
// Each row below holds eight consecutive code points; the trailing comment
// gives the code point of the first entry in the row.
static const char kCommonEscapes[160][7] = {
    // C0 control characters (0x00 - 0x1f). Short forms are used for
    // \b, \t, \n, \f and \r; everything else is written as \u00XX.
    "\\u0000", "\\u0001", "\\u0002", "\\u0003",
    "\\u0004", "\\u0005", "\\u0006", "\\u0007",  // 0x00
    "\\b",     "\\t",     "\\n",     "\\u000b",
    "\\f",     "\\r",     "\\u000e", "\\u000f",  // 0x08
    "\\u0010", "\\u0011", "\\u0012", "\\u0013",
    "\\u0014", "\\u0015", "\\u0016", "\\u0017",  // 0x10
    "\\u0018", "\\u0019", "\\u001a", "\\u001b",
    "\\u001c", "\\u001d", "\\u001e", "\\u001f",  // 0x18

    // Printable ASCII (0x20 - 0x7e) plus DEL (0x7f). Only '"' (0x22),
    // '<' (0x3c), '>' (0x3e), '\\' (0x5c) and DEL (0x7f) have escapes.
    "", "", "\\\"", "", "", "", "", "",          // 0x20
    "", "", "", "", "", "", "", "",              // 0x28
    "", "", "", "", "", "", "", "",              // 0x30
    "", "", "", "", "\\u003c", "", "\\u003e", "",  // 0x38
    "", "", "", "", "", "", "", "",              // 0x40
    "", "", "", "", "", "", "", "",              // 0x48
    "", "", "", "", "", "", "", "",              // 0x50
    "", "", "", "", "\\\\", "", "", "",          // 0x58
    "", "", "", "", "", "", "", "",              // 0x60
    "", "", "", "", "", "", "", "",              // 0x68
    "", "", "", "", "", "", "", "",              // 0x70
    "", "", "", "", "", "", "", "\\u007f",       // 0x78

    // C1 control characters (0x80 - 0x9f), all written as \u00XX.
    "\\u0080", "\\u0081", "\\u0082", "\\u0083",
    "\\u0084", "\\u0085", "\\u0086", "\\u0087",  // 0x80
    "\\u0088", "\\u0089", "\\u008a", "\\u008b",
    "\\u008c", "\\u008d", "\\u008e", "\\u008f",  // 0x88
    "\\u0090", "\\u0091", "\\u0092", "\\u0093",
    "\\u0094", "\\u0095", "\\u0096", "\\u0097",  // 0x90
    "\\u0098", "\\u0099", "\\u009a", "\\u009b",
    "\\u009c", "\\u009d", "\\u009e", "\\u009f"};  // 0x98
80 inline bool IsSurrogate(
uint32 c) {
128 bool ReadCodePoint(StringPiece
str,
int index,
uint32* cp,
int* num_left,
130 if (*num_left == 0) {
161 }
else if (*cp <= 0xbf) {
163 }
else if (*cp <= 0xdf) {
166 }
else if (*cp <= 0xef) {
169 }
else if (*cp <= 0xf7) {
180 while (*num_left > 0 &&
index <
str.size()) {
184 *cp = (*cp << 6) | (
ch & 0x3f);
185 if (ch < 0x80 || ch > 0xbf)
return false;
194 buffer[5] = kHex[cp & 0x0f];
196 buffer[4] = kHex[cp & 0x0f];
198 buffer[3] = kHex[cp & 0x0f];
200 buffer[2] = kHex[cp & 0x0f];
201 return StringPiece(
buffer, 6);
208 uint16 low = ToLowSurrogate(cp);
209 uint16 high = ToHighSurrogate(cp);
211 buffer[11] = kHex[low & 0x0f];
213 buffer[10] = kHex[low & 0x0f];
215 buffer[9] = kHex[low & 0x0f];
217 buffer[8] = kHex[low & 0x0f];
219 buffer[5] = kHex[high & 0x0f];
221 buffer[4] = kHex[high & 0x0f];
223 buffer[3] = kHex[high & 0x0f];
225 buffer[2] = kHex[high & 0x0f];
227 return StringPiece(
buffer, 12);
238 if (cp < 0xa0)
return kCommonEscapes[cp];
255 if ((cp >= 0x0600 && cp <= 0x0603) ||
256 (cp >= 0x200b && cp <= 0x200f) ||
257 (cp >= 0x2028 && cp <= 0x202e) ||
258 (cp >= 0x2060 && cp <= 0x2064) ||
259 (cp >= 0x206a && cp <= 0x206f)) {
263 if (cp == 0x000e0001 ||
264 (cp >= 0x0001d173 && cp <= 0x0001d17a) ||
265 (cp >= 0x000e0020 && cp <= 0x000e007f)) {
266 return ToSurrogateHex(cp,
buffer);
269 return StringPiece();
275 StringPiece EscapeCodePoint(
uint32 cp,
char*
buffer,
bool force_output) {
276 StringPiece sp = EscapeCodePoint(cp,
buffer);
277 if (force_output && sp.empty()) {
278 buffer[5] = (cp & 0x3f) | 0x80;
282 sp = StringPiece(
buffer + 4, 2);
285 buffer[4] = (cp & 0x3f) | 0x80;
289 sp = StringPiece(
buffer + 3, 3);
292 buffer[3] = (cp & 0x3f) | 0x80;
293 buffer[2] = ((cp >> 6) & 0x07) | 0xf0;
294 sp = StringPiece(
buffer + 2, 4);
302 strings::ByteSink*
output) {
303 char buffer[12] =
"\\udead\\ubee";
306 while (
input->Available() > 0) {
312 bool cp_was_split = num_left > 0;
319 ok = ReadCodePoint(
str,
i, &cp, &num_left, &num_read);
320 if (num_left > 0 || !
ok)
break;
321 escaped = EscapeCodePoint(cp,
buffer, cp_was_split);
322 if (!escaped.
empty())
break;
325 }
while (
i <
str.length());
328 if (num_read > 0)
input->Skip(num_read);
333 }
else if (num_left == 0 && !escaped.
empty()) {