// U+FFFD. The decoding and encoding routines in this file assign this value
// to a code point when the input is ill-formed or the value is not allowed.
15 enum {REPLACEMENT_CHARACTER = 0xFFFD};
// Predicate over a single code point `ch`.
// NOTE(review): only part of the body is visible in this listing -- the switch
// header, further case labels and every return statement are missing -- so the
// result produced for each group below is not stated here.
17 bool IsAnchorChar(
int ch) {
// Flow-indicator punctuation matched by case labels: , [ ] { }
19 case ',':
case '[':
case ']':
case '{':
case '}':
// UTF-16 surrogate range U+D800..U+DFFF.
36 if (ch >= 0xD800 && ch <= 0xDFFF)
// Any code point whose low 16 bits are 0xFFFE or 0xFFFF (the mask ignores bit 0
// and everything above bit 15).
38 if ((ch & 0xFFFE) == 0xFFFE)
// The contiguous range U+FDD0..U+FDEF.
40 if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
// Inspects one byte `ch` and returns an int; GetNextCodePointAndAdvance stores
// the result as `nBytes` for the sequence starting at that byte.
// NOTE(review): the return statements and the case labels for high nibbles
// 8..15 are not visible in this listing.
48 int Utf8BytesIndicated(
char ch) {
// Go through unsigned char so a byte >= 0x80 is not widened to a negative int
// where plain char is signed.
49 int byteVal =
static_cast<unsigned char>(ch);
// Dispatch on the high nibble of the byte.
50 switch (byteVal >> 4) {
// High nibbles 0..7 (bytes 0x00..0x7F) share a single case group.
51 case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
// Reports whether `ch` is a UTF-8 continuation byte, i.e. a byte whose two
// most significant bits are 1 and 0 (bit pattern 10xxxxxx).
bool IsTrailingByte(char ch) {
	const unsigned char byte = static_cast<unsigned char>(ch);
	return (byte >> 6) == 0x02;
}
// Decodes one UTF-8 sequence from [first, last) into `codePoint`, moving
// `first` forward as bytes are consumed. Ill-formed input and disallowed values
// are reported as REPLACEMENT_CHARACTER. Callers in this file use the bool
// result as a loop condition.
// NOTE(review): the early-exit checks, the return statements and some of the
// statements around the loop are not visible in this listing.
68 bool GetNextCodePointAndAdvance(
int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
// Sequence length as indicated by the lead byte.
72 int nBytes = Utf8BytesIndicated(*first);
// Fallback for a lead byte that cannot start a sequence -- presumably; the
// guarding condition is not visible here.
76 codePoint = REPLACEMENT_CHARACTER;
// Keep only the payload bits of the lead byte: the mask preserves its low
// (7 - nBytes) bits.
86 codePoint =
static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
// Consume bytes while nBytes remain.
89 for (; nBytes > 0; ++first, --nBytes) {
// Input ended early, or this byte is not of the form 10xxxxxx.
90 if ((first == last) || !IsTrailingByte(*first)) {
91 codePoint = REPLACEMENT_CHARACTER;
// Merge in the six payload bits of this continuation byte.
95 codePoint |= *first & 0x3F;
// Post-decode validation: above U+10FFFF, UTF-16 surrogates, values whose low
// 16 bits are 0xFFFE/0xFFFF, and U+FDD0..U+FDEF are all replaced.
99 if (codePoint > 0x10FFFF)
100 codePoint = REPLACEMENT_CHARACTER;
101 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
102 codePoint = REPLACEMENT_CHARACTER;
103 else if ((codePoint & 0xFFFE) == 0xFFFE)
104 codePoint = REPLACEMENT_CHARACTER;
105 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
106 codePoint = REPLACEMENT_CHARACTER;
110 void WriteCodePoint(
ostream&
out,
int codePoint) {
111 if (codePoint < 0 || codePoint > 0x10FFFF) {
112 codePoint = REPLACEMENT_CHARACTER;
114 if (codePoint < 0x7F) {
115 out << static_cast<char>(codePoint);
116 }
else if (codePoint < 0x7FF) {
117 out << static_cast<char>(0xC0 | (codePoint >> 6))
118 <<
static_cast<char>(0x80 | (codePoint & 0x3F));
119 }
else if (codePoint < 0xFFFF) {
120 out << static_cast<char>(0xE0 | (codePoint >> 12))
121 <<
static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
122 <<
static_cast<char>(0x80 | (codePoint & 0x3F));
124 out << static_cast<char>(0xF0 | (codePoint >> 18))
125 <<
static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
126 <<
static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
127 <<
static_cast<char>(0x80 | (codePoint & 0x3F));
// Runs a series of checks on `str`; `inFlow` and `allowOnlyAscii` select which
// checks apply.
// NOTE(review): most of the body -- including the declarations of `start` and
// `buffer` and every return statement -- is not visible in this listing, so the
// outcome of each check below is not stated.
131 bool IsValidPlainScalar(
const std::string& str,
bool inFlow,
bool allowOnlyAscii) {
// Branch taken when `str` is not matched by `start`.
137 if(!
start.Matches(str))
// Branch taken when a non-empty string ends with a space.
141 if(!str.empty() && *str.rbegin() ==
' ')
// With allowOnlyAscii set: branch taken when the byte at the front of `buffer`
// is above 0x7F (compared as unsigned char).
155 if(allowOnlyAscii && (0x7F <
static_cast<unsigned char>(buffer[0])))
// Builds a backslash escape sequence for `codePoint` in a local buffer, using
// lowercase hexadecimal digits.
// NOTE(review): the bodies of the width-selection branches, the declarations
// of `digits` and `i`, and the final write to `out` are not visible in this
// listing.
163 void WriteDoubleQuoteEscapeSequence(
ostream&
out,
int codePoint) {
// Lookup table from a nibble value (0..15) to its hex digit.
164 static const char hexDigits[] =
"0123456789abcdef";
// Template sized for the longest form: backslash, 'U' and eight hex digits.
166 char escSeq[] =
"\\U00000000";
// The form is chosen by magnitude: below 0xFF, below 0xFFFF, or larger.
168 if (codePoint < 0xFF) {
171 }
else if (codePoint < 0xFFFF) {
// Fill in `digits` hex digits, most significant nibble first.
178 for (; digits > 0; --digits, ++i) {
179 escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
188 for(std::string::const_iterator i = str.begin();
189 GetNextCodePointAndAdvance(codePoint, i, str.end());
192 if (!IsAnchorChar(codePoint))
195 WriteCodePoint(
out, codePoint);
203 if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
214 for(std::string::const_iterator i = str.begin();
215 GetNextCodePointAndAdvance(codePoint, i, str.end());
218 if (codePoint ==
'\n')
221 if (codePoint ==
'\'')
224 WriteCodePoint(
out, codePoint);
234 for(std::string::const_iterator i = str.begin();
235 GetNextCodePointAndAdvance(codePoint, i, str.end());
238 if (codePoint ==
'\"')
240 else if (codePoint ==
'\\')
242 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0))
243 WriteDoubleQuoteEscapeSequence(
out, codePoint);
244 else if (codePoint == 0xFEFF)
245 WriteDoubleQuoteEscapeSequence(
out, codePoint);
246 else if (escapeNonAscii && codePoint > 0x7E)
247 WriteDoubleQuoteEscapeSequence(
out, codePoint);
249 WriteCodePoint(
out, codePoint);
260 for(std::string::const_iterator i = str.begin();
261 GetNextCodePointAndAdvance(codePoint, i, str.end());
264 if (codePoint ==
'\n')
267 WriteCodePoint(
out, codePoint);
274 if((
'a' <= ch && ch <=
'z') || (
'A' <= ch && ch <=
'Z'))
276 else if((0x20 <= ch && ch <= 0x7e) || ch ==
' ')
277 out <<
"\"" << ch <<
"\"";
286 WriteDoubleQuoteEscapeSequence(
out, ch);
294 const unsigned curIndent =
out.col();
297 for(std::string::const_iterator i = str.begin();
298 GetNextCodePointAndAdvance(codePoint, i, str.end());
301 if(codePoint ==
'\n')
304 WriteCodePoint(
out, codePoint);
312 return WriteAliasName(
out, str);
318 return WriteAliasName(
out, str);
323 out << (verbatim ?
"!<" :
"!");
327 int n = reValid.
Match(buffer);
345 while(prefixBuffer) {
351 out << prefixBuffer[0];