// Code point U+FFFD. The decoding and writing helpers in this file assign it
// to codePoint whenever the value they were given or decoded is rejected.
15 enum {REPLACEMENT_CHARACTER = 0xFFFD};
// Predicate over a single code point ch.
// NOTE(review): presumably decides whether ch may appear in an anchor or alias
// name (inferred from the function name) - confirm. Only fragments are visible
// in this listing: the switch header, every return statement and the closing
// brace are missing, so the result of each test below cannot be confirmed here.
17 bool IsAnchorChar(
int ch) {
// Visible case labels: comma, square brackets and curly braces.
19 case ',':
case '[':
case ']':
case '{':
case '}':
// Tests for the UTF-16 surrogate range U+D800..U+DFFF.
36 if (ch >= 0xD800 && ch <= 0xDFFF)
// Tests for values whose bits 1..15 are all set, i.e. low 16 bits equal to
// 0xFFFE or 0xFFFF.
38 if ((ch & 0xFFFE) == 0xFFFE)
// Tests for the range U+FDD0..U+FDEF.
40 if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
// Inspects one byte ch and dispatches on its high nibble.
// NOTE(review): presumably returns the length of the UTF-8 sequence that ch
// would lead (inferred from the name and from how the result is used as nBytes
// by the decoder) - confirm. The return statements and the remaining case
// labels are not visible in this listing.
48 int Utf8BytesIndicated(
char ch) {
// Convert through unsigned char so the value is in 0..255 whether or not plain
// char is signed.
49 int byteVal =
static_cast<unsigned char>(ch);
// Select on the top four bits of the byte.
50 switch (byteVal >> 4) {
// High nibble 0..7, i.e. bytes 0x00..0x7F (top bit clear).
51 case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
// Returns true when the top two bits of ch are 1 and 0, i.e. the byte has the
// bit pattern 10xxxxxx that UTF-8 uses for continuation bytes.
64 bool IsTrailingByte(
char ch) {
65 return (ch & 0xC0) == 0x80;
// Decodes one code point from the byte range starting at first into codePoint.
// first is taken by reference and is advanced by the visible loop below.
// NOTE(review): this listing has gaps. The returned bool, the handling of an
// invalid or single-byte lead, and any guard for first == last before the
// first dereference are on lines not shown - confirm against the full source.
68 bool GetNextCodePointAndAdvance(
int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
// Classify the lead byte; this dereferences first.
72 int nBytes = Utf8BytesIndicated(*first);
// Substitute U+FFFD (the condition guarding this assignment is not visible).
76 codePoint = REPLACEMENT_CHARACTER;
// Seed codePoint with the payload of the lead byte: the mask keeps only its
// low (7 - nBytes) bits.
86 codePoint =
static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
// Consume bytes while nBytes counts down to zero, advancing first each time.
89 for (; nBytes > 0; ++first, --nBytes) {
// Input ended early or the byte is not of the form 10xxxxxx: give up on this
// sequence and substitute U+FFFD.
90 if ((first == last) || !IsTrailingByte(*first)) {
91 codePoint = REPLACEMENT_CHARACTER;
// Merge the low six payload bits of the continuation byte.
// NOTE(review): no shift of codePoint is visible before this OR; presumably it
// is on a line not shown - confirm.
95 codePoint |= *first & 0x3F;
// Post-decode validation: each rejected value is replaced by U+FFFD.
// Rejected: values above U+10FFFF.
99 if (codePoint > 0x10FFFF)
100 codePoint = REPLACEMENT_CHARACTER;
// Rejected: the surrogate range U+D800..U+DFFF.
101 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
102 codePoint = REPLACEMENT_CHARACTER;
// Rejected: values whose low 16 bits are 0xFFFE or 0xFFFF.
103 else if ((codePoint & 0xFFFE) == 0xFFFE)
104 codePoint = REPLACEMENT_CHARACTER;
// Rejected: the range U+FDD0..U+FDEF.
105 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
106 codePoint = REPLACEMENT_CHARACTER;
// Writes codePoint to out as a sequence of one to four bytes, each byte
// streamed as a char, using the UTF-8 lead/continuation bit layout.
// Values outside 0..0x10FFFF are replaced by U+FFFD before encoding.
// NOTE(review): as visible here the range tests are strict (< 0x7F, < 0x7FF,
// < 0xFFFF), so 0x7F, 0x7FF and 0xFFFF each fall through to the next longer
// form. UTF-8 covers those values with the shorter form, so these look like
// off-by-one boundaries (0x7F would be emitted as the overlong pair 0xC1 0xBF).
// Some lines of this function are missing from the listing - confirm against
// the full source before changing anything.
110 void WriteCodePoint(
ostream& out,
int codePoint) {
// Clamp out-of-range input to the replacement character.
111 if (codePoint < 0 || codePoint > 0x10FFFF) {
112 codePoint = REPLACEMENT_CHARACTER;
// One byte: the value itself.
114 if (codePoint < 0x7F) {
115 out << static_cast<char>(codePoint);
116 }
// Two bytes: 110xxxxx 10xxxxxx.
else if (codePoint < 0x7FF) {
117 out << static_cast<char>(0xC0 | (codePoint >> 6))
118 <<
static_cast<char>(0x80 | (codePoint & 0x3F));
119 }
// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
else if (codePoint < 0xFFFF) {
120 out << static_cast<char>(0xE0 | (codePoint >> 12))
121 <<
static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
122 << static_cast<char>(0x80 | (codePoint & 0x3F));
// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (the branch header for this
// case is on a line not shown in this listing).
124 out << static_cast<char>(0xF0 | (codePoint >> 18))
125 <<
static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
126 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
127 <<
static_cast<char>(0x80 | (codePoint & 0x3F));
// Checks whether str passes a series of tests; inFlow and allowOnlyAscii
// adjust which tests apply.
// NOTE(review): presumably answers whether str can be emitted as an unquoted
// (plain) scalar (inferred from the name) - confirm. Only two of the tests are
// visible in this listing; their outcomes and the rest of the body are missing.
131 bool IsValidPlainScalar(
const std::string& str,
bool inFlow,
bool allowOnlyAscii) {
// Test: the string is non-empty and its last character is a space.
141 if(!str.empty() && *str.rbegin() ==
' ')
// Test: ASCII-only mode is on and the current byte of buffer is above 0x7F.
// NOTE(review): buffer is declared on a line not shown - confirm what it wraps.
155 if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
// Builds a backslash escape sequence containing the hexadecimal digits of
// codePoint.
// NOTE(review): the branch bodies, the declarations of digits and i, and the
// statement that writes the result to out are on lines not shown in this
// listing - confirm the exact escape forms against the full source.
163 void WriteDoubleQuoteEscapeSequence(
ostream& out,
int codePoint) {
// Lookup table from a 4-bit value to its lowercase hex digit.
164 static const char hexDigits[] =
"0123456789abcdef";
// Template buffer: backslash, an escape letter, and room for eight hex digits.
166 char escSeq[] =
"\\U00000000";
// The width of the escape depends on the magnitude of codePoint: one case for
// values below 0xFF, one for values below 0xFFFF (bodies not visible).
168 if (codePoint < 0xFF) {
171 }
else if (codePoint < 0xFFFF) {
// Fill in the digits from most significant nibble to least significant:
// nibble (digits - 1) is written first, nibble 0 last.
178 for (; digits > 0; --digits, ++i) {
179 escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
188 for(std::string::const_iterator i = str.begin();
189 GetNextCodePointAndAdvance(codePoint, i, str.end());
192 if (!IsAnchorChar(codePoint))
195 WriteCodePoint(out, codePoint);
203 if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
214 for(std::string::const_iterator i = str.begin();
215 GetNextCodePointAndAdvance(codePoint, i, str.end());
218 if (codePoint ==
'\n')
221 if (codePoint ==
'\'')
224 WriteCodePoint(out, codePoint);
234 for(std::string::const_iterator i = str.begin();
235 GetNextCodePointAndAdvance(codePoint, i, str.end());
238 if (codePoint ==
'\"')
240 else if (codePoint ==
'\\')
242 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0))
243 WriteDoubleQuoteEscapeSequence(out, codePoint);
244 else if (codePoint == 0xFEFF)
245 WriteDoubleQuoteEscapeSequence(out, codePoint);
246 else if (escapeNonAscii && codePoint > 0x7E)
247 WriteDoubleQuoteEscapeSequence(out, codePoint);
249 WriteCodePoint(out, codePoint);
260 for(std::string::const_iterator i = str.begin();
261 GetNextCodePointAndAdvance(codePoint, i, str.end());
264 if (codePoint ==
'\n')
265 out <<
"\n" << IndentTo(indent);
267 WriteCodePoint(out, codePoint);
274 if((
'a' <= ch && ch <=
'z') || (
'A' <= ch && ch <=
'Z'))
276 else if((0x20 <= ch && ch <= 0x7e) || ch ==
' ')
277 out <<
"\"" << ch <<
"\"";
286 WriteDoubleQuoteEscapeSequence(out, ch);
294 const unsigned curIndent = out.
col();
297 for(std::string::const_iterator i = str.begin();
298 GetNextCodePointAndAdvance(codePoint, i, str.end());
301 if(codePoint ==
'\n')
302 out <<
"\n" <<
IndentTo(curIndent) <<
"#" << Indentation(postCommentIndent);
304 WriteCodePoint(out, codePoint);
312 return WriteAliasName(out, str);
318 return WriteAliasName(out, str);
323 out << (verbatim ?
"!<" :
"!");
327 int n = reValid.Match(buffer);
345 while(prefixBuffer) {
351 out << prefixBuffer[0];
const RegEx & NotPrintable()
bool WriteDoubleQuotedString(ostream &out, const std::string &str, bool escapeNonAscii)
bool WriteChar(ostream &out, char ch)
bool WriteTag(ostream &out, const std::string &str, bool verbatim)
const RegEx & BlankOrBreak()
bool WriteBinary(ostream &out, const Binary &binary)
const RegEx & PlainScalarInFlow()
bool WriteAnchor(ostream &out, const std::string &str)
const RegEx & EndScalarInFlow()
const RegEx & Utf8_ByteOrderMark()
int Match(const std::string &str) const
const RegEx & PlainScalar()
bool WriteLiteralString(ostream &out, const std::string &str, int indent)
const RegEx & EndScalar()
bool WriteAlias(ostream &out, const std::string &str)
bool WriteTagWithPrefix(ostream &out, const std::string &prefix, const std::string &tag)
std::string EncodeBase64(const unsigned char *data, std::size_t size)
bool Matches(char ch) const
bool WriteComment(ostream &out, const std::string &str, int postCommentIndent)
bool WriteString(ostream &out, const std::string &str, bool inFlow, bool escapeNonAscii)
const unsigned char * data() const
bool WriteSingleQuotedString(ostream &out, const std::string &str)