00001 #include "emitterutils.h"
00002 #include "exp.h"
00003 #include "indentation.h"
00004 #include "yaml-cpp-pm/binary.h"
00005 #include "yaml-cpp-pm/exceptions.h"
00006 #include "stringsource.h"
00007 #include <sstream>
00008 #include <iomanip>
00009
00010 namespace YAML_PM
00011 {
00012 namespace Utils
00013 {
00014 namespace {
// U+FFFD, substituted for any code point that cannot be decoded or encoded.
const int REPLACEMENT_CHARACTER = 0xFFFD;
00016
// Returns true when the code point `ch` may appear in an anchor/alias name.
//
// Rejected: flow indicators, space/tab, line breaks, the byte order mark,
// control characters, DEL and the C1 range (except NEL, 0x85), surrogates,
// Unicode noncharacters and anything beyond U+10FFFF.
bool IsAnchorChar(int ch) {
    switch (ch) {
        case ',': case '[': case ']': case '{': case '}':  // flow indicators
        case ' ': case '\t':                               // white space
        case 0xFEFF:                                       // byte order mark
        case 0xA: case 0xD:                                // line breaks
            return false;
        case 0x85:                                         // NEL is explicitly allowed
            return true;
    }

    // Remaining C0 control characters (and negative values).
    if (ch < 0x20)
        return false;

    // Printable ASCII runs up to and including '~' (0x7E); the previous
    // strict comparison wrongly rejected '~'.
    if (ch <= 0x7E)
        return true;

    // DEL and the C1 control range (0x85 was handled above).
    if (ch < 0xA0)
        return false;
    // UTF-16 surrogate halves are not characters.
    if (ch >= 0xD800 && ch <= 0xDFFF)
        return false;
    // U+xxFFFE / U+xxFFFF noncharacters in every plane.
    if ((ch & 0xFFFE) == 0xFFFE)
        return false;
    // U+FDD0..U+FDEF noncharacters.
    if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
        return false;
    // Beyond the Unicode code space.
    if (ch > 0x10FFFF)
        return false;

    return true;
}
00047
// Reports how many bytes the UTF-8 sequence starting with lead byte `ch`
// claims to occupy, judged from its high bits alone. Returns -1 when `ch`
// has the 10xxxxxx continuation-byte pattern and so cannot start a sequence.
// Every byte of the form 1111xxxx (0xF0..0xFF) reports 4.
int Utf8BytesIndicated(char ch) {
    const unsigned lead = static_cast<unsigned char>(ch);

    if ((lead & 0x80) == 0x00)  // 0xxxxxxx
        return 1;
    if ((lead & 0xE0) == 0xC0)  // 110xxxxx
        return 2;
    if ((lead & 0xF0) == 0xE0)  // 1110xxxx
        return 3;
    if ((lead & 0xF0) == 0xF0)  // 1111xxxx
        return 4;

    return -1;                  // 10xxxxxx
}
00063
// True when `ch` has the 10xxxxxx bit pattern of a UTF-8 continuation byte.
bool IsTrailingByte(char ch) {
    const unsigned char byte = static_cast<unsigned char>(ch);
    return (byte >> 6) == 0x2;
}
00067
00068 bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
00069 if (first == last)
00070 return false;
00071
00072 int nBytes = Utf8BytesIndicated(*first);
00073 if (nBytes < 1) {
00074
00075 ++first;
00076 codePoint = REPLACEMENT_CHARACTER;
00077 return true;
00078 }
00079
00080 if (nBytes == 1) {
00081 codePoint = *first++;
00082 return true;
00083 }
00084
00085
00086 codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
00087 ++first;
00088 --nBytes;
00089 for (; nBytes > 0; ++first, --nBytes) {
00090 if ((first == last) || !IsTrailingByte(*first)) {
00091 codePoint = REPLACEMENT_CHARACTER;
00092 break;
00093 }
00094 codePoint <<= 6;
00095 codePoint |= *first & 0x3F;
00096 }
00097
00098
00099 if (codePoint > 0x10FFFF)
00100 codePoint = REPLACEMENT_CHARACTER;
00101 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
00102 codePoint = REPLACEMENT_CHARACTER;
00103 else if ((codePoint & 0xFFFE) == 0xFFFE)
00104 codePoint = REPLACEMENT_CHARACTER;
00105 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
00106 codePoint = REPLACEMENT_CHARACTER;
00107 return true;
00108 }
00109
00110 void WriteCodePoint(ostream& out, int codePoint) {
00111 if (codePoint < 0 || codePoint > 0x10FFFF) {
00112 codePoint = REPLACEMENT_CHARACTER;
00113 }
00114 if (codePoint < 0x7F) {
00115 out << static_cast<char>(codePoint);
00116 } else if (codePoint < 0x7FF) {
00117 out << static_cast<char>(0xC0 | (codePoint >> 6))
00118 << static_cast<char>(0x80 | (codePoint & 0x3F));
00119 } else if (codePoint < 0xFFFF) {
00120 out << static_cast<char>(0xE0 | (codePoint >> 12))
00121 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
00122 << static_cast<char>(0x80 | (codePoint & 0x3F));
00123 } else {
00124 out << static_cast<char>(0xF0 | (codePoint >> 18))
00125 << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
00126 << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
00127 << static_cast<char>(0x80 | (codePoint & 0x3F));
00128 }
00129 }
00130
00131 bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
00132 if(str.empty())
00133 return false;
00134
00135
00136 const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
00137 if(!start.Matches(str))
00138 return false;
00139
00140
00141 if(!str.empty() && *str.rbegin() == ' ')
00142 return false;
00143
00144
00145 const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
00146 || (Exp::BlankOrBreak() + Exp::Comment())
00147 || Exp::NotPrintable()
00148 || Exp::Utf8_ByteOrderMark()
00149 || Exp::Break()
00150 || Exp::Tab();
00151 StringCharSource buffer(str.c_str(), str.size());
00152 while(buffer) {
00153 if(disallowed.Matches(buffer))
00154 return false;
00155 if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
00156 return false;
00157 ++buffer;
00158 }
00159
00160 return true;
00161 }
00162
00163 void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
00164 static const char hexDigits[] = "0123456789abcdef";
00165
00166 char escSeq[] = "\\U00000000";
00167 int digits = 8;
00168 if (codePoint < 0xFF) {
00169 escSeq[1] = 'x';
00170 digits = 2;
00171 } else if (codePoint < 0xFFFF) {
00172 escSeq[1] = 'u';
00173 digits = 4;
00174 }
00175
00176
00177 int i = 2;
00178 for (; digits > 0; --digits, ++i) {
00179 escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
00180 }
00181
00182 escSeq[i] = 0;
00183 out << escSeq;
00184 }
00185
00186 bool WriteAliasName(ostream& out, const std::string& str) {
00187 int codePoint;
00188 for(std::string::const_iterator i = str.begin();
00189 GetNextCodePointAndAdvance(codePoint, i, str.end());
00190 )
00191 {
00192 if (!IsAnchorChar(codePoint))
00193 return false;
00194
00195 WriteCodePoint(out, codePoint);
00196 }
00197 return true;
00198 }
00199 }
00200
00201 bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
00202 {
00203 if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
00204 out << str;
00205 return true;
00206 } else
00207 return WriteDoubleQuotedString(out, str, escapeNonAscii);
00208 }
00209
00210 bool WriteSingleQuotedString(ostream& out, const std::string& str)
00211 {
00212 out << "'";
00213 int codePoint;
00214 for(std::string::const_iterator i = str.begin();
00215 GetNextCodePointAndAdvance(codePoint, i, str.end());
00216 )
00217 {
00218 if (codePoint == '\n')
00219 return false;
00220
00221 if (codePoint == '\'')
00222 out << "''";
00223 else
00224 WriteCodePoint(out, codePoint);
00225 }
00226 out << "'";
00227 return true;
00228 }
00229
00230 bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
00231 {
00232 out << "\"";
00233 int codePoint;
00234 for(std::string::const_iterator i = str.begin();
00235 GetNextCodePointAndAdvance(codePoint, i, str.end());
00236 )
00237 {
00238 if (codePoint == '\"')
00239 out << "\\\"";
00240 else if (codePoint == '\\')
00241 out << "\\\\";
00242 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0))
00243 WriteDoubleQuoteEscapeSequence(out, codePoint);
00244 else if (codePoint == 0xFEFF)
00245 WriteDoubleQuoteEscapeSequence(out, codePoint);
00246 else if (escapeNonAscii && codePoint > 0x7E)
00247 WriteDoubleQuoteEscapeSequence(out, codePoint);
00248 else
00249 WriteCodePoint(out, codePoint);
00250 }
00251 out << "\"";
00252 return true;
00253 }
00254
00255 bool WriteLiteralString(ostream& out, const std::string& str, int indent)
00256 {
00257 out << "|\n";
00258 out << IndentTo(indent);
00259 int codePoint;
00260 for(std::string::const_iterator i = str.begin();
00261 GetNextCodePointAndAdvance(codePoint, i, str.end());
00262 )
00263 {
00264 if (codePoint == '\n')
00265 out << "\n" << IndentTo(indent);
00266 else
00267 WriteCodePoint(out, codePoint);
00268 }
00269 return true;
00270 }
00271
00272 bool WriteChar(ostream& out, char ch)
00273 {
00274 if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
00275 out << ch;
00276 else if((0x20 <= ch && ch <= 0x7e) || ch == ' ')
00277 out << "\"" << ch << "\"";
00278 else if(ch == '\t')
00279 out << "\"\\t\"";
00280 else if(ch == '\n')
00281 out << "\"\\n\"";
00282 else if(ch == '\b')
00283 out << "\"\\b\"";
00284 else {
00285 out << "\"";
00286 WriteDoubleQuoteEscapeSequence(out, ch);
00287 out << "\"";
00288 }
00289 return true;
00290 }
00291
00292 bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
00293 {
00294 const unsigned curIndent = out.col();
00295 out << "#" << Indentation(postCommentIndent);
00296 int codePoint;
00297 for(std::string::const_iterator i = str.begin();
00298 GetNextCodePointAndAdvance(codePoint, i, str.end());
00299 )
00300 {
00301 if(codePoint == '\n')
00302 out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
00303 else
00304 WriteCodePoint(out, codePoint);
00305 }
00306 return true;
00307 }
00308
00309 bool WriteAlias(ostream& out, const std::string& str)
00310 {
00311 out << "*";
00312 return WriteAliasName(out, str);
00313 }
00314
00315 bool WriteAnchor(ostream& out, const std::string& str)
00316 {
00317 out << "&";
00318 return WriteAliasName(out, str);
00319 }
00320
00321 bool WriteTag(ostream& out, const std::string& str, bool verbatim)
00322 {
00323 out << (verbatim ? "!<" : "!");
00324 StringCharSource buffer(str.c_str(), str.size());
00325 const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
00326 while(buffer) {
00327 int n = reValid.Match(buffer);
00328 if(n <= 0)
00329 return false;
00330
00331 while(--n >= 0) {
00332 out << buffer[0];
00333 ++buffer;
00334 }
00335 }
00336 if (verbatim)
00337 out << ">";
00338 return true;
00339 }
00340
00341 bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
00342 {
00343 out << "!";
00344 StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
00345 while(prefixBuffer) {
00346 int n = Exp::URI().Match(prefixBuffer);
00347 if(n <= 0)
00348 return false;
00349
00350 while(--n >= 0) {
00351 out << prefixBuffer[0];
00352 ++prefixBuffer;
00353 }
00354 }
00355
00356 out << "!";
00357 StringCharSource tagBuffer(tag.c_str(), tag.size());
00358 while(tagBuffer) {
00359 int n = Exp::Tag().Match(tagBuffer);
00360 if(n <= 0)
00361 return false;
00362
00363 while(--n >= 0) {
00364 out << tagBuffer[0];
00365 ++tagBuffer;
00366 }
00367 }
00368 return true;
00369 }
00370
00371 bool WriteBinary(ostream& out, const Binary& binary)
00372 {
00373 WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false);
00374 return true;
00375 }
00376 }
00377 }
00378