emitterutils.cpp
Go to the documentation of this file.
00001 #include "emitterutils.h"
00002 #include "exp.h"
00003 #include "indentation.h"
00004 #include "yaml-cpp-pm/binary.h"
00005 #include "yaml-cpp-pm/exceptions.h"
00006 #include "stringsource.h"
00007 #include <sstream>
00008 #include <iomanip>
00009 
00010 namespace YAML_PM
00011 {
00012         namespace Utils
00013         {
00014                 namespace {
// U+FFFD: the code point the helpers below substitute when input bytes
// cannot be decoded, or when a code point is outside the encodable range.
enum { REPLACEMENT_CHARACTER = 0xFFFD };
00016 
// Tests whether a code point may appear in an anchor/alias name
// (YAML "ns-anchor-char").
//
// ch: a Unicode code point (negative values are rejected).
// Returns true when the code point is printable, is not whitespace, a line
// break, a byte order mark or a flow indicator, and is not a surrogate or
// Unicode non-character.
bool IsAnchorChar(int ch) { // test for ns-anchor-char
    switch (ch) {
        case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
        case ' ': case '\t': // s-white
        case 0xFEFF: // c-byte-order-mark
        case 0xA: case 0xD: // b-char
            return false;
        case 0x85: // NEL is the one printable character inside the C1 range
            return true;
    }

    // C0 control characters (and anything negative)
    if (ch < 0x20)
        return false;

    // Printable ASCII runs up to and including '~' (0x7E); the previous
    // exclusive bound wrongly rejected '~'.
    if (ch <= 0x7E)
        return true;

    // DEL and the C1 control range (0x85 was accepted above)
    if (ch < 0xA0)
        return false;
    // UTF-16 surrogates
    if (ch >= 0xD800 && ch <= 0xDFFF)
        return false;
    // U+xxFFFE / U+xxFFFF non-characters in every plane
    if ((ch & 0xFFFE) == 0xFFFE)
        return false;
    // U+FDD0..U+FDEF non-characters
    if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
        return false;
    // Beyond the Unicode code space
    if (ch > 0x10FFFF)
        return false;

    return true;
}
00047                         
// Returns the length of the UTF-8 sequence announced by a lead byte.
//
// ch: the first byte of a (possibly invalid) UTF-8 sequence.
// Returns 1..4 for a valid lead byte, or -1 when the byte cannot start a
// sequence: continuation bytes 0x80..0xBF, and 0xF8..0xFF, which no UTF-8
// sequence uses (previously these were reported as 4-byte leads).
int Utf8BytesIndicated(char ch) {
    // Go through unsigned char so bytes >= 0x80 are not sign-extended.
    const int byteVal = static_cast<unsigned char>(ch);

    if (byteVal < 0x80)   // 0xxxxxxx
        return 1;
    if (byteVal < 0xC0)   // 10xxxxxx: continuation byte, not a lead
        return -1;
    if (byteVal < 0xE0)   // 110xxxxx
        return 2;
    if (byteVal < 0xF0)   // 1110xxxx
        return 3;
    if (byteVal < 0xF8)   // 11110xxx
        return 4;
    return -1;            // 11111xxx: never valid in UTF-8
}
00063 
// True when the byte has the bit pattern 10xxxxxx, i.e. it is a UTF-8
// continuation byte.
bool IsTrailingByte(char ch) {
    const unsigned char byte = static_cast<unsigned char>(ch);
    const unsigned topTwoBits = byte >> 6;
    return topTwoBits == 0x2;
}
00067                         
00068                         bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
00069                                 if (first == last)
00070                                         return false;
00071                                 
00072                                 int nBytes = Utf8BytesIndicated(*first);
00073                                 if (nBytes < 1) {
00074                                         // Bad lead byte
00075                                         ++first;
00076                                         codePoint = REPLACEMENT_CHARACTER;
00077                                         return true;
00078                                 }
00079                                 
00080                                 if (nBytes == 1) {
00081                                         codePoint = *first++;
00082                                         return true;
00083                                 }
00084                                 
00085                                 // Gather bits from trailing bytes
00086                                 codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
00087                                 ++first;
00088                                 --nBytes;
00089                                 for (; nBytes > 0; ++first, --nBytes) {
00090                                         if ((first == last) || !IsTrailingByte(*first)) {
00091                                                 codePoint = REPLACEMENT_CHARACTER;
00092                                                 break;
00093                                         }
00094                                         codePoint <<= 6;
00095                                         codePoint |= *first & 0x3F;
00096                                 }
00097 
00098                                 // Check for illegal code points
00099                                 if (codePoint > 0x10FFFF)
00100                                         codePoint = REPLACEMENT_CHARACTER;
00101                                 else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
00102                                         codePoint = REPLACEMENT_CHARACTER;
00103                                 else if ((codePoint & 0xFFFE) == 0xFFFE)
00104                                         codePoint = REPLACEMENT_CHARACTER;
00105                                 else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
00106                                         codePoint = REPLACEMENT_CHARACTER;
00107                                 return true;
00108                         }
00109                         
00110                         void WriteCodePoint(ostream& out, int codePoint) {
00111                                 if (codePoint < 0 || codePoint > 0x10FFFF) {
00112                                         codePoint = REPLACEMENT_CHARACTER;
00113                                 }
00114                                 if (codePoint < 0x7F) {
00115                                         out << static_cast<char>(codePoint);
00116                                 } else if (codePoint < 0x7FF) {
00117                                         out << static_cast<char>(0xC0 | (codePoint >> 6))
00118                                             << static_cast<char>(0x80 | (codePoint & 0x3F));
00119                                 } else if (codePoint < 0xFFFF) {
00120                                         out << static_cast<char>(0xE0 | (codePoint >> 12))
00121                                             << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
00122                                             << static_cast<char>(0x80 | (codePoint & 0x3F));
00123                                 } else {
00124                                         out << static_cast<char>(0xF0 | (codePoint >> 18))
00125                                             << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
00126                                             << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
00127                                             << static_cast<char>(0x80 | (codePoint & 0x3F));
00128                                 }
00129                         }
00130                         
00131                         bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
00132                                 if(str.empty())
00133                                         return false;
00134                                 
00135                                 // first check the start
00136                                 const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
00137                                 if(!start.Matches(str))
00138                                         return false;
00139                                 
00140                                 // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
00141                                 if(!str.empty() && *str.rbegin() == ' ')
00142                                         return false;
00143 
00144                                 // then check until something is disallowed
00145                                 const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
00146                                                           || (Exp::BlankOrBreak() + Exp::Comment())
00147                                                           || Exp::NotPrintable()
00148                                                           || Exp::Utf8_ByteOrderMark()
00149                                                           || Exp::Break()
00150                                                           || Exp::Tab();
00151                                 StringCharSource buffer(str.c_str(), str.size());
00152                                 while(buffer) {
00153                                         if(disallowed.Matches(buffer))
00154                                                 return false;
00155                                         if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0]))) 
00156                                                 return false;
00157                                         ++buffer;
00158                                 }
00159                                 
00160                                 return true;
00161                         }
00162 
// Writes a code point as a double-quoted-scalar hex escape.
//
// out:       destination stream.
// codePoint: value to escape.
// Values below 0xFF are written as \xNN, values below 0xFFFF as \uNNNN and
// everything else as \UNNNNNNNN, always with lowercase hex digits.
void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
    static const char kHexDigits[] = "0123456789abcdef";

    // Choose the shortest escape form the thresholds allow.
    char marker = 'U';
    int width = 8;
    if (codePoint < 0xFF) {
        marker = 'x';
        width = 2;
    } else if (codePoint < 0xFFFF) {
        marker = 'u';
        width = 4;
    }

    // Backslash + marker + up to eight digits + terminating NUL.
    char sequence[2 + 8 + 1];
    sequence[0] = '\\';
    sequence[1] = marker;

    // Most significant nibble first.
    for (int pos = 0; pos < width; ++pos) {
        const int shift = 4 * (width - 1 - pos);
        sequence[2 + pos] = kHexDigits[(codePoint >> shift) & 0xF];
    }
    sequence[2 + width] = 0;

    out << sequence;
}
00185 
00186                         bool WriteAliasName(ostream& out, const std::string& str) {
00187                                 int codePoint;
00188                                 for(std::string::const_iterator i = str.begin();
00189                                         GetNextCodePointAndAdvance(codePoint, i, str.end());
00190                                         )
00191                                 {
00192                                         if (!IsAnchorChar(codePoint))
00193                                                 return false;
00194 
00195                                         WriteCodePoint(out, codePoint);
00196                                 }
00197                                 return true;
00198                         }
00199                 }
00200                 
00201                 bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
00202                 {
00203                         if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
00204                                 out << str;
00205                                 return true;
00206                         } else
00207                                 return WriteDoubleQuotedString(out, str, escapeNonAscii);
00208                 }
00209                 
00210                 bool WriteSingleQuotedString(ostream& out, const std::string& str)
00211                 {
00212                         out << "'";
00213                         int codePoint;
00214                         for(std::string::const_iterator i = str.begin();
00215                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
00216                                 ) 
00217                         {
00218                                 if (codePoint == '\n')
00219                                         return false;  // We can't handle a new line and the attendant indentation yet
00220 
00221                                 if (codePoint == '\'')
00222                                         out << "''";
00223                                 else
00224                                         WriteCodePoint(out, codePoint);
00225                         }
00226                         out << "'";
00227                         return true;
00228                 }
00229                 
00230                 bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
00231                 {
00232                         out << "\"";
00233                         int codePoint;
00234                         for(std::string::const_iterator i = str.begin();
00235                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
00236                                 ) 
00237                         {
00238                                 if (codePoint == '\"')
00239                                         out << "\\\"";
00240                                 else if (codePoint == '\\')
00241                                         out << "\\\\";
00242                                 else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space
00243                                         WriteDoubleQuoteEscapeSequence(out, codePoint);
00244                                 else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2) 
00245                                         WriteDoubleQuoteEscapeSequence(out, codePoint);
00246                                 else if (escapeNonAscii && codePoint > 0x7E)
00247                                         WriteDoubleQuoteEscapeSequence(out, codePoint);
00248                                 else
00249                                         WriteCodePoint(out, codePoint);
00250                         }
00251                         out << "\"";
00252                         return true;
00253                 }
00254 
00255                 bool WriteLiteralString(ostream& out, const std::string& str, int indent)
00256                 {
00257                         out << "|\n";
00258                         out << IndentTo(indent);
00259                         int codePoint;
00260                         for(std::string::const_iterator i = str.begin();
00261                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
00262                                 )
00263                         {
00264                                 if (codePoint == '\n')
00265                                   out << "\n" << IndentTo(indent);
00266                                 else
00267                                   WriteCodePoint(out, codePoint);
00268                         }
00269                         return true;
00270                 }
00271                 
00272                 bool WriteChar(ostream& out, char ch)
00273                 {
00274                         if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
00275                                 out << ch;
00276                         else if((0x20 <= ch && ch <= 0x7e) || ch == ' ')
00277                                 out << "\"" << ch << "\"";
00278                         else if(ch == '\t')
00279                                 out << "\"\\t\"";
00280                         else if(ch == '\n')
00281                                 out << "\"\\n\"";
00282                         else if(ch == '\b')
00283                                 out << "\"\\b\"";
00284                         else {
00285                                 out << "\"";
00286                                 WriteDoubleQuoteEscapeSequence(out, ch);
00287                                 out << "\"";
00288                         }
00289                         return true;
00290                 }
00291 
00292                 bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
00293                 {
00294                         const unsigned curIndent = out.col();
00295                         out << "#" << Indentation(postCommentIndent);
00296                         int codePoint;
00297                         for(std::string::const_iterator i = str.begin();
00298                                 GetNextCodePointAndAdvance(codePoint, i, str.end());
00299                                 )
00300                         {
00301                                 if(codePoint == '\n')
00302                                         out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
00303                                 else
00304                                         WriteCodePoint(out, codePoint);
00305                         }
00306                         return true;
00307                 }
00308 
00309                 bool WriteAlias(ostream& out, const std::string& str)
00310                 {
00311                         out << "*";
00312                         return WriteAliasName(out, str);
00313                 }
00314                 
00315                 bool WriteAnchor(ostream& out, const std::string& str)
00316                 {
00317                         out << "&";
00318                         return WriteAliasName(out, str);
00319                 }
00320 
00321                 bool WriteTag(ostream& out, const std::string& str, bool verbatim)
00322                 {
00323                         out << (verbatim ? "!<" : "!");
00324                         StringCharSource buffer(str.c_str(), str.size());
00325                         const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
00326                         while(buffer) {
00327                                 int n = reValid.Match(buffer);
00328                                 if(n <= 0)
00329                                         return false;
00330 
00331                                 while(--n >= 0) {
00332                                         out << buffer[0];
00333                                         ++buffer;
00334                                 }
00335                         }
00336                         if (verbatim)
00337                                 out << ">";
00338                         return true;
00339                 }
00340 
00341                 bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
00342                 {
00343                         out << "!";
00344                         StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
00345                         while(prefixBuffer) {
00346                                 int n = Exp::URI().Match(prefixBuffer);
00347                                 if(n <= 0)
00348                                         return false;
00349                                 
00350                                 while(--n >= 0) {
00351                                         out << prefixBuffer[0];
00352                                         ++prefixBuffer;
00353                                 }
00354                         }
00355 
00356                         out << "!";
00357                         StringCharSource tagBuffer(tag.c_str(), tag.size());
00358                         while(tagBuffer) {
00359                                 int n = Exp::Tag().Match(tagBuffer);
00360                                 if(n <= 0)
00361                                         return false;
00362                                 
00363                                 while(--n >= 0) {
00364                                         out << tagBuffer[0];
00365                                         ++tagBuffer;
00366                                 }
00367                         }
00368                         return true;
00369                 }
00370 
00371                 bool WriteBinary(ostream& out, const Binary& binary)
00372                 {
00373             WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false);
00374             return true;
00375                 }
00376         }
00377 }
00378 


libpointmatcher
Author(s):
autogenerated on Thu Jun 20 2019 19:51:29