emitterutils.cpp
Go to the documentation of this file.
1 #include "emitterutils.h"
2 #include "exp.h"
3 #include "indentation.h"
4 #include "yaml-cpp-pm/binary.h"
6 #include "stringsource.h"
7 #include <sstream>
8 #include <iomanip>
9 
10 namespace YAML_PM
11 {
12  namespace Utils
13  {
14  namespace {
// U+FFFD: the code point substituted by the helpers below whenever a byte
// sequence or code point cannot be used as-is.
enum { REPLACEMENT_CHARACTER = 0xFFFD };
16 
// Tests whether a code point may appear in an anchor/alias name
// (the YAML ns-anchor-char production).
//
// ch: a Unicode code point (negative values are rejected).
// Returns true when the code point is allowed in an anchor name.
bool IsAnchorChar(int ch) { // test for ns-anchor-char
    switch (ch) {
        case ',': case '[': case ']': case '{': case '}': // c-flow-indicator
        case ' ': case '\t': // s-white
        case 0xFEFF: // c-byte-order-mark
        case 0xA: case 0xD: // b-char
            return false;
        case 0x85: // the one value in the 0x7F..0x9F range that is accepted
            return true;
    }

    // Control characters (and any negative value)
    if (ch < 0x20)
        return false;

    // Printable ASCII. The upper bound is inclusive so that '~' (0x7E),
    // which is a printable non-indicator character, is accepted.
    if (ch <= 0x7E)
        return true;

    // 0x7F..0x9F (0x85 was already accepted by the switch above)
    if (ch < 0xA0)
        return false;
    // UTF-16 surrogate range
    if (ch >= 0xD800 && ch <= 0xDFFF)
        return false;
    // Low 16 bits equal to 0xFFFE or 0xFFFF
    if ((ch & 0xFFFE) == 0xFFFE)
        return false;
    if ((ch >= 0xFDD0) && (ch <= 0xFDEF))
        return false;
    // Beyond the Unicode range
    if (ch > 0x10FFFF)
        return false;

    return true;
}
47 
// Reports the length of the UTF-8 sequence announced by a lead byte.
//
// ch: the first byte of a (supposed) UTF-8 sequence.
// Returns 1, 2, 3 or 4 for a lead byte, or -1 when the byte cannot start a
// sequence: a continuation byte (0x80..0xBF) or a byte in 0xF8..0xFF, which
// never appears in UTF-8.
int Utf8BytesIndicated(char ch) {
    // Go through unsigned char so that bytes >= 0x80 are not seen as negative.
    const int byteVal = static_cast<unsigned char>(ch);

    if (byteVal < 0x80)
        return 1;   // 0xxxxxxx
    if (byteVal < 0xC0)
        return -1;  // 10xxxxxx: continuation byte, not a lead byte
    if (byteVal < 0xE0)
        return 2;   // 110xxxxx
    if (byteVal < 0xF0)
        return 3;   // 1110xxxx
    if (byteVal < 0xF8)
        return 4;   // 11110xxx

    return -1;      // 11111xxx: not a valid UTF-8 lead byte
}
63 
// Returns true when the byte has the bit pattern 10xxxxxx, i.e. it is a
// UTF-8 continuation (trailing) byte.
bool IsTrailingByte(char ch) {
    const unsigned char byte = static_cast<unsigned char>(ch);
    const unsigned topTwoBits = byte >> 6;
    return topTwoBits == 0x2;
}
67 
68  bool GetNextCodePointAndAdvance(int& codePoint, std::string::const_iterator& first, std::string::const_iterator last) {
69  if (first == last)
70  return false;
71 
72  int nBytes = Utf8BytesIndicated(*first);
73  if (nBytes < 1) {
74  // Bad lead byte
75  ++first;
76  codePoint = REPLACEMENT_CHARACTER;
77  return true;
78  }
79 
80  if (nBytes == 1) {
81  codePoint = *first++;
82  return true;
83  }
84 
85  // Gather bits from trailing bytes
86  codePoint = static_cast<unsigned char>(*first) & ~(0xFF << (7 - nBytes));
87  ++first;
88  --nBytes;
89  for (; nBytes > 0; ++first, --nBytes) {
90  if ((first == last) || !IsTrailingByte(*first)) {
91  codePoint = REPLACEMENT_CHARACTER;
92  break;
93  }
94  codePoint <<= 6;
95  codePoint |= *first & 0x3F;
96  }
97 
98  // Check for illegal code points
99  if (codePoint > 0x10FFFF)
100  codePoint = REPLACEMENT_CHARACTER;
101  else if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
102  codePoint = REPLACEMENT_CHARACTER;
103  else if ((codePoint & 0xFFFE) == 0xFFFE)
104  codePoint = REPLACEMENT_CHARACTER;
105  else if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF)
106  codePoint = REPLACEMENT_CHARACTER;
107  return true;
108  }
109 
110  void WriteCodePoint(ostream& out, int codePoint) {
111  if (codePoint < 0 || codePoint > 0x10FFFF) {
112  codePoint = REPLACEMENT_CHARACTER;
113  }
114  if (codePoint < 0x7F) {
115  out << static_cast<char>(codePoint);
116  } else if (codePoint < 0x7FF) {
117  out << static_cast<char>(0xC0 | (codePoint >> 6))
118  << static_cast<char>(0x80 | (codePoint & 0x3F));
119  } else if (codePoint < 0xFFFF) {
120  out << static_cast<char>(0xE0 | (codePoint >> 12))
121  << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
122  << static_cast<char>(0x80 | (codePoint & 0x3F));
123  } else {
124  out << static_cast<char>(0xF0 | (codePoint >> 18))
125  << static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F))
126  << static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F))
127  << static_cast<char>(0x80 | (codePoint & 0x3F));
128  }
129  }
130 
131  bool IsValidPlainScalar(const std::string& str, bool inFlow, bool allowOnlyAscii) {
132  if(str.empty())
133  return false;
134 
135  // first check the start
136  const RegEx& start = (inFlow ? Exp::PlainScalarInFlow() : Exp::PlainScalar());
137  if(!start.Matches(str))
138  return false;
139 
140  // and check the end for plain whitespace (which can't be faithfully kept in a plain scalar)
141  if(!str.empty() && *str.rbegin() == ' ')
142  return false;
143 
144  // then check until something is disallowed
145  const RegEx& disallowed = (inFlow ? Exp::EndScalarInFlow() : Exp::EndScalar())
147  || Exp::NotPrintable()
149  || Exp::Break()
150  || Exp::Tab();
151  StringCharSource buffer(str.c_str(), str.size());
152  while(buffer) {
153  if(disallowed.Matches(buffer))
154  return false;
155  if(allowOnlyAscii && (0x7F < static_cast<unsigned char>(buffer[0])))
156  return false;
157  ++buffer;
158  }
159 
160  return true;
161  }
162 
163  void WriteDoubleQuoteEscapeSequence(ostream& out, int codePoint) {
164  static const char hexDigits[] = "0123456789abcdef";
165 
166  char escSeq[] = "\\U00000000";
167  int digits = 8;
168  if (codePoint < 0xFF) {
169  escSeq[1] = 'x';
170  digits = 2;
171  } else if (codePoint < 0xFFFF) {
172  escSeq[1] = 'u';
173  digits = 4;
174  }
175 
176  // Write digits into the escape sequence
177  int i = 2;
178  for (; digits > 0; --digits, ++i) {
179  escSeq[i] = hexDigits[(codePoint >> (4 * (digits - 1))) & 0xF];
180  }
181 
182  escSeq[i] = 0; // terminate with NUL character
183  out << escSeq;
184  }
185 
186  bool WriteAliasName(ostream& out, const std::string& str) {
187  int codePoint;
188  for(std::string::const_iterator i = str.begin();
189  GetNextCodePointAndAdvance(codePoint, i, str.end());
190  )
191  {
192  if (!IsAnchorChar(codePoint))
193  return false;
194 
195  WriteCodePoint(out, codePoint);
196  }
197  return true;
198  }
199  }
200 
201  bool WriteString(ostream& out, const std::string& str, bool inFlow, bool escapeNonAscii)
202  {
203  if(IsValidPlainScalar(str, inFlow, escapeNonAscii)) {
204  out << str;
205  return true;
206  } else
207  return WriteDoubleQuotedString(out, str, escapeNonAscii);
208  }
209 
211  {
212  out << "'";
213  int codePoint;
214  for(std::string::const_iterator i = str.begin();
215  GetNextCodePointAndAdvance(codePoint, i, str.end());
216  )
217  {
218  if (codePoint == '\n')
219  return false; // We can't handle a new line and the attendant indentation yet
220 
221  if (codePoint == '\'')
222  out << "''";
223  else
224  WriteCodePoint(out, codePoint);
225  }
226  out << "'";
227  return true;
228  }
229 
230  bool WriteDoubleQuotedString(ostream& out, const std::string& str, bool escapeNonAscii)
231  {
232  out << "\"";
233  int codePoint;
234  for(std::string::const_iterator i = str.begin();
235  GetNextCodePointAndAdvance(codePoint, i, str.end());
236  )
237  {
238  if (codePoint == '\"')
239  out << "\\\"";
240  else if (codePoint == '\\')
241  out << "\\\\";
242  else if (codePoint < 0x20 || (codePoint >= 0x80 && codePoint <= 0xA0)) // Control characters and non-breaking space
243  WriteDoubleQuoteEscapeSequence(out, codePoint);
244  else if (codePoint == 0xFEFF) // Byte order marks (ZWNS) should be escaped (YAML 1.2, sec. 5.2)
245  WriteDoubleQuoteEscapeSequence(out, codePoint);
246  else if (escapeNonAscii && codePoint > 0x7E)
247  WriteDoubleQuoteEscapeSequence(out, codePoint);
248  else
249  WriteCodePoint(out, codePoint);
250  }
251  out << "\"";
252  return true;
253  }
254 
255  bool WriteLiteralString(ostream& out, const std::string& str, int indent)
256  {
257  out << "|\n";
258  out << IndentTo(indent);
259  int codePoint;
260  for(std::string::const_iterator i = str.begin();
261  GetNextCodePointAndAdvance(codePoint, i, str.end());
262  )
263  {
264  if (codePoint == '\n')
265  out << "\n" << IndentTo(indent);
266  else
267  WriteCodePoint(out, codePoint);
268  }
269  return true;
270  }
271 
272  bool WriteChar(ostream& out, char ch)
273  {
274  if(('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z'))
275  out << ch;
276  else if((0x20 <= ch && ch <= 0x7e) || ch == ' ')
277  out << "\"" << ch << "\"";
278  else if(ch == '\t')
279  out << "\"\\t\"";
280  else if(ch == '\n')
281  out << "\"\\n\"";
282  else if(ch == '\b')
283  out << "\"\\b\"";
284  else {
285  out << "\"";
286  WriteDoubleQuoteEscapeSequence(out, ch);
287  out << "\"";
288  }
289  return true;
290  }
291 
292  bool WriteComment(ostream& out, const std::string& str, int postCommentIndent)
293  {
294  const unsigned curIndent = out.col();
295  out << "#" << Indentation(postCommentIndent);
296  int codePoint;
297  for(std::string::const_iterator i = str.begin();
298  GetNextCodePointAndAdvance(codePoint, i, str.end());
299  )
300  {
301  if(codePoint == '\n')
302  out << "\n" << IndentTo(curIndent) << "#" << Indentation(postCommentIndent);
303  else
304  WriteCodePoint(out, codePoint);
305  }
306  return true;
307  }
308 
309  bool WriteAlias(ostream& out, const std::string& str)
310  {
311  out << "*";
312  return WriteAliasName(out, str);
313  }
314 
315  bool WriteAnchor(ostream& out, const std::string& str)
316  {
317  out << "&";
318  return WriteAliasName(out, str);
319  }
320 
321  bool WriteTag(ostream& out, const std::string& str, bool verbatim)
322  {
323  out << (verbatim ? "!<" : "!");
324  StringCharSource buffer(str.c_str(), str.size());
325  const RegEx& reValid = verbatim ? Exp::URI() : Exp::Tag();
326  while(buffer) {
327  int n = reValid.Match(buffer);
328  if(n <= 0)
329  return false;
330 
331  while(--n >= 0) {
332  out << buffer[0];
333  ++buffer;
334  }
335  }
336  if (verbatim)
337  out << ">";
338  return true;
339  }
340 
341  bool WriteTagWithPrefix(ostream& out, const std::string& prefix, const std::string& tag)
342  {
343  out << "!";
344  StringCharSource prefixBuffer(prefix.c_str(), prefix.size());
345  while(prefixBuffer) {
346  int n = Exp::URI().Match(prefixBuffer);
347  if(n <= 0)
348  return false;
349 
350  while(--n >= 0) {
351  out << prefixBuffer[0];
352  ++prefixBuffer;
353  }
354  }
355 
356  out << "!";
357  StringCharSource tagBuffer(tag.c_str(), tag.size());
358  while(tagBuffer) {
359  int n = Exp::Tag().Match(tagBuffer);
360  if(n <= 0)
361  return false;
362 
363  while(--n >= 0) {
364  out << tagBuffer[0];
365  ++tagBuffer;
366  }
367  }
368  return true;
369  }
370 
371  bool WriteBinary(ostream& out, const Binary& binary)
372  {
373  WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()), false);
374  return true;
375  }
376  }
377 }
378 
const RegEx & NotPrintable()
Definition: exp.h:63
bool WriteDoubleQuotedString(ostream &out, const std::string &str, bool escapeNonAscii)
bool WriteChar(ostream &out, char ch)
::std::string string
Definition: gtest.h:1979
bool WriteTag(ostream &out, const std::string &str, bool verbatim)
unsigned col() const
Definition: ostream.h:24
const RegEx & BlankOrBreak()
Definition: exp.h:38
const RegEx & Break()
Definition: exp.h:34
const RegEx & Tab()
Definition: exp.h:26
bool WriteBinary(ostream &out, const Binary &binary)
const RegEx & PlainScalarInFlow()
Definition: exp.h:143
bool WriteAnchor(ostream &out, const std::string &str)
const RegEx & EndScalarInFlow()
Definition: exp.h:151
const RegEx & Utf8_ByteOrderMark()
Definition: exp.h:70
int Match(const std::string &str) const
Definition: regeximpl.h:42
std::size_t size() const
Definition: binary.h:24
const RegEx & PlainScalar()
Definition: exp.h:139
bool WriteLiteralString(ostream &out, const std::string &str, int indent)
const RegEx & EndScalar()
Definition: exp.h:147
const RegEx & Tag()
Definition: exp.h:129
const RegEx & URI()
Definition: exp.h:125
bool WriteAlias(ostream &out, const std::string &str)
bool WriteTagWithPrefix(ostream &out, const std::string &prefix, const std::string &tag)
std::string EncodeBase64(const unsigned char *data, std::size_t size)
Definition: binary.cpp:8
bool Matches(char ch) const
Definition: regeximpl.h:16
const RegEx Comment()
Definition: exp.h:113
bool WriteComment(ostream &out, const std::string &str, int postCommentIndent)
bool WriteString(ostream &out, const std::string &str, bool inFlow, bool escapeNonAscii)
const unsigned char * data() const
Definition: binary.h:25
bool WriteSingleQuotedString(ostream &out, const std::string &str)


libpointmatcher
Author(s):
autogenerated on Sat May 27 2023 02:36:30