encodings.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_ENCODINGS_H_
16 #define RAPIDJSON_ENCODINGS_H_
17 
18 #include "rapidjson.h"
19 
20 #if defined(_MSC_VER) && !defined(__clang__)
21 RAPIDJSON_DIAG_PUSH
22 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
23 RAPIDJSON_DIAG_OFF(4702) // unreachable code
24 #elif defined(__GNUC__)
25 RAPIDJSON_DIAG_PUSH
26 RAPIDJSON_DIAG_OFF(effc++)
27 RAPIDJSON_DIAG_OFF(overflow)
28 #endif
29 
31 
33 // Encoding
34 
47 
54 
62 
68 
72 
76 
80 
86 // UTF8
88 
90 
95 template <typename CharType = char>
96 struct UTF8
97 {
98  typedef CharType Ch;
99 
100  enum
101  {
103  };
104 
105  template <typename OutputStream>
106  static void Encode(OutputStream& os, unsigned codepoint)
107  {
108  if (codepoint <= 0x7F)
109  os.Put(static_cast<Ch>(codepoint & 0xFF));
110  else if (codepoint <= 0x7FF)
111  {
112  os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
113  os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
114  }
115  else if (codepoint <= 0xFFFF)
116  {
117  os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
118  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
120  }
121  else
122  {
123  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
124  os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
125  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
126  os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
127  os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
128  }
129  }
130 
131  template <typename OutputStream>
132  static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
133  {
134  if (codepoint <= 0x7F)
135  PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
136  else if (codepoint <= 0x7FF)
137  {
138  PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
139  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
140  }
141  else if (codepoint <= 0xFFFF)
142  {
143  PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
144  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
145  PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
146  }
147  else
148  {
149  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
150  PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
151  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
152  PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
153  PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
154  }
155  }
156 
157  template <typename InputStream>
158  static bool Decode(InputStream& is, unsigned* codepoint)
159  {
160 #define RAPIDJSON_COPY() \
161  c = is.Take(); \
162  *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
163 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
164 #define RAPIDJSON_TAIL() \
165  RAPIDJSON_COPY(); \
166  RAPIDJSON_TRANS(0x70)
167  typename InputStream::Ch c = is.Take();
168  if (!(c & 0x80))
169  {
170  *codepoint = static_cast<unsigned char>(c);
171  return true;
172  }
173 
174  unsigned char type = GetRange(static_cast<unsigned char>(c));
175  if (type >= 32)
176  {
177  *codepoint = 0;
178  }
179  else
180  {
181  *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
182  }
183  bool result = true;
184  switch (type)
185  {
186  case 2:
187  RAPIDJSON_TAIL();
188  return result;
189  case 3:
190  RAPIDJSON_TAIL();
191  RAPIDJSON_TAIL();
192  return result;
193  case 4:
194  RAPIDJSON_COPY();
195  RAPIDJSON_TRANS(0x50);
196  RAPIDJSON_TAIL();
197  return result;
198  case 5:
199  RAPIDJSON_COPY();
200  RAPIDJSON_TRANS(0x10);
201  RAPIDJSON_TAIL();
202  RAPIDJSON_TAIL();
203  return result;
204  case 6:
205  RAPIDJSON_TAIL();
206  RAPIDJSON_TAIL();
207  RAPIDJSON_TAIL();
208  return result;
209  case 10:
210  RAPIDJSON_COPY();
211  RAPIDJSON_TRANS(0x20);
212  RAPIDJSON_TAIL();
213  return result;
214  case 11:
215  RAPIDJSON_COPY();
216  RAPIDJSON_TRANS(0x60);
217  RAPIDJSON_TAIL();
218  RAPIDJSON_TAIL();
219  return result;
220  default:
221  return false;
222  }
223 #undef RAPIDJSON_COPY
224 #undef RAPIDJSON_TRANS
225 #undef RAPIDJSON_TAIL
226  }
227 
228  template <typename InputStream, typename OutputStream>
229  static bool Validate(InputStream& is, OutputStream& os)
230  {
231 #define RAPIDJSON_COPY() os.Put(c = is.Take())
232 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
233 #define RAPIDJSON_TAIL() \
234  RAPIDJSON_COPY(); \
235  RAPIDJSON_TRANS(0x70)
236  Ch c;
237  RAPIDJSON_COPY();
238  if (!(c & 0x80))
239  return true;
240 
241  bool result = true;
242  switch (GetRange(static_cast<unsigned char>(c)))
243  {
244  case 2:
245  RAPIDJSON_TAIL();
246  return result;
247  case 3:
248  RAPIDJSON_TAIL();
249  RAPIDJSON_TAIL();
250  return result;
251  case 4:
252  RAPIDJSON_COPY();
253  RAPIDJSON_TRANS(0x50);
254  RAPIDJSON_TAIL();
255  return result;
256  case 5:
257  RAPIDJSON_COPY();
258  RAPIDJSON_TRANS(0x10);
259  RAPIDJSON_TAIL();
260  RAPIDJSON_TAIL();
261  return result;
262  case 6:
263  RAPIDJSON_TAIL();
264  RAPIDJSON_TAIL();
265  RAPIDJSON_TAIL();
266  return result;
267  case 10:
268  RAPIDJSON_COPY();
269  RAPIDJSON_TRANS(0x20);
270  RAPIDJSON_TAIL();
271  return result;
272  case 11:
273  RAPIDJSON_COPY();
274  RAPIDJSON_TRANS(0x60);
275  RAPIDJSON_TAIL();
276  RAPIDJSON_TAIL();
277  return result;
278  default:
279  return false;
280  }
281 #undef RAPIDJSON_COPY
282 #undef RAPIDJSON_TRANS
283 #undef RAPIDJSON_TAIL
284  }
285 
// Maps a byte to its character class for UTF-8 decoding.
// Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
// With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
static unsigned char GetRange(unsigned char c)
{
  // One row per high nibble, sixteen entries per row.
  static const unsigned char kByteClass[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
    /* 0x90 */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
    /* 0xA0 */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    /* 0xB0 */ 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    /* 0xC0 */ 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0 */ 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
    /* 0xF0 */ 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  };
  return kByteClass[c];
}
308 
309  template <typename InputByteStream>
310  static CharType TakeBOM(InputByteStream& is)
311  {
312  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
313  typename InputByteStream::Ch c = Take(is);
314  if (static_cast<unsigned char>(c) != 0xEFu)
315  return c;
316  c = is.Take();
317  if (static_cast<unsigned char>(c) != 0xBBu)
318  return c;
319  c = is.Take();
320  if (static_cast<unsigned char>(c) != 0xBFu)
321  return c;
322  c = is.Take();
323  return c;
324  }
325 
326  template <typename InputByteStream>
327  static Ch Take(InputByteStream& is)
328  {
329  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
330  return static_cast<Ch>(is.Take());
331  }
332 
333  template <typename OutputByteStream>
334  static void PutBOM(OutputByteStream& os)
335  {
336  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
337  os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
338  os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
339  os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
340  }
341 
342  template <typename OutputByteStream>
343  static void Put(OutputByteStream& os, Ch c)
344  {
345  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
346  os.Put(static_cast<typename OutputByteStream::Ch>(c));
347  }
348 };
349 
351 // UTF16
352 
354 
363 template <typename CharType = wchar_t>
364 struct UTF16
365 {
366  typedef CharType Ch;
367  RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
368 
369  enum
370  {
372  };
373 
374  template <typename OutputStream>
375  static void Encode(OutputStream& os, unsigned codepoint)
376  {
377  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
378  if (codepoint <= 0xFFFF)
379  {
380  RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
381  os.Put(static_cast<typename OutputStream::Ch>(codepoint));
382  }
383  else
384  {
385  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
386  unsigned v = codepoint - 0x10000;
387  os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
388  os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
389  }
390  }
391 
392  template <typename OutputStream>
393  static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
394  {
395  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
396  if (codepoint <= 0xFFFF)
397  {
398  RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
399  PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
400  }
401  else
402  {
403  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
404  unsigned v = codepoint - 0x10000;
405  PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
406  PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
407  }
408  }
409 
410  template <typename InputStream>
411  static bool Decode(InputStream& is, unsigned* codepoint)
412  {
413  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
414  typename InputStream::Ch c = is.Take();
415  if (c < 0xD800 || c > 0xDFFF)
416  {
417  *codepoint = static_cast<unsigned>(c);
418  return true;
419  }
420  else if (c <= 0xDBFF)
421  {
422  *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
423  c = is.Take();
424  *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
425  *codepoint += 0x10000;
426  return c >= 0xDC00 && c <= 0xDFFF;
427  }
428  return false;
429  }
430 
431  template <typename InputStream, typename OutputStream>
432  static bool Validate(InputStream& is, OutputStream& os)
433  {
434  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
435  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
436  typename InputStream::Ch c;
437  os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
438  if (c < 0xD800 || c > 0xDFFF)
439  return true;
440  else if (c <= 0xDBFF)
441  {
442  os.Put(c = is.Take());
443  return c >= 0xDC00 && c <= 0xDFFF;
444  }
445  return false;
446  }
447 };
448 
450 template <typename CharType = wchar_t>
451 struct UTF16LE : UTF16<CharType>
452 {
453  template <typename InputByteStream>
454  static CharType TakeBOM(InputByteStream& is)
455  {
456  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
457  CharType c = Take(is);
458  return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
459  }
460 
461  template <typename InputByteStream>
462  static CharType Take(InputByteStream& is)
463  {
464  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
465  unsigned c = static_cast<uint8_t>(is.Take());
466  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
467  return static_cast<CharType>(c);
468  }
469 
470  template <typename OutputByteStream>
471  static void PutBOM(OutputByteStream& os)
472  {
473  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
474  os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
475  os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
476  }
477 
478  template <typename OutputByteStream>
479  static void Put(OutputByteStream& os, CharType c)
480  {
481  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
482  os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
483  os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
484  }
485 };
486 
488 template <typename CharType = wchar_t>
489 struct UTF16BE : UTF16<CharType>
490 {
491  template <typename InputByteStream>
492  static CharType TakeBOM(InputByteStream& is)
493  {
494  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
495  CharType c = Take(is);
496  return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
497  }
498 
499  template <typename InputByteStream>
500  static CharType Take(InputByteStream& is)
501  {
502  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
503  unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
504  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
505  return static_cast<CharType>(c);
506  }
507 
508  template <typename OutputByteStream>
509  static void PutBOM(OutputByteStream& os)
510  {
511  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
512  os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
513  os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
514  }
515 
516  template <typename OutputByteStream>
517  static void Put(OutputByteStream& os, CharType c)
518  {
519  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
520  os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
521  os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
522  }
523 };
524 
526 // UTF32
527 
529 
537 template <typename CharType = unsigned>
538 struct UTF32
539 {
540  typedef CharType Ch;
541  RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
542 
543  enum
544  {
546  };
547 
548  template <typename OutputStream>
549  static void Encode(OutputStream& os, unsigned codepoint)
550  {
551  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
552  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
553  os.Put(codepoint);
554  }
555 
556  template <typename OutputStream>
557  static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
558  {
559  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
560  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
561  PutUnsafe(os, codepoint);
562  }
563 
564  template <typename InputStream>
565  static bool Decode(InputStream& is, unsigned* codepoint)
566  {
567  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
568  Ch c = is.Take();
569  *codepoint = c;
570  return c <= 0x10FFFF;
571  }
572 
573  template <typename InputStream, typename OutputStream>
574  static bool Validate(InputStream& is, OutputStream& os)
575  {
576  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
577  Ch c;
578  os.Put(c = is.Take());
579  return c <= 0x10FFFF;
580  }
581 };
582 
584 template <typename CharType = unsigned>
585 struct UTF32LE : UTF32<CharType>
586 {
587  template <typename InputByteStream>
588  static CharType TakeBOM(InputByteStream& is)
589  {
590  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
591  CharType c = Take(is);
592  return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
593  }
594 
595  template <typename InputByteStream>
596  static CharType Take(InputByteStream& is)
597  {
598  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
599  unsigned c = static_cast<uint8_t>(is.Take());
600  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
601  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
602  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
603  return static_cast<CharType>(c);
604  }
605 
606  template <typename OutputByteStream>
607  static void PutBOM(OutputByteStream& os)
608  {
609  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
610  os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
611  os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
612  os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
613  os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
614  }
615 
616  template <typename OutputByteStream>
617  static void Put(OutputByteStream& os, CharType c)
618  {
619  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
620  os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
621  os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
622  os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
623  os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
624  }
625 };
626 
628 template <typename CharType = unsigned>
629 struct UTF32BE : UTF32<CharType>
630 {
631  template <typename InputByteStream>
632  static CharType TakeBOM(InputByteStream& is)
633  {
634  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
635  CharType c = Take(is);
636  return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
637  }
638 
639  template <typename InputByteStream>
640  static CharType Take(InputByteStream& is)
641  {
642  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
643  unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
644  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
645  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
646  c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
647  return static_cast<CharType>(c);
648  }
649 
650  template <typename OutputByteStream>
651  static void PutBOM(OutputByteStream& os)
652  {
653  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
654  os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
655  os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
656  os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
657  os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
658  }
659 
660  template <typename OutputByteStream>
661  static void Put(OutputByteStream& os, CharType c)
662  {
663  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
664  os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
665  os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
666  os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
667  os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
668  }
669 };
670 
672 // ASCII
673 
675 
679 template <typename CharType = char>
680 struct ASCII
681 {
682  typedef CharType Ch;
683 
684  enum
685  {
687  };
688 
689  template <typename OutputStream>
690  static void Encode(OutputStream& os, unsigned codepoint)
691  {
692  RAPIDJSON_ASSERT(codepoint <= 0x7F);
693  os.Put(static_cast<Ch>(codepoint & 0xFF));
694  }
695 
696  template <typename OutputStream>
697  static void EncodeUnsafe(OutputStream& os, unsigned codepoint)
698  {
699  RAPIDJSON_ASSERT(codepoint <= 0x7F);
700  PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
701  }
702 
// Reads one character from `is`, stores its value (as an unsigned 8-bit
// quantity) in *codepoint, and returns whether it is within 0..0x7F.
template <typename InputStream>
static bool Decode(InputStream& is, unsigned* codepoint)
{
  const uint8_t byte = static_cast<uint8_t>(is.Take());
  *codepoint = byte;
  // Within the ASCII range exactly when the top bit is clear.
  return (byte & 0x80u) == 0;
}
710 
// Copies one character from `is` to `os` and returns whether its value (as an
// unsigned 8-bit quantity) is within 0..0x7F. The character is copied even
// when it is out of range.
template <typename InputStream, typename OutputStream>
static bool Validate(InputStream& is, OutputStream& os)
{
  typedef typename OutputStream::Ch OutUnit;
  const uint8_t byte = static_cast<uint8_t>(is.Take());
  os.Put(static_cast<OutUnit>(byte));
  // Within the ASCII range exactly when the top bit is clear.
  return (byte & 0x80u) == 0;
}
718 
719  template <typename InputByteStream>
720  static CharType TakeBOM(InputByteStream& is)
721  {
722  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
723  uint8_t c = static_cast<uint8_t>(Take(is));
724  return static_cast<Ch>(c);
725  }
726 
727  template <typename InputByteStream>
728  static Ch Take(InputByteStream& is)
729  {
730  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
731  return static_cast<Ch>(is.Take());
732  }
733 
734  template <typename OutputByteStream>
735  static void PutBOM(OutputByteStream& os)
736  {
737  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
738  (void)os;
739  }
740 
741  template <typename OutputByteStream>
742  static void Put(OutputByteStream& os, Ch c)
743  {
744  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
745  os.Put(static_cast<typename OutputByteStream::Ch>(c));
746  }
747 };
748 
750 // AutoUTF
751 
754 {
755  kUTF8 = 0,
756  kUTF16LE = 1,
757  kUTF16BE = 2,
758  kUTF32LE = 3,
759  kUTF32BE = 4
760 };
761 
763 
765 template <typename CharType>
766 struct AutoUTF
767 {
768  typedef CharType Ch;
769 
770  enum
771  {
773  };
774 
775 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
776 
777  template <typename OutputStream>
778  static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint)
779  {
780  typedef void (*EncodeFunc)(OutputStream&, unsigned);
781  static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
782  (*f[os.GetType()])(os, codepoint);
783  }
784 
785  template <typename OutputStream>
786  static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint)
787  {
788  typedef void (*EncodeFunc)(OutputStream&, unsigned);
789  static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) };
790  (*f[os.GetType()])(os, codepoint);
791  }
792 
793  template <typename InputStream>
794  static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint)
795  {
796  typedef bool (*DecodeFunc)(InputStream&, unsigned*);
797  static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
798  return (*f[is.GetType()])(is, codepoint);
799  }
800 
801  template <typename InputStream, typename OutputStream>
802  static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
803  {
804  typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
805  static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
806  return (*f[is.GetType()])(is, os);
807  }
808 
809 #undef RAPIDJSON_ENCODINGS_FUNC
810 };
811 
813 // Transcoder
814 
816 template <typename SourceEncoding, typename TargetEncoding>
818 {
820  template <typename InputStream, typename OutputStream>
821  static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os)
822  {
823  unsigned codepoint;
824  if (!SourceEncoding::Decode(is, &codepoint))
825  return false;
826  TargetEncoding::Encode(os, codepoint);
827  return true;
828  }
829 
830  template <typename InputStream, typename OutputStream>
831  static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os)
832  {
833  unsigned codepoint;
834  if (!SourceEncoding::Decode(is, &codepoint))
835  return false;
836  TargetEncoding::EncodeUnsafe(os, codepoint);
837  return true;
838  }
839 
841  template <typename InputStream, typename OutputStream>
842  static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
843  {
844  return Transcode(is, os); // Since source/target encoding is different, must transcode.
845  }
846 };
847 
848 // Forward declaration.
849 template <typename Stream>
850 inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
851 
853 template <typename Encoding>
854 struct Transcoder<Encoding, Encoding>
855 {
856  template <typename InputStream, typename OutputStream>
857  static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os)
858  {
859  os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class.
860  return true;
861  }
862 
863  template <typename InputStream, typename OutputStream>
864  static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os)
865  {
866  PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class.
867  return true;
868  }
869 
870  template <typename InputStream, typename OutputStream>
871  static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os)
872  {
873  return Encoding::Validate(is, os); // source/target encoding are the same
874  }
875 };
876 
878 
879 #if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__))
880 RAPIDJSON_DIAG_POP
881 #endif
882 
883 #endif // RAPIDJSON_ENCODINGS_H_
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:158
Encoding conversion.
Definition: encodings.h:817
static void Put(OutputByteStream &os, Ch c)
Definition: encodings.h:343
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:588
UTFType
Runtime-specified UTF encoding type of a stream.
Definition: encodings.h:753
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:416
ASCII encoding.
Definition: encodings.h:680
f
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:690
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:126
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:310
CharType Ch
Definition: encodings.h:98
static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition: encodings.h:786
UTF-32 big endian.
Definition: encodings.h:759
UTF-16 little endian.
Definition: encodings.h:756
UTF-8.
Definition: encodings.h:755
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:229
UTF-16 encoding.
Definition: encodings.h:364
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition: rapidjson.h:465
unsigned short uint16_t
Definition: stdint.h:125
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:720
static void Put(OutputByteStream &os, Ch c)
Definition: encodings.h:742
unsigned char uint8_t
Definition: stdint.h:124
UTF-16 big endian.
Definition: encodings.h:757
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:574
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:712
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:735
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:334
CharType Ch
Definition: encodings.h:540
#define RAPIDJSON_ENCODINGS_FUNC(x)
Definition: encodings.h:775
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:617
static Ch Take(InputByteStream &is)
Definition: encodings.h:327
static CharType Take(InputByteStream &is)
Definition: encodings.h:462
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:479
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:454
static CharType Take(InputByteStream &is)
Definition: encodings.h:500
static CharType Take(InputByteStream &is)
Definition: encodings.h:596
UTF-8 encoding.
Definition: encodings.h:96
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:704
static CharType Take(InputByteStream &is)
Definition: encodings.h:640
static Ch Take(InputByteStream &is)
Definition: encodings.h:728
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
Definition: encodings.h:766
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
Definition: encodings.h:821
unsigned int uint32_t
Definition: stdint.h:126
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:517
static unsigned char GetRange(unsigned char c)
Definition: encodings.h:286
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:471
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition: encodings.h:132
UTF-32 encoding.
Definition: encodings.h:538
CharType Ch
Definition: encodings.h:366
#define RAPIDJSON_TAIL()
UTF-16 big endian encoding.
Definition: encodings.h:489
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Definition: encodings.h:857
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:549
UTF-32 big endian encoding.
Definition: encodings.h:629
CharType Ch
Definition: encodings.h:768
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:375
#define RAPIDJSON_COPY()
static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:794
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:411
common definitions and configuration
static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:778
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
Definition: encodings.h:842
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:632
static void Put(OutputByteStream &os, CharType c)
Definition: encodings.h:661
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:871
void PutUnsafe(Stream &stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
Definition: stream.h:96
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:607
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
Definition: encodings.h:864
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition: encodings.h:393
UTF-32 little endian.
Definition: encodings.h:758
static bool Decode(InputStream &is, unsigned *codepoint)
Definition: encodings.h:565
CharType Ch
Definition: encodings.h:682
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:651
static void Encode(OutputStream &os, unsigned codepoint)
Definition: encodings.h:106
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition: encodings.h:697
static void PutBOM(OutputByteStream &os)
Definition: encodings.h:509
#define RAPIDJSON_TRANS(mask)
UTF-16 little endian encoding.
Definition: encodings.h:451
static bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:432
static CharType TakeBOM(InputByteStream &is)
Definition: encodings.h:492
UTF-32 little endian encoding.
Definition: encodings.h:585
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
Definition: encodings.h:831
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Definition: encodings.h:802
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
Definition: encodings.h:557


xbot_talker
Author(s): wangxiaoyun
autogenerated on Sat Oct 10 2020 03:27:53