19 #ifndef RAPIDJSON_ENCODINGS_H_ 20 #define RAPIDJSON_ENCODINGS_H_ 24 #if defined(_MSC_VER) && !defined(__clang__) 28 RAPIDJSON_DIAG_OFF(4702)
29 #elif defined(__GNUC__) 31 RAPIDJSON_DIAG_OFF(effc++)
32 RAPIDJSON_DIAG_OFF(overflow)
101 template <
typename CharType =
char>
107 template <
typename OutputStream>
108 static void Encode(OutputStream &os,
unsigned codepoint) {
109 if (codepoint <= 0x7F)
110 os.Put(static_cast<Ch>(codepoint & 0xFF));
111 else if (codepoint <= 0x7FF) {
112 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
113 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
114 }
else if (codepoint <= 0xFFFF) {
115 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
116 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
117 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
120 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
121 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
122 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
123 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
127 template <
typename OutputStream>
129 if (codepoint <= 0x7F)
130 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
131 else if (codepoint <= 0x7FF) {
132 PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
133 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
134 }
else if (codepoint <= 0xFFFF) {
135 PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
136 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
137 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
140 PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
141 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
142 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
143 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
147 template <
typename InputStream>
148 static bool Decode(InputStream &is,
unsigned *codepoint) {
149 #define RAPIDJSON_COPY() \ 151 *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) 152 #define RAPIDJSON_TRANS(mask) \ 153 result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 154 #define RAPIDJSON_TAIL() \ 156 RAPIDJSON_TRANS(0x70) 157 typename InputStream::Ch c = is.Take();
159 *codepoint =
static_cast<unsigned char>(c);
163 unsigned char type =
GetRange(static_cast<unsigned char>(c));
167 *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
208 #undef RAPIDJSON_COPY 209 #undef RAPIDJSON_TRANS 210 #undef RAPIDJSON_TAIL 213 template <
typename InputStream,
typename OutputStream>
214 static bool Validate(InputStream &is, OutputStream &os) {
215 #define RAPIDJSON_COPY() os.Put(c = is.Take()) 216 #define RAPIDJSON_TRANS(mask) \ 217 result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 218 #define RAPIDJSON_TAIL() \ 220 RAPIDJSON_TRANS(0x70) 223 if (!(c & 0x80))
return true;
226 switch (
GetRange(static_cast<unsigned char>(c))) {
264 #undef RAPIDJSON_COPY 265 #undef RAPIDJSON_TRANS 266 #undef RAPIDJSON_TAIL 273 static const unsigned char type[] = {
274 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
281 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
282 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
283 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
284 0, 0, 0, 0, 0, 0, 0, 0, 0x10, 0x10, 0x10, 0x10,
285 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
286 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
287 0x40, 0x40, 0x40, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
288 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
289 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
290 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
291 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
292 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3,
293 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
294 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8,
300 template <
typename InputByteStream>
301 static CharType
TakeBOM(InputByteStream &is) {
303 typename InputByteStream::Ch c =
Take(is);
304 if (static_cast<unsigned char>(c) != 0xEFu)
return c;
306 if (static_cast<unsigned char>(c) != 0xBBu)
return c;
308 if (static_cast<unsigned char>(c) != 0xBFu)
return c;
313 template <
typename InputByteStream>
314 static Ch
Take(InputByteStream &is) {
316 return static_cast<Ch
>(is.Take());
319 template <
typename OutputByteStream>
320 static void PutBOM(OutputByteStream &os) {
322 os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
323 os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
324 os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
327 template <
typename OutputByteStream>
328 static void Put(OutputByteStream &os, Ch c) {
330 os.Put(static_cast<typename OutputByteStream::Ch>(c));
347 template <
typename CharType =
wchar_t>
354 template <
typename OutputStream>
355 static void Encode(OutputStream &os,
unsigned codepoint) {
357 if (codepoint <= 0xFFFF) {
359 codepoint < 0xD800 ||
361 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
364 unsigned v = codepoint - 0x10000;
365 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
366 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
370 template <
typename OutputStream>
373 if (codepoint <= 0xFFFF) {
375 codepoint < 0xD800 ||
377 PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
380 unsigned v = codepoint - 0x10000;
381 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
383 static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
387 template <
typename InputStream>
388 static bool Decode(InputStream &is,
unsigned *codepoint) {
390 typename InputStream::Ch c = is.Take();
391 if (c < 0xD800 || c > 0xDFFF) {
392 *codepoint =
static_cast<unsigned>(c);
394 }
else if (c <= 0xDBFF) {
395 *codepoint = (
static_cast<unsigned>(c) & 0x3FF) << 10;
397 *codepoint |= (
static_cast<unsigned>(c) & 0x3FF);
398 *codepoint += 0x10000;
399 return c >= 0xDC00 && c <= 0xDFFF;
404 template <
typename InputStream,
typename OutputStream>
405 static bool Validate(InputStream &is, OutputStream &os) {
408 typename InputStream::Ch c;
409 os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
410 if (c < 0xD800 || c > 0xDFFF)
412 else if (c <= 0xDBFF) {
413 os.Put(c = is.Take());
414 return c >= 0xDC00 && c <= 0xDFFF;
421 template <
typename CharType =
wchar_t>
423 template <
typename InputByteStream>
424 static CharType
TakeBOM(InputByteStream &is) {
426 CharType c =
Take(is);
427 return static_cast<uint16_t>(c) == 0xFEFFu ?
Take(is) : c;
430 template <
typename InputByteStream>
431 static CharType
Take(InputByteStream &is) {
433 unsigned c =
static_cast<uint8_t>(is.Take());
434 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
435 return static_cast<CharType
>(c);
438 template <
typename OutputByteStream>
439 static void PutBOM(OutputByteStream &os) {
441 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
442 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
445 template <
typename OutputByteStream>
446 static void Put(OutputByteStream &os, CharType c) {
448 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) &
450 os.Put(static_cast<typename OutputByteStream::Ch>(
451 (static_cast<unsigned>(c) >> 8) & 0xFFu));
456 template <
typename CharType =
wchar_t>
458 template <
typename InputByteStream>
459 static CharType
TakeBOM(InputByteStream &is) {
461 CharType c =
Take(is);
462 return static_cast<uint16_t>(c) == 0xFEFFu ?
Take(is) : c;
465 template <
typename InputByteStream>
466 static CharType
Take(InputByteStream &is) {
468 unsigned c =
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
469 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take()));
470 return static_cast<CharType
>(c);
473 template <
typename OutputByteStream>
474 static void PutBOM(OutputByteStream &os) {
476 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
477 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
480 template <
typename OutputByteStream>
481 static void Put(OutputByteStream &os, CharType c) {
483 os.Put(static_cast<typename OutputByteStream::Ch>(
484 (static_cast<unsigned>(c) >> 8) & 0xFFu));
485 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) &
502 template <
typename CharType =
unsigned>
509 template <
typename OutputStream>
510 static void Encode(OutputStream &os,
unsigned codepoint) {
516 template <
typename OutputStream>
523 template <
typename InputStream>
524 static bool Decode(InputStream &is,
unsigned *codepoint) {
528 return c <= 0x10FFFF;
531 template <
typename InputStream,
typename OutputStream>
532 static bool Validate(InputStream &is, OutputStream &os) {
535 os.Put(c = is.Take());
536 return c <= 0x10FFFF;
541 template <
typename CharType =
unsigned>
543 template <
typename InputByteStream>
544 static CharType
TakeBOM(InputByteStream &is) {
546 CharType c =
Take(is);
547 return static_cast<uint32_t>(c) == 0x0000FEFFu ?
Take(is) : c;
550 template <
typename InputByteStream>
551 static CharType
Take(InputByteStream &is) {
553 unsigned c =
static_cast<uint8_t>(is.Take());
554 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
555 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 16;
556 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 24;
557 return static_cast<CharType
>(c);
560 template <
typename OutputByteStream>
561 static void PutBOM(OutputByteStream &os) {
563 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
564 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
565 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
566 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
569 template <
typename OutputByteStream>
570 static void Put(OutputByteStream &os, CharType c) {
572 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
573 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
574 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
575 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
580 template <
typename CharType =
unsigned>
582 template <
typename InputByteStream>
583 static CharType
TakeBOM(InputByteStream &is) {
585 CharType c =
Take(is);
586 return static_cast<uint32_t>(c) == 0x0000FEFFu ?
Take(is) : c;
589 template <
typename InputByteStream>
590 static CharType
Take(InputByteStream &is) {
592 unsigned c =
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 24;
593 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 16;
594 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
595 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take()));
596 return static_cast<CharType
>(c);
599 template <
typename OutputByteStream>
600 static void PutBOM(OutputByteStream &os) {
602 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
603 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
604 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
605 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
608 template <
typename OutputByteStream>
609 static void Put(OutputByteStream &os, CharType c) {
611 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
612 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
613 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
614 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
626 template <
typename CharType =
char>
632 template <
typename OutputStream>
633 static void Encode(OutputStream &os,
unsigned codepoint) {
635 os.Put(static_cast<Ch>(codepoint & 0xFF));
638 template <
typename OutputStream>
641 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
644 template <
typename InputStream>
645 static bool Decode(InputStream &is,
unsigned *codepoint) {
651 template <
typename InputStream,
typename OutputStream>
652 static bool Validate(InputStream &is, OutputStream &os) {
654 os.Put(static_cast<typename OutputStream::Ch>(c));
658 template <
typename InputByteStream>
659 static CharType
TakeBOM(InputByteStream &is) {
662 return static_cast<Ch
>(c);
665 template <
typename InputByteStream>
666 static Ch
Take(InputByteStream &is) {
668 return static_cast<Ch
>(is.Take());
671 template <
typename OutputByteStream>
672 static void PutBOM(OutputByteStream &os) {
677 template <
typename OutputByteStream>
678 static void Put(OutputByteStream &os, Ch c) {
680 os.Put(static_cast<typename OutputByteStream::Ch>(c));
701 template <
typename CharType>
707 #define RAPIDJSON_ENCODINGS_FUNC(x) \ 708 UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 710 template <
typename OutputStream>
711 static RAPIDJSON_FORCEINLINE
void Encode(OutputStream &os,
712 unsigned codepoint) {
713 typedef void (*EncodeFunc)(OutputStream &, unsigned);
715 (*f[os.GetType()])(os, codepoint);
718 template <
typename OutputStream>
720 unsigned codepoint) {
721 typedef void (*EncodeFunc)(OutputStream &, unsigned);
723 (*f[os.GetType()])(os, codepoint);
726 template <
typename InputStream>
727 static RAPIDJSON_FORCEINLINE
bool Decode(InputStream &is,
728 unsigned *codepoint) {
729 typedef bool (*DecodeFunc)(InputStream &,
unsigned *);
731 return (*f[is.GetType()])(is, codepoint);
734 template <
typename InputStream,
typename OutputStream>
735 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream &is,
737 typedef bool (*ValidateFunc)(InputStream &, OutputStream &);
739 return (*f[is.GetType()])(is, os);
742 #undef RAPIDJSON_ENCODINGS_FUNC 749 template <
typename SourceEncoding,
typename TargetEncoding>
753 template <
typename InputStream,
typename OutputStream>
754 static RAPIDJSON_FORCEINLINE
bool Transcode(InputStream &is,
757 if (!SourceEncoding::Decode(is, &codepoint))
return false;
758 TargetEncoding::Encode(os, codepoint);
762 template <
typename InputStream,
typename OutputStream>
766 if (!SourceEncoding::Decode(is, &codepoint))
return false;
767 TargetEncoding::EncodeUnsafe(os, codepoint);
772 template <
typename InputStream,
typename OutputStream>
773 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream &is,
781 template <
typename Stream>
782 inline void PutUnsafe(Stream &stream,
typename Stream::Ch c);
785 template <
typename Encoding>
787 template <
typename InputStream,
typename OutputStream>
788 static RAPIDJSON_FORCEINLINE
bool Transcode(InputStream &is,
795 template <
typename InputStream,
typename OutputStream>
803 template <
typename InputStream,
typename OutputStream>
804 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream &is,
806 return Encoding::Validate(is, os);
812 #if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__)) 816 #endif // RAPIDJSON_ENCODINGS_H_ static bool Decode(InputStream &is, unsigned *codepoint)
static void Put(OutputByteStream &os, Ch c)
static CharType TakeBOM(InputByteStream &is)
UTFType
Runtime-specified UTF encoding type of a stream.
#define RAPIDJSON_ASSERT(x)
Assertion.
static void Encode(OutputStream &os, unsigned codepoint)
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
static CharType TakeBOM(InputByteStream &is)
static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static bool Validate(InputStream &is, OutputStream &os)
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
static CharType TakeBOM(InputByteStream &is)
static void Put(OutputByteStream &os, Ch c)
static bool Validate(InputStream &is, OutputStream &os)
static bool Validate(InputStream &is, OutputStream &os)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_ENCODINGS_FUNC(x)
static void Put(OutputByteStream &os, CharType c)
static Ch Take(InputByteStream &is)
static CharType Take(InputByteStream &is)
static void Put(OutputByteStream &os, CharType c)
static CharType TakeBOM(InputByteStream &is)
static CharType Take(InputByteStream &is)
static CharType Take(InputByteStream &is)
static bool Decode(InputStream &is, unsigned *codepoint)
static CharType Take(InputByteStream &is)
static Ch Take(InputByteStream &is)
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
static void Put(OutputByteStream &os, CharType c)
static unsigned char GetRange(unsigned char c)
static void PutBOM(OutputByteStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
UTF-16 big endian encoding.
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
UTF-32 big endian encoding.
static void Encode(OutputStream &os, unsigned codepoint)
static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is, unsigned *codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
common definitions and configuration
static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os, unsigned codepoint)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
static CharType TakeBOM(InputByteStream &is)
static void Put(OutputByteStream &os, CharType c)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
void PutUnsafe(Stream &stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
static void PutBOM(OutputByteStream &os)
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
static void PutBOM(OutputByteStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_TRANS(mask)
UTF-16 little endian encoding.
static bool Validate(InputStream &is, OutputStream &os)
static CharType TakeBOM(InputByteStream &is)
UTF-32 little endian encoding.
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)