15 #ifndef RAPIDJSON_ENCODINGS_H_ 16 #define RAPIDJSON_ENCODINGS_H_ 20 #if defined(_MSC_VER) && !defined(__clang__) 22 RAPIDJSON_DIAG_OFF(4244)
23 RAPIDJSON_DIAG_OFF(4702)
24 #elif defined(__GNUC__) 26 RAPIDJSON_DIAG_OFF(effc++)
27 RAPIDJSON_DIAG_OFF(overflow)
95 template <
typename CharType =
char>
105 template <
typename OutputStream>
106 static void Encode(OutputStream& os,
unsigned codepoint)
108 if (codepoint <= 0x7F)
109 os.Put(static_cast<Ch>(codepoint & 0xFF));
110 else if (codepoint <= 0x7FF)
112 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
113 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
115 else if (codepoint <= 0xFFFF)
117 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
118 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
124 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
125 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
126 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
127 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
131 template <
typename OutputStream>
134 if (codepoint <= 0x7F)
135 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
136 else if (codepoint <= 0x7FF)
138 PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
139 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
141 else if (codepoint <= 0xFFFF)
143 PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
144 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
145 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
150 PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
151 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
152 PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
153 PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
157 template <
typename InputStream>
158 static bool Decode(InputStream& is,
unsigned* codepoint)
160 #define RAPIDJSON_COPY() \ 162 *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu) 163 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 164 #define RAPIDJSON_TAIL() \ 166 RAPIDJSON_TRANS(0x70) 167 typename InputStream::Ch c = is.Take();
170 *codepoint =
static_cast<unsigned char>(c);
174 unsigned char type =
GetRange(static_cast<unsigned char>(c));
181 *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
223 #undef RAPIDJSON_COPY 224 #undef RAPIDJSON_TRANS 225 #undef RAPIDJSON_TAIL 228 template <
typename InputStream,
typename OutputStream>
229 static bool Validate(InputStream& is, OutputStream& os)
231 #define RAPIDJSON_COPY() os.Put(c = is.Take()) 232 #define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0) 233 #define RAPIDJSON_TAIL() \ 235 RAPIDJSON_TRANS(0x70) 242 switch (
GetRange(static_cast<unsigned char>(c)))
281 #undef RAPIDJSON_COPY 282 #undef RAPIDJSON_TRANS 283 #undef RAPIDJSON_TAIL 290 static const unsigned char type[] = {
291 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
294 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
296 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
297 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x10, 0x10, 0x10, 0x10, 0x10,
298 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
299 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
300 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
301 0x20, 0x20, 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
302 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3,
303 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8,
304 8, 8, 8, 8, 8, 8, 8, 8, 8,
309 template <
typename InputByteStream>
313 typename InputByteStream::Ch c =
Take(is);
314 if (static_cast<unsigned char>(c) != 0xEFu)
317 if (static_cast<unsigned char>(c) != 0xBBu)
320 if (static_cast<unsigned char>(c) != 0xBFu)
326 template <
typename InputByteStream>
327 static Ch
Take(InputByteStream& is)
330 return static_cast<Ch
>(is.Take());
333 template <
typename OutputByteStream>
337 os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
338 os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
339 os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
342 template <
typename OutputByteStream>
343 static void Put(OutputByteStream& os, Ch c)
346 os.Put(static_cast<typename OutputByteStream::Ch>(c));
363 template <
typename CharType =
wchar_t>
374 template <
typename OutputStream>
375 static void Encode(OutputStream& os,
unsigned codepoint)
378 if (codepoint <= 0xFFFF)
381 os.Put(static_cast<typename OutputStream::Ch>(codepoint));
386 unsigned v = codepoint - 0x10000;
387 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
388 os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
392 template <
typename OutputStream>
396 if (codepoint <= 0xFFFF)
399 PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
404 unsigned v = codepoint - 0x10000;
405 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
406 PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
410 template <
typename InputStream>
411 static bool Decode(InputStream& is,
unsigned* codepoint)
414 typename InputStream::Ch c = is.Take();
415 if (c < 0xD800 || c > 0xDFFF)
417 *codepoint =
static_cast<unsigned>(c);
420 else if (c <= 0xDBFF)
422 *codepoint = (
static_cast<unsigned>(c) & 0x3FF) << 10;
424 *codepoint |= (
static_cast<unsigned>(c) & 0x3FF);
425 *codepoint += 0x10000;
426 return c >= 0xDC00 && c <= 0xDFFF;
431 template <
typename InputStream,
typename OutputStream>
432 static bool Validate(InputStream& is, OutputStream& os)
436 typename InputStream::Ch c;
437 os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
438 if (c < 0xD800 || c > 0xDFFF)
440 else if (c <= 0xDBFF)
442 os.Put(c = is.Take());
443 return c >= 0xDC00 && c <= 0xDFFF;
450 template <
typename CharType =
wchar_t>
453 template <
typename InputByteStream>
457 CharType c =
Take(is);
458 return static_cast<uint16_t>(c) == 0xFEFFu ?
Take(is) : c;
461 template <
typename InputByteStream>
462 static CharType
Take(InputByteStream& is)
465 unsigned c =
static_cast<uint8_t>(is.Take());
466 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
467 return static_cast<CharType
>(c);
470 template <
typename OutputByteStream>
474 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
475 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
478 template <
typename OutputByteStream>
479 static void Put(OutputByteStream& os, CharType c)
482 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
483 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
488 template <
typename CharType =
wchar_t>
491 template <
typename InputByteStream>
495 CharType c =
Take(is);
496 return static_cast<uint16_t>(c) == 0xFEFFu ?
Take(is) : c;
499 template <
typename InputByteStream>
500 static CharType
Take(InputByteStream& is)
503 unsigned c =
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
504 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take()));
505 return static_cast<CharType
>(c);
508 template <
typename OutputByteStream>
512 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
513 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
516 template <
typename OutputByteStream>
517 static void Put(OutputByteStream& os, CharType c)
520 os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
521 os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
537 template <
typename CharType =
unsigned>
548 template <
typename OutputStream>
549 static void Encode(OutputStream& os,
unsigned codepoint)
556 template <
typename OutputStream>
564 template <
typename InputStream>
565 static bool Decode(InputStream& is,
unsigned* codepoint)
570 return c <= 0x10FFFF;
573 template <
typename InputStream,
typename OutputStream>
574 static bool Validate(InputStream& is, OutputStream& os)
578 os.Put(c = is.Take());
579 return c <= 0x10FFFF;
584 template <
typename CharType =
unsigned>
587 template <
typename InputByteStream>
591 CharType c =
Take(is);
592 return static_cast<uint32_t>(c) == 0x0000FEFFu ?
Take(is) : c;
595 template <
typename InputByteStream>
596 static CharType
Take(InputByteStream& is)
599 unsigned c =
static_cast<uint8_t>(is.Take());
600 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
601 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 16;
602 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 24;
603 return static_cast<CharType
>(c);
606 template <
typename OutputByteStream>
610 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
611 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
612 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
613 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
616 template <
typename OutputByteStream>
617 static void Put(OutputByteStream& os, CharType c)
620 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
621 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
622 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
623 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
628 template <
typename CharType =
unsigned>
631 template <
typename InputByteStream>
635 CharType c =
Take(is);
636 return static_cast<uint32_t>(c) == 0x0000FEFFu ?
Take(is) : c;
639 template <
typename InputByteStream>
640 static CharType
Take(InputByteStream& is)
643 unsigned c =
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 24;
644 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 16;
645 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take())) << 8;
646 c |=
static_cast<unsigned>(
static_cast<uint8_t>(is.Take()));
647 return static_cast<CharType
>(c);
650 template <
typename OutputByteStream>
654 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
655 os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
656 os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
657 os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
660 template <
typename OutputByteStream>
661 static void Put(OutputByteStream& os, CharType c)
664 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
665 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
666 os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
667 os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
679 template <
typename CharType =
char>
689 template <
typename OutputStream>
690 static void Encode(OutputStream& os,
unsigned codepoint)
693 os.Put(static_cast<Ch>(codepoint & 0xFF));
696 template <
typename OutputStream>
700 PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
703 template <
typename InputStream>
704 static bool Decode(InputStream& is,
unsigned* codepoint)
711 template <
typename InputStream,
typename OutputStream>
712 static bool Validate(InputStream& is, OutputStream& os)
715 os.Put(static_cast<typename OutputStream::Ch>(c));
719 template <
typename InputByteStream>
724 return static_cast<Ch
>(c);
727 template <
typename InputByteStream>
728 static Ch
Take(InputByteStream& is)
731 return static_cast<Ch
>(is.Take());
734 template <
typename OutputByteStream>
741 template <
typename OutputByteStream>
742 static void Put(OutputByteStream& os, Ch c)
745 os.Put(static_cast<typename OutputByteStream::Ch>(c));
765 template <
typename CharType>
775 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 777 template <
typename OutputStream>
778 static RAPIDJSON_FORCEINLINE
void Encode(OutputStream& os,
unsigned codepoint)
780 typedef void (*EncodeFunc)(OutputStream&, unsigned);
782 (*f[os.GetType()])(os, codepoint);
785 template <
typename OutputStream>
786 static RAPIDJSON_FORCEINLINE
void EncodeUnsafe(OutputStream& os,
unsigned codepoint)
788 typedef void (*EncodeFunc)(OutputStream&, unsigned);
790 (*f[os.GetType()])(os, codepoint);
793 template <
typename InputStream>
794 static RAPIDJSON_FORCEINLINE
bool Decode(InputStream& is,
unsigned* codepoint)
796 typedef bool (*DecodeFunc)(InputStream&,
unsigned*);
798 return (*f[is.GetType()])(is, codepoint);
801 template <
typename InputStream,
typename OutputStream>
802 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream& is, OutputStream& os)
804 typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
806 return (*f[is.GetType()])(is, os);
809 #undef RAPIDJSON_ENCODINGS_FUNC 816 template <
typename SourceEncoding,
typename TargetEncoding>
820 template <
typename InputStream,
typename OutputStream>
821 static RAPIDJSON_FORCEINLINE
bool Transcode(InputStream& is, OutputStream& os)
824 if (!SourceEncoding::Decode(is, &codepoint))
826 TargetEncoding::Encode(os, codepoint);
830 template <
typename InputStream,
typename OutputStream>
834 if (!SourceEncoding::Decode(is, &codepoint))
836 TargetEncoding::EncodeUnsafe(os, codepoint);
841 template <
typename InputStream,
typename OutputStream>
842 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream& is, OutputStream& os)
844 return Transcode(is, os);
849 template <
typename Stream>
850 inline void PutUnsafe(Stream& stream,
typename Stream::Ch c);
853 template <
typename Encoding>
856 template <
typename InputStream,
typename OutputStream>
857 static RAPIDJSON_FORCEINLINE
bool Transcode(InputStream& is, OutputStream& os)
863 template <
typename InputStream,
typename OutputStream>
870 template <
typename InputStream,
typename OutputStream>
871 static RAPIDJSON_FORCEINLINE
bool Validate(InputStream& is, OutputStream& os)
873 return Encoding::Validate(is, os);
879 #if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__)) 883 #endif // RAPIDJSON_ENCODINGS_H_ static bool Decode(InputStream &is, unsigned *codepoint)
static void Put(OutputByteStream &os, Ch c)
static CharType TakeBOM(InputByteStream &is)
UTFType
Runtime-specified UTF encoding type of a stream.
#define RAPIDJSON_ASSERT(x)
Assertion.
static void Encode(OutputStream &os, unsigned codepoint)
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
static CharType TakeBOM(InputByteStream &is)
static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static bool Validate(InputStream &is, OutputStream &os)
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
static CharType TakeBOM(InputByteStream &is)
static void Put(OutputByteStream &os, Ch c)
static bool Validate(InputStream &is, OutputStream &os)
static bool Validate(InputStream &is, OutputStream &os)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_ENCODINGS_FUNC(x)
static void Put(OutputByteStream &os, CharType c)
static Ch Take(InputByteStream &is)
static CharType Take(InputByteStream &is)
static void Put(OutputByteStream &os, CharType c)
static CharType TakeBOM(InputByteStream &is)
static CharType Take(InputByteStream &is)
static CharType Take(InputByteStream &is)
static bool Decode(InputStream &is, unsigned *codepoint)
static CharType Take(InputByteStream &is)
static Ch Take(InputByteStream &is)
Dynamically select encoding according to stream's runtime-specified UTF encoding type.
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
static void Put(OutputByteStream &os, CharType c)
static unsigned char GetRange(unsigned char c)
static void PutBOM(OutputByteStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
UTF-16 big endian encoding.
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
UTF-32 big endian encoding.
static void Encode(OutputStream &os, unsigned codepoint)
static RAPIDJSON_FORCEINLINE bool Decode(InputStream &is, unsigned *codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
common definitions and configuration
static RAPIDJSON_FORCEINLINE void Encode(OutputStream &os, unsigned codepoint)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
static CharType TakeBOM(InputByteStream &is)
static void Put(OutputByteStream &os, CharType c)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
void PutUnsafe(Stream &stream, typename Stream::Ch c)
Write character to a stream, presuming buffer is reserved.
static void PutBOM(OutputByteStream &os)
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static bool Decode(InputStream &is, unsigned *codepoint)
static void PutBOM(OutputByteStream &os)
static void Encode(OutputStream &os, unsigned codepoint)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)
static void PutBOM(OutputByteStream &os)
#define RAPIDJSON_TRANS(mask)
UTF-16 little endian encoding.
static bool Validate(InputStream &is, OutputStream &os)
static CharType TakeBOM(InputByteStream &is)
UTF-32 little endian encoding.
static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream &is, OutputStream &os)
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
static void EncodeUnsafe(OutputStream &os, unsigned codepoint)