encodedstream.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_ENCODEDSTREAM_H_
16 #define RAPIDJSON_ENCODEDSTREAM_H_
17 
18 #include "stream.h"
19 #include "memorystream.h"
20 
21 #if __GNUC__
22 RAPIDJSON_DIAG_PUSH
23 RAPIDJSON_DIAG_OFF(effc++)
24 #endif
25 
26 #if __clang__
27 RAPIDJSON_DIAG_PUSH
28 RAPIDJSON_DIAG_OFF(padded)
29 #endif
30 
32 
34 
38 template <typename Encoding, typename InputByteStream>
40 {
41  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
42 
43 public:
44  typedef typename Encoding::Ch Ch;
45 
46  EncodedInputStream(InputByteStream& is) : is_(is)
47  {
48  current_ = Encoding::TakeBOM(is_);
49  }
50 
51  Ch Peek() const
52  {
53  return current_;
54  }
55  Ch Take()
56  {
57  Ch c = current_;
58  current_ = Encoding::Take(is_);
59  return c;
60  }
61  size_t Tell() const
62  {
63  return is_.Tell();
64  }
65 
66  // Not implemented
67  void Put(Ch)
68  {
69  RAPIDJSON_ASSERT(false);
70  }
71  void Flush()
72  {
73  RAPIDJSON_ASSERT(false);
74  }
75  Ch* PutBegin()
76  {
77  RAPIDJSON_ASSERT(false);
78  return 0;
79  }
80  size_t PutEnd(Ch*)
81  {
82  RAPIDJSON_ASSERT(false);
83  return 0;
84  }
85 
86 private:
89 
90  InputByteStream& is_;
92 };
93 
95 template <>
97 {
98 public:
99  typedef UTF8<>::Ch Ch;
100 
102  {
103  if (static_cast<unsigned char>(is_.Peek()) == 0xEFu)
104  is_.Take();
105  if (static_cast<unsigned char>(is_.Peek()) == 0xBBu)
106  is_.Take();
107  if (static_cast<unsigned char>(is_.Peek()) == 0xBFu)
108  is_.Take();
109  }
110  Ch Peek() const
111  {
112  return is_.Peek();
113  }
114  Ch Take()
115  {
116  return is_.Take();
117  }
118  size_t Tell() const
119  {
120  return is_.Tell();
121  }
122 
123  // Not implemented
124  void Put(Ch)
125  {
126  }
127  void Flush()
128  {
129  }
130  Ch* PutBegin()
131  {
132  return 0;
133  }
134  size_t PutEnd(Ch*)
135  {
136  return 0;
137  }
138 
140 
141 private:
144 };
145 
147 
151 template <typename Encoding, typename OutputByteStream>
153 {
154  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
155 
156 public:
157  typedef typename Encoding::Ch Ch;
158 
159  EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os)
160  {
161  if (putBOM)
162  Encoding::PutBOM(os_);
163  }
164 
165  void Put(Ch c)
166  {
167  Encoding::Put(os_, c);
168  }
169  void Flush()
170  {
171  os_.Flush();
172  }
173 
174  // Not implemented
175  Ch Peek() const
176  {
177  RAPIDJSON_ASSERT(false);
178  return 0;
179  }
180  Ch Take()
181  {
182  RAPIDJSON_ASSERT(false);
183  return 0;
184  }
185  size_t Tell() const
186  {
187  RAPIDJSON_ASSERT(false);
188  return 0;
189  }
190  Ch* PutBegin()
191  {
192  RAPIDJSON_ASSERT(false);
193  return 0;
194  }
195  size_t PutEnd(Ch*)
196  {
197  RAPIDJSON_ASSERT(false);
198  return 0;
199  }
200 
201 private:
204 
205  OutputByteStream& os_;
206 };
207 
208 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
209 
211 
215 template <typename CharType, typename InputByteStream>
217 {
218  RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
219 
220 public:
221  typedef CharType Ch;
222 
224 
228  AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false)
229  {
230  RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
231  DetectType();
232  static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
233  takeFunc_ = f[type_];
234  current_ = takeFunc_(*is_);
235  }
236 
237  UTFType GetType() const
238  {
239  return type_;
240  }
241  bool HasBOM() const
242  {
243  return hasBOM_;
244  }
245 
246  Ch Peek() const
247  {
248  return current_;
249  }
250  Ch Take()
251  {
252  Ch c = current_;
253  current_ = takeFunc_(*is_);
254  return c;
255  }
256  size_t Tell() const
257  {
258  return is_->Tell();
259  }
260 
261  // Not implemented
262  void Put(Ch)
263  {
264  RAPIDJSON_ASSERT(false);
265  }
266  void Flush()
267  {
268  RAPIDJSON_ASSERT(false);
269  }
270  Ch* PutBegin()
271  {
272  RAPIDJSON_ASSERT(false);
273  return 0;
274  }
275  size_t PutEnd(Ch*)
276  {
277  RAPIDJSON_ASSERT(false);
278  return 0;
279  }
280 
281 private:
284 
285  // Detect the encoding from a leading BOM or, failing that, from the RFC 4627 null-byte pattern; updates type_ and hasBOM_.
286  void DetectType()
287  {
288  // BOM (Byte Order Mark), bytes listed in the order c[0] c[1] c[2] c[3]:
289  // 00 00 FE FF UTF-32BE
290  // FF FE 00 00 UTF-32LE
291  // FE FF UTF-16BE
292  // FF FE UTF-16LE
293  // EF BB BF UTF-8
294 
295  const unsigned char* c = reinterpret_cast<const unsigned char*>(is_->Peek4());
296  if (!c)
297  return; // Null from Peek4(): nothing is inspected, so the caller-supplied type_ is kept.
298 
299  unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24)); // c[0] lands in the low byte, so the constants below are the table entries byte-reversed.
300  hasBOM_ = false;
301  if (bom == 0xFFFE0000)
302  {
303  type_ = kUTF32BE;
304  hasBOM_ = true;
305  is_->Take(); // Skip the four BOM bytes.
306  is_->Take();
307  is_->Take();
308  is_->Take();
309  }
310  else if (bom == 0x0000FEFF) // Must be tested before the 16-bit checks: its low 16 bits also match the UTF-16LE BOM.
311  {
312  type_ = kUTF32LE;
313  hasBOM_ = true;
314  is_->Take(); // Skip the four BOM bytes.
315  is_->Take();
316  is_->Take();
317  is_->Take();
318  }
319  else if ((bom & 0xFFFF) == 0xFFFE) // Only c[0] and c[1] are compared.
320  {
321  type_ = kUTF16BE;
322  hasBOM_ = true;
323  is_->Take(); // Skip the two BOM bytes.
324  is_->Take();
325  }
326  else if ((bom & 0xFFFF) == 0xFEFF) // Only c[0] and c[1] are compared.
327  {
328  type_ = kUTF16LE;
329  hasBOM_ = true;
330  is_->Take(); // Skip the two BOM bytes.
331  is_->Take();
332  }
333  else if ((bom & 0xFFFFFF) == 0xBFBBEF) // Only c[0], c[1] and c[2] are compared.
334  {
335  type_ = kUTF8;
336  hasBOM_ = true;
337  is_->Take(); // Skip the three BOM bytes.
338  is_->Take();
339  is_->Take();
340  }
341 
342  // RFC 4627: Section 3
343  // "Since the first two characters of a JSON text will always be ASCII
344  // characters [RFC0020], it is possible to determine whether an octet
345  // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
346  // at the pattern of nulls in the first four octets."
347  // 00 00 00 xx UTF-32BE
348  // 00 xx 00 xx UTF-16BE
349  // xx 00 00 00 UTF-32LE
350  // xx 00 xx 00 UTF-16LE
351  // xx xx xx xx UTF-8
352 
353  if (!hasBOM_) // No BOM was matched: fall back to the null-byte pattern.
354  {
355  int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); // Bit i is set when c[i] is non-zero.
356  switch (pattern)
357  {
358  case 0x08:
359  type_ = kUTF32BE;
360  break;
361  case 0x0A:
362  type_ = kUTF16BE;
363  break;
364  case 0x01:
365  type_ = kUTF32LE;
366  break;
367  case 0x05:
368  type_ = kUTF16LE;
369  break;
370  case 0x0F:
371  type_ = kUTF8;
372  break;
373  default:
374  break; // Use type defined by user.
375  }
376  }
377 
378  // Runtime check whether the size of the character type is sufficient. It only performs the check via assertions.
379  if (type_ == kUTF16LE || type_ == kUTF16BE)
380  RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
381  if (type_ == kUTF32LE || type_ == kUTF32BE)
382  RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
383  }
384 
385  typedef Ch (*TakeFunc)(InputByteStream& is);
386  InputByteStream* is_;
389  TakeFunc takeFunc_;
390  bool hasBOM_;
391 };
392 
394 
398 template <typename CharType, typename OutputByteStream>
400 {
401  RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
402 
403 public:
404  typedef CharType Ch;
405 
407 
412  AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type)
413  {
414  RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
415 
416  // Runtime check whether the size of the character type is sufficient. It only performs the check via assertions.
417  if (type_ == kUTF16LE || type_ == kUTF16BE)
418  RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
419  if (type_ == kUTF32LE || type_ == kUTF32BE)
420  RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
421 
422  static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
423  putFunc_ = f[type_];
424 
425  if (putBOM)
426  PutBOM();
427  }
428 
429  UTFType GetType() const
430  {
431  return type_;
432  }
433 
434  void Put(Ch c)
435  {
436  putFunc_(*os_, c);
437  }
438  void Flush()
439  {
440  os_->Flush();
441  }
442 
443  // Not implemented
444  Ch Peek() const
445  {
446  RAPIDJSON_ASSERT(false);
447  return 0;
448  }
449  Ch Take()
450  {
451  RAPIDJSON_ASSERT(false);
452  return 0;
453  }
454  size_t Tell() const
455  {
456  RAPIDJSON_ASSERT(false);
457  return 0;
458  }
459  Ch* PutBegin()
460  {
461  RAPIDJSON_ASSERT(false);
462  return 0;
463  }
464  size_t PutEnd(Ch*)
465  {
466  RAPIDJSON_ASSERT(false);
467  return 0;
468  }
469 
470 private:
473 
474  void PutBOM()
475  {
476  typedef void (*PutBOMFunc)(OutputByteStream&);
477  static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
478  f[type_](*os_);
479  }
480 
481  typedef void (*PutFunc)(OutputByteStream&, Ch);
482 
483  OutputByteStream* os_;
485  PutFunc putFunc_;
486 };
487 
488 #undef RAPIDJSON_ENCODINGS_FUNC
489 
491 
492 #if __clang__
493 RAPIDJSON_DIAG_POP
494 #endif
495 
496 #if __GNUC__
497 RAPIDJSON_DIAG_POP
498 #endif
499 
500 #endif // RAPIDJSON_ENCODEDSTREAM_H_
#define RAPIDJSON_ENCODINGS_FUNC(x)
UTFType
Runtime-specified UTF encoding type of a stream.
Definition: encodings.h:753
bool HasBOM() const
Represents an in-memory input byte stream.
Definition: memorystream.h:42
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition: rapidjson.h:416
f
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:126
OutputByteStream * os_
CharType Ch
Definition: encodings.h:98
UTF-32 big endian.
Definition: encodings.h:759
UTFType GetType() const
UTF-16 little endian.
Definition: encodings.h:756
EncodedInputStream & operator=(const EncodedInputStream &)
Output byte stream wrapper with statically bound encoding.
UTF-8.
Definition: encodings.h:755
AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
Constructor.
size_t PutEnd(Ch *)
Encoding::Ch Ch
Definition: encodedstream.h:44
UTF-16 big endian.
Definition: encodings.h:757
EncodedOutputStream(OutputByteStream &os, bool putBOM=true)
size_t Tell() const
Output stream wrapper with dynamically bound encoding and automatic encoding detection.
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
size_t PutEnd(Ch *)
Definition: encodedstream.h:80
EncodedInputStream(InputByteStream &is)
Definition: encodedstream.h:46
UTF-8 encoding.
Definition: encodings.h:96
InputByteStream * is_
Ch Peek() const
Definition: encodedstream.h:51
AutoUTFInputStream(InputByteStream &is, UTFType type=kUTF8)
Constructor.
Input byte stream wrapper with a statically bound encoding.
Definition: encodedstream.h:39
size_t Tell() const
size_t Tell() const
Definition: encodedstream.h:61
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch)==1)
UTFType GetType() const
size_t PutEnd(Ch *)
UTF-32 little endian.
Definition: encodings.h:758
InputByteStream & is_
Definition: encodedstream.h:90
OutputByteStream & os_
size_t Tell() const
size_t PutEnd(Ch *)


xbot_talker
Author(s): wangxiaoyun
autogenerated on Sat Oct 10 2020 03:27:53