json_stream_parser.h
Go to the documentation of this file.
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
33 
34 #include <stack>
35 #include <string>
36 
40 
41 #include <google/protobuf/port_def.inc>
42 
43 namespace google {
44 namespace protobuf {
45 namespace util {
46 namespace converter {
47 
48 class ObjectWriter;
49 
50 // A JSON parser that can parse a stream of JSON chunks rather than needing the
51 // entire JSON string up front. It is a modified version of the parser in
52 // //net/proto/json/json-parser.h that has been changed in the following ways:
53 // - Changed from recursion to an explicit stack to allow resumption
54 // - Added support for int64 and uint64 numbers
55 // - Removed support for octal and decimal escapes
56 // - Removed support for numeric keys
57 // - Removed support for functions (javascript)
58 // - Removed some lax-comma support (but kept trailing comma support)
59 // - Writes directly to an ObjectWriter rather than using subclassing
60 //
61 // Here is an example usage:
62 // JsonStreamParser parser(ow_.get());
63 // util::Status result = parser.Parse(chunk1);
64 // result.Update(parser.Parse(chunk2));
65 // result.Update(parser.FinishParse());
66 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON";
67 //
68 // This parser is thread-compatible as long as only one thread is calling a
69 // Parse() method at a time.
70 class PROTOBUF_EXPORT JsonStreamParser {
71  public:
72  // Creates a JsonStreamParser that will write to the given ObjectWriter.
73  explicit JsonStreamParser(ObjectWriter* ow);
74  virtual ~JsonStreamParser();
75 
76  // Parses a UTF-8 encoded JSON string from a StringPiece.
78 
79 
80  // Finish parsing the JSON string.
81  util::Status FinishParse();
82 
83 
84  // Sets the max recursion depth of JSON message to be deserialized. JSON
85  // messages over this depth will fail to be deserialized.
86  // Default value is 100.
87  void set_max_recursion_depth(int max_depth) {
88  max_recursion_depth_ = max_depth;
89  }
90 
91  private:
92  friend class JsonStreamParserTest;
93  // Return the current recursion depth.
94  int recursion_depth() { return recursion_depth_; }
95 
96  enum TokenType {
97  BEGIN_STRING, // " or '
98  BEGIN_NUMBER, // - or digit
99  BEGIN_TRUE, // true
100  BEGIN_FALSE, // false
101  BEGIN_NULL, // null
105  END_ARRAY, // ]
108  BEGIN_KEY, // letter, _, $ or digit. Must begin with non-digit
109  UNKNOWN // Unknown token or we ran out of the stream.
110  };
111 
112  enum ParseType {
113  VALUE, // Expects a {, [, true, false, null, string or number
114  OBJ_MID, // Expects a ',' or }
115  ENTRY, // Expects a key or }
116  ENTRY_MID, // Expects a :
117  ARRAY_VALUE, // Expects a value or ]
118  ARRAY_MID // Expects a ',' or ]
119  };
120 
121  // Holds the result of parsing a number
122  struct NumberResult {
123  enum Type { DOUBLE, INT, UINT };
125  union {
126  double double_val;
129  };
130  };
131 
132  // Parses a single chunk of JSON, returning an error if the JSON was invalid.
133  util::Status ParseChunk(StringPiece json);
134 
135  // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs
136  // out of data. If we unexpectedly run out of p_ we push the latest back onto
137  // the stack and return.
138  util::Status RunParser();
139 
140  // Parses a value from p_ and writes it to ow_.
141  // A value may be an object, array, true, false, null, string or number.
142  util::Status ParseValue(TokenType type);
143 
144  // Parses a string and writes it out to the ow_.
145  util::Status ParseString();
146 
147  // Parses a string, storing the result in parsed_.
148  util::Status ParseStringHelper();
149 
150  // This function parses unicode escape sequences in strings. It returns an
151  // error when there's a parsing error, either the size is not the expected
152  // size or a character is not a hex digit. When it returns str will contain
153  // what has been successfully parsed so far.
154  util::Status ParseUnicodeEscape();
155 
156  // Expects p_ to point to a JSON number, writes the number to the writer using
157  // the appropriate Render method based on the type of number.
159 
160  // Parse a number into a NumberResult, reporting an error if no number could
161  // be parsed. This method will try to parse into a uint64, int64, or double
162  // based on whether the number was positive or negative or had a decimal
163  // component.
164  util::Status ParseNumberHelper(NumberResult* result);
165 
166  // Parse a number as double into a NumberResult.
167  util::Status ParseDoubleHelper(const std::string& number,
168  NumberResult* result);
169 
170  // Handles a { during parsing of a value.
171  util::Status HandleBeginObject();
172 
173  // Parses from the ENTRY state.
174  util::Status ParseEntry(TokenType type);
175 
176  // Parses from the ENTRY_MID state.
177  util::Status ParseEntryMid(TokenType type);
178 
179  // Parses from the OBJ_MID state.
180  util::Status ParseObjectMid(TokenType type);
181 
182  // Handles a [ during parsing of a value.
183  util::Status HandleBeginArray();
184 
185  // Parses from the ARRAY_VALUE state.
186  util::Status ParseArrayValue(TokenType type);
187 
188  // Parses from the ARRAY_MID state.
189  util::Status ParseArrayMid(TokenType type);
190 
191  // Expects p_ to point to an unquoted literal
192  util::Status ParseTrue();
193  util::Status ParseFalse();
194  util::Status ParseNull();
195  util::Status ParseEmptyNull();
196 
197  // Whether an empty-null is allowed in the current state.
198  bool IsEmptyNullAllowed(TokenType type);
199 
200  // Report a failure as a util::Status.
201  util::Status ReportFailure(StringPiece message);
202 
203  // Report a failure due to an UNKNOWN token type. We check if we hit the
204  // end of the stream and if we're finishing or not to detect what type of
205  // status to return in this case.
206  util::Status ReportUnknown(StringPiece message);
207 
208  // Helper function to check recursion depth and increment it. It will return
209  // Status::OK if the current depth is allowed. Otherwise an error is returned.
210  // key is used for error reporting.
211  util::Status IncrementRecursionDepth(StringPiece key) const;
212 
213  // Advance p_ past all whitespace or until the end of the string.
214  void SkipWhitespace();
215 
216  // Advance p_ one UTF-8 character
217  void Advance();
218 
219  // Expects p_ to point to the beginning of a key.
220  util::Status ParseKey();
221 
222  // Return the type of the next token at p_.
223  TokenType GetNextTokenType();
224 
225  // The object writer to write parse events to.
227 
228  // The stack of parsing we still need to do. When the stack runs empty we will
229  // have parsed a single value from the root (e.g. an object or list).
230  std::stack<ParseType> stack_;
231 
232  // Contains any leftover text from a previous chunk that we weren't able to
233  // fully parse, for example the start of a key or number.
235 
236  // The current chunk of JSON being parsed. Primarily used for providing
237  // context during error reporting.
239 
240  // A pointer within the current JSON being parsed, used to track location.
242 
243  // Stores the last key read, as we separate parsing of keys and values.
245 
246  // Storage for key_ if we need to keep ownership, for example between chunks
247  // or if the key was unescaped from a JSON string.
249 
250  // True during the FinishParse() call, so we know that any errors are fatal.
251  // For example an unterminated string will normally result in cancelling and
252  // retrying during the next chunk, but during FinishParse() it is an error.
254 
255  // String we parsed during a call to ParseStringHelper().
257 
258  // Storage for the string we parsed. This may be empty if the string was able
259  // to be parsed directly from the input.
261 
262  // The character that opened the string, either ' or ".
263  // A value of 0 indicates that string parsing is not in process.
265 
266  // Storage for the chunk that is being parsed in ParseChunk().
268 
269  // Whether to allow non UTF-8 encoded input and replace invalid code points.
271 
272  // Whether to allow an empty string to represent a null array value or
273  // object entry value.
275 
276  // Whether to allow out-of-range floating point numbers or reject them.
278 
279  // Tracks current recursion depth.
280  mutable int recursion_depth_;
281 
282  // Maximum allowed recursion depth.
284 
286 };
287 
288 } // namespace converter
289 } // namespace util
290 } // namespace protobuf
291 } // namespace google
292 
293 #include <google/protobuf/port_undef.inc>
294 
295 #endif // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
google::protobuf::util::converter::JsonStreamParser::finishing_
bool finishing_
Definition: json_stream_parser.h:253
google::protobuf::util::converter::JsonStreamParser::NumberResult::int_val
int64 int_val
Definition: json_stream_parser.h:127
google::protobuf::util::converter::JsonStreamParser::VALUE_SEPARATOR
@ VALUE_SEPARATOR
Definition: json_stream_parser.h:107
google::protobuf::util::converter::JsonStreamParser::ENTRY
@ ENTRY
Definition: json_stream_parser.h:115
google::protobuf::int64
int64_t int64
Definition: protobuf/src/google/protobuf/stubs/port.h:151
google::protobuf::util::converter::JsonStreamParser::BEGIN_STRING
@ BEGIN_STRING
Definition: json_stream_parser.h:97
google::protobuf::util::converter::JsonStreamParser::BEGIN_NUMBER
@ BEGIN_NUMBER
Definition: json_stream_parser.h:98
google::protobuf::util::converter::JsonStreamParser::END_OBJECT
@ END_OBJECT
Definition: json_stream_parser.h:103
google::protobuf::util::converter::JsonStreamParser::BEGIN_KEY
@ BEGIN_KEY
Definition: json_stream_parser.h:108
google::protobuf::util::converter::JsonStreamParser::END_ARRAY
@ END_ARRAY
Definition: json_stream_parser.h:105
google::protobuf::util::converter::JsonStreamParser::chunk_storage_
std::string chunk_storage_
Definition: json_stream_parser.h:267
google::protobuf::util::converter::JsonStreamParser::OBJ_MID
@ OBJ_MID
Definition: json_stream_parser.h:114
google::protobuf::util::converter::ObjectWriter
Definition: object_writer.h:60
google::protobuf::util::converter::JsonStreamParser::parsed_storage_
std::string parsed_storage_
Definition: json_stream_parser.h:260
google::protobuf::util::converter::JsonStreamParser::ParseType
ParseType
Definition: json_stream_parser.h:112
string
GLsizei const GLchar *const * string
Definition: glcorearb.h:3083
google::protobuf::util::converter::JsonStreamParser::ENTRY_SEPARATOR
@ ENTRY_SEPARATOR
Definition: json_stream_parser.h:106
google::protobuf::util::converter::JsonStreamParser::coerce_to_utf8_
bool coerce_to_utf8_
Definition: json_stream_parser.h:270
google::protobuf::util::error::UNKNOWN
@ UNKNOWN
Definition: status.h:49
google::protobuf::util::converter::JsonStreamParser::p_
StringPiece p_
Definition: json_stream_parser.h:241
ParseNumber
static bool ParseNumber(State *state, int *number_out)
Definition: demangle.cc:638
google::protobuf::util::converter::JsonStreamParser::key_
StringPiece key_
Definition: json_stream_parser.h:244
google::protobuf::util::converter::JsonStreamParser::recursion_depth_
int recursion_depth_
Definition: json_stream_parser.h:280
GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS
#define GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName)
Definition: macros.h:45
google::protobuf::util::converter::JsonStreamParser::NumberResult::double_val
double double_val
Definition: json_stream_parser.h:126
stringpiece.h
google::protobuf::util::converter::JsonStreamParser::ENTRY_MID
@ ENTRY_MID
Definition: json_stream_parser.h:116
google::protobuf::util::converter::JsonStreamParser::BEGIN_ARRAY
@ BEGIN_ARRAY
Definition: json_stream_parser.h:104
google::protobuf::util::converter::JsonStreamParser::key_storage_
std::string key_storage_
Definition: json_stream_parser.h:248
Type
Definition: type.pb.h:182
google::protobuf::StringPiece
Definition: stringpiece.h:180
google::protobuf::util::converter::JsonStreamParser::NumberResult
Definition: json_stream_parser.h:122
google::protobuf::util::converter::JsonStreamParser
Definition: json_stream_parser.h:70
google::protobuf::util::converter::JsonStreamParser::BEGIN_FALSE
@ BEGIN_FALSE
Definition: json_stream_parser.h:100
google::protobuf::uint64
uint64_t uint64
Definition: protobuf/src/google/protobuf/stubs/port.h:156
google::protobuf::util::converter::JsonStreamParser::leftover_
std::string leftover_
Definition: json_stream_parser.h:234
google::protobuf::util::converter::JsonStreamParser::string_open_
char string_open_
Definition: json_stream_parser.h:264
key
const SETUP_TEARDOWN_TESTCONTEXT char * key
Definition: test_wss_transport.cpp:10
google::protobuf::util::converter::JsonStreamParser::BEGIN_TRUE
@ BEGIN_TRUE
Definition: json_stream_parser.h:99
google::protobuf::util::converter::JsonStreamParser::stack_
std::stack< ParseType > stack_
Definition: json_stream_parser.h:230
google::protobuf::util::converter::JsonStreamParser::ARRAY_VALUE
@ ARRAY_VALUE
Definition: json_stream_parser.h:117
google::protobuf::util::converter::JsonStreamParser::parsed_
StringPiece parsed_
Definition: json_stream_parser.h:256
type
GLenum type
Definition: glcorearb.h:2695
google::protobuf.json_format.Parse
def Parse(text, message, ignore_unknown_fields=False, descriptor_pool=None)
Definition: json_format.py:394
google::protobuf::util::converter::JsonStreamParser::TokenType
TokenType
Definition: json_stream_parser.h:96
common.h
google::protobuf::util::converter::JsonStreamParser::BEGIN_OBJECT
@ BEGIN_OBJECT
Definition: json_stream_parser.h:102
google::protobuf::util::converter::JsonStreamParser::max_recursion_depth_
int max_recursion_depth_
Definition: json_stream_parser.h:283
google::protobuf::util::converter::JsonStreamParserTest
Definition: json_stream_parser_test.cc:87
google::protobuf::util::converter::JsonStreamParser::NumberResult::uint_val
uint64 uint_val
Definition: json_stream_parser.h:128
google::protobuf::util::converter::JsonStreamParser::NumberResult::type
Type type
Definition: json_stream_parser.h:124
google::protobuf::util::Status
Definition: status.h:67
google::protobuf::util::converter::JsonStreamParser::BEGIN_NULL
@ BEGIN_NULL
Definition: json_stream_parser.h:101
google::protobuf::util::converter::JsonStreamParser::json_
StringPiece json_
Definition: json_stream_parser.h:238
google::protobuf::util::converter::JsonStreamParser::recursion_depth
int recursion_depth()
Definition: json_stream_parser.h:94
google::protobuf::util::converter::JsonStreamParser::VALUE
@ VALUE
Definition: json_stream_parser.h:113
google::protobuf::util::converter::JsonStreamParser::set_max_recursion_depth
void set_max_recursion_depth(int max_depth)
Definition: json_stream_parser.h:87
google::protobuf::util::converter::JsonStreamParser::ow_
ObjectWriter * ow_
Definition: json_stream_parser.h:226
google::protobuf::util::converter::JsonStreamParser::loose_float_number_conversion_
bool loose_float_number_conversion_
Definition: json_stream_parser.h:277
google::protobuf::util::converter::JsonStreamParser::allow_empty_null_
bool allow_empty_null_
Definition: json_stream_parser.h:274
number
double number
Definition: cJSON.h:326
status.h
google
Definition: data_proto2_to_proto3_util.h:11
message
GLenum GLuint GLenum GLsizei const GLchar * message
Definition: glcorearb.h:2695


libaditof
Author(s):
autogenerated on Wed May 21 2025 02:06:55