sajson.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012-2017 Chad Austin
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include <assert.h>
28 #include <stdint.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <math.h>
32 #include <limits.h>
33 #include <algorithm>
34 #include <cstdio>
35 #include <limits>
36 
37 #ifndef SAJSON_NO_STD_STRING
38 #include <string> // for convenient access to error messages and string values.
39 #endif
40 
41 #if defined(__GNUC__) || defined(__clang__)
42 #define SAJSON_LIKELY(x) __builtin_expect(!!(x), 1)
43 #define SAJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
44 #define SAJSON_ALWAYS_INLINE __attribute__((always_inline))
45 #define SAJSON_UNREACHABLE() __builtin_unreachable()
46 #define SAJSON_snprintf snprintf
47 #elif defined(_MSC_VER)
48 #define SAJSON_LIKELY(x) x
49 #define SAJSON_UNLIKELY(x) x
50 #define SAJSON_ALWAYS_INLINE __forceinline
51 #define SAJSON_UNREACHABLE() __assume(0)
52 #if (_MSC_VER <= 1800)
53 #define SAJSON_snprintf _snprintf
54 #else
55 #define SAJSON_snprintf snprintf
56 #endif
57 #else
58 #define SAJSON_LIKELY(x) x
59 #define SAJSON_UNLIKELY(x) x
60 #define SAJSON_ALWAYS_INLINE inline
61 #define SAJSON_UNREACHABLE() assert(!"unreachable")
62 #define SAJSON_snprintf snprintf
63 #endif
64 
68 namespace sajson {
69 
71  enum type: uint8_t {
74  TYPE_NULL = 2,
76  TYPE_TRUE = 4,
80  };
81 
82  namespace internal {
83  static const size_t TYPE_BITS = 3;
84  static const size_t TYPE_MASK = (1 << TYPE_BITS) - 1;
85  static const size_t VALUE_MASK = size_t(-1) >> TYPE_BITS;
86 
87  static const size_t ROOT_MARKER = VALUE_MASK;
88 
89  inline type get_element_type(size_t s) {
90  return static_cast<type>(s & TYPE_MASK);
91  }
92 
93  inline size_t get_element_value(size_t s) {
94  return s >> TYPE_BITS;
95  }
96 
97  inline size_t make_element(type t, size_t value) {
98  //assert((value & ~VALUE_MASK) == 0);
99  //value &= VALUE_MASK;
100  return static_cast<size_t>(t) | (value << TYPE_BITS);
101  }
102 
103  // This template utilizes the One Definition Rule to create global arrays in a header.
104  // This trick courtesy of Rich Geldreich's Purple JSON parser.
105  template<typename unused=void>
106  struct globals_struct {
107  static const unsigned char parse_flags[256];
108  };
110 
111  // bit 0 (1) - set if: plain ASCII string character
112  // bit 1 (2) - set if: whitespace
113  // bit 4 (0x10) - set if: 0-9 e E .
114  template<typename unused>
115  const uint8_t globals_struct<unused>::parse_flags[256] = {
116  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
117  0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, // 0
118  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1
119  3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2
120  0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, 0x11,0x11,1, 1, 1, 1, 1, 1, // 3
121  1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
122  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // 5
123  1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
124  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7
125 
126  // 128-255
127  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
128  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
129  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
130  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
131  };
132 
133  inline bool is_plain_string_character(char c) {
134  //return c >= 0x20 && c <= 0x7f && c != 0x22 && c != 0x5c;
135  return (globals::parse_flags[static_cast<unsigned char>(c)] & 1) != 0;
136  }
137 
138  inline bool is_whitespace(char c) {
139  //return c == '\r' || c == '\n' || c == '\t' || c == ' ';
140  return (globals::parse_flags[static_cast<unsigned char>(c)] & 2) != 0;
141  }
142 
144  public:
146  : memory(0)
147  {}
148 
149  explicit allocated_buffer(size_t length) {
150  // throws std::bad_alloc upon allocation failure
151  void* buffer = operator new(sizeof(size_t) + length);
152  memory = static_cast<layout*>(buffer);
153  memory->refcount = 1;
154  }
155 
157  : memory(that.memory)
158  {
159  incref();
160  }
161 
163  : memory(that.memory)
164  {
165  that.memory = 0;
166  }
167 
169  decref();
170  }
171 
173  if (this != &that) {
174  decref();
175  memory = that.memory;
176  incref();
177  }
178  return *this;
179  }
180 
182  if (this != &that) {
183  decref();
184  memory = that.memory;
185  that.memory = 0;
186  }
187  return *this;
188  }
189 
190  char* get_data() const {
191  return memory ? memory->data : 0;
192  }
193 
194  private:
195  void incref() const {
196  if (memory) {
197  ++(memory->refcount);
198  }
199  }
200 
201  void decref() const {
202  if (memory && --(memory->refcount) == 0) {
203  operator delete(memory);
204  }
205  }
206 
207  struct layout {
208  size_t refcount;
209  char data[1];
210  };
211 
213  };
214  }
215 
/// A (pointer, length) pair describing a run of characters.
/// Only the pointer is stored; the characters themselves are not copied.
class string {
public:
    /// Wraps `length` characters starting at `text_`.
    string(const char* text_, size_t length)
        : text(text_)
        , _length(length)
    {}

    /// Pointer to the first character.
    const char* data() const { return text; }

    /// Number of characters in the run.
    size_t length() const { return _length; }

#ifndef SAJSON_NO_STD_STRING
    /// Copies the characters into a new std::string.
    std::string as_string() const {
        const char* const first = text;
        const char* const last = first + _length;
        return std::string(first, last);
    }
#endif

private:
    const char* const text;
    const size_t _length;

    // Declared but never defined: default construction is not supported.
    string(); /*=delete*/
};
245 
248  class literal : public string {
249  public:
250  template <size_t sz>
251  explicit literal(const char (&text_)[sz])
252  : string(text_, sz - 1)
253  {
254  static_assert(sz > 0, "!");
255  }
256  };
257 
261  public:
264  : length_(0)
265  , data(0)
266  , buffer()
267  {}
268 
273  mutable_string_view(size_t length, char* data_)
274  : length_(length)
275  , data(data_)
276  , buffer()
277  {}
278 
282  : length_(s.length())
283  , buffer(length_)
284  {
285  data = buffer.get_data();
286  memcpy(data, s.data(), length_);
287  }
288 
291  mutable_string_view(const string& s)
292  : length_(s.length())
293  , buffer(length_)
294  {
295  data = buffer.get_data();
296  memcpy(data, s.data(), length_);
297  }
298 
303  : length_(that.length_)
304  , data(that.data)
305  , buffer(that.buffer)
306  {}
307 
310  : length_(that.length_)
311  , data(that.data)
312  , buffer(std::move(that.buffer))
313  {
314  that.length_ = 0;
315  that.data = 0;
316  }
317 
319  if (this != &that) {
320  length_ = that.length_;
321  data = that.data;
322  buffer = std::move(that.buffer);
323  that.length_ = 0;
324  that.data = 0;
325  }
326  return *this;
327  }
328 
330  if (this != &that) {
331  length_ = that.length_;
332  data = that.data;
333  buffer = that.buffer;
334  }
335  return *this;
336  }
337 
338  size_t length() const {
339  return length_;
340  }
341 
342  char* get_data() const {
343  return data;
344  }
345 
346  private:
347  size_t length_;
348  char* data;
349  internal::allocated_buffer buffer; // may not be allocated
350  };
351 
352  namespace internal {
354  size_t key_start;
355  size_t key_end;
356  size_t value;
357  };
358 
360  object_key_comparator(const char* object_data)
361  : data(object_data)
362  {}
363 
364  bool operator()(const object_key_record& lhs, const string& rhs) const {
365  const size_t lhs_length = lhs.key_end - lhs.key_start;
366  const size_t rhs_length = rhs.length();
367  if (lhs_length < rhs_length) {
368  return true;
369  } else if (lhs_length > rhs_length) {
370  return false;
371  }
372  return memcmp(data + lhs.key_start, rhs.data(), lhs_length) < 0;
373  }
374 
375  bool operator()(const string& lhs, const object_key_record& rhs) const {
376  return !(*this)(rhs, lhs);
377  }
378 
380  const object_key_record& lhs,
381  const object_key_record& rhs
382  ) {
383  const size_t lhs_length = lhs.key_end - lhs.key_start;
384  const size_t rhs_length = rhs.key_end - rhs.key_start;
385  if (lhs_length < rhs_length) {
386  return true;
387  } else if (lhs_length > rhs_length) {
388  return false;
389  }
390  return memcmp(
391  data + lhs.key_start,
392  data + rhs.key_start,
393  lhs_length
394  ) < 0;
395  }
396 
397  const char* data;
398  };
399  }
400 
401  namespace integer_storage {
402  enum {
404  };
405 
// Reads an int from the first sizeof(int) bytes of the word at `location`.
// memcpy is used so the read does not depend on pointer-cast type punning.
inline int load(const size_t* location) {
    int result;
    memcpy(&result, location, sizeof(int));
    return result;
}
411 
// Writes `value` into the first sizeof(int) bytes of the word at `location`;
// any remaining bytes of the word are left untouched.
inline void store(size_t* location, int value) {
    // The int must fit inside one word.
    static_assert(
        sizeof(int) <= sizeof(size_t),
        "size_t must not be smaller than int");
    // NOTE: a constant-size memcpy like this is normally compiled down to a
    // single store. A compiler that does not do so, and that permits punning
    // through a union, could be special-cased here.
    memcpy(location, &value, sizeof(int));
}
421  }
422 
// Helpers for keeping a double inside an array of size_t words.
namespace double_storage {
    enum {
        // Number of whole size_t words covered by one double.
        word_length = sizeof(double) / sizeof(size_t)
    };

    // Reads a double from the sizeof(double) bytes starting at `location`.
    inline double load(const size_t* location) {
        double result;
        memcpy(&result, location, sizeof(result));
        return result;
    }

    // Writes `value` into the sizeof(double) bytes starting at `location`.
    inline void store(size_t* location, double value) {
        // NOTE: a constant-size memcpy like this is normally compiled down to
        // a single store. A compiler that does not do so, and that permits
        // punning through a union, could be special-cased here.
        memcpy(location, &value, sizeof(value));
    }
}
441 
448  class value {
449  public:
451  type get_type() const {
452  return value_type;
453  }
454 
457  size_t get_length() const {
458  assert_type_2(TYPE_ARRAY, TYPE_OBJECT);
459  return payload[0];
460  }
461 
465  value get_array_element(size_t index) const {
466  using namespace internal;
467  assert_type(TYPE_ARRAY);
468  size_t element = payload[1 + index];
469  return value(get_element_type(element), payload + get_element_value(element), text);
470  }
471 
475  string get_object_key(size_t index) const {
476  assert_type(TYPE_OBJECT);
477  const size_t* s = payload + 1 + index * 3;
478  return string(text + s[0], s[1] - s[0]);
479  }
480 
483  value get_object_value(size_t index) const {
484  using namespace internal;
485  assert_type(TYPE_OBJECT);
486  size_t element = payload[3 + index * 3];
487  return value(get_element_type(element), payload + get_element_value(element), text);
488  }
489 
493  value get_value_of_key(const string& key) const {
494  assert_type(TYPE_OBJECT);
495  size_t i = find_object_key(key);
496  if (i < get_length()) {
497  return get_object_value(i);
498  } else {
499  return value(TYPE_NULL, 0, 0);
500  }
501  }
502 
507  size_t find_object_key(const string& key) const {
508  using namespace internal;
509  assert_type(TYPE_OBJECT);
510  const object_key_record* start = reinterpret_cast<const object_key_record*>(payload + 1);
511  const object_key_record* end = start + get_length();
512 #ifdef SAJSON_UNSORTED_OBJECT_KEYS
513  for (const object_key_record* i = start; i != end; ++i)
514 #else
515  const object_key_record* i = std::lower_bound(start, end, key, object_key_comparator(text));
516 #endif
517  if (i != end
518  && (i->key_end - i->key_start) == key.length()
519  && memcmp(key.data(), text + i->key_start, key.length()) == 0) {
520  return i - start;
521  }
522  return get_length();
523  }
524 
527  int get_integer_value() const {
528  assert_type(TYPE_INTEGER);
529  return integer_storage::load(payload);
530  }
531 
534  double get_double_value() const {
535  assert_type(TYPE_DOUBLE);
536  return double_storage::load(payload);
537  }
538 
541  double get_number_value() const {
542  assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
543  if (get_type() == TYPE_INTEGER) {
544  return get_integer_value();
545  } else {
546  return get_double_value();
547  }
548  }
549 
557  bool get_int53_value(int64_t* out) const {
558  // Make sure the output variable is always defined to avoid any
559  // possible situation like
560  // https://gist.github.com/chadaustin/2c249cb850619ddec05b23ca42cf7a18
561  *out = 0;
562 
563  assert_type_2(TYPE_INTEGER, TYPE_DOUBLE);
564  if (get_type() == TYPE_INTEGER) {
565  *out = get_integer_value();
566  return true;
567  } else if (get_type() == TYPE_DOUBLE) {
568  double v = get_double_value();
569  if (v < -(1LL << 53) || v > (1LL << 53)) {
570  return false;
571  }
572  int64_t as_int = static_cast<int64_t>(v);
573  if (as_int != v) {
574  return false;
575  }
576  *out = as_int;
577  return true;
578  } else {
579  return false;
580  }
581  }
582 
585  size_t get_string_length() const {
586  assert_type(TYPE_STRING);
587  return payload[1] - payload[0];
588  }
589 
596  const char* as_cstring() const {
597  assert_type(TYPE_STRING);
598  return text + payload[0];
599  }
600 
601 #ifndef SAJSON_NO_STD_STRING
602  std::string as_string() const {
605  assert_type(TYPE_STRING);
606  return std::string(text + payload[0], text + payload[1]);
607  }
608 #endif
609 
611  const size_t* _internal_get_payload() const {
612  return payload;
613  }
615 
616  private:
617  explicit value(type value_type_, const size_t* payload_, const char* text_)
618  : value_type(value_type_)
619  , payload(payload_)
620  , text(text_)
621  {}
622 
623  void assert_type(type expected) const {
624  assert(expected == get_type());
625  }
626 
627  void assert_type_2(type e1, type e2) const {
628  assert(e1 == get_type() || e2 == get_type());
629  }
630 
631  void assert_in_bounds(size_t i) const {
632  assert(i < get_length());
633  }
634 
636  const size_t* const payload;
637  const char* const text;
638 
639  friend class document;
640  };
641 
643  enum error {
667  };
668 
669  namespace internal {
670  class ownership {
671  public:
672  ownership() = delete;
673  ownership(const ownership&) = delete;
674  void operator=(const ownership&) = delete;
675 
676  explicit ownership(size_t* p_)
677  : p(p_)
678  {}
679 
681  : p(p_.p) {
682  p_.p = 0;
683  }
684 
686  delete[] p;
687  }
688 
689  bool is_valid() const {
690  return !!p;
691  }
692 
693  private:
694  size_t* p;
695  };
696 
697  inline const char* get_error_text(error error_code) {
698  switch (error_code) {
699  case ERROR_NO_ERROR: return "no error";
700  case ERROR_OUT_OF_MEMORY: return "out of memory";
701  case ERROR_UNEXPECTED_END: return "unexpected end of input";
702  case ERROR_MISSING_ROOT_ELEMENT: return "missing root element";
703  case ERROR_BAD_ROOT: return "document root must be object or array";
704  case ERROR_EXPECTED_COMMA: return "expected ,";
705  case ERROR_MISSING_OBJECT_KEY: return "missing object key";
706  case ERROR_EXPECTED_COLON: return "expected :";
707  case ERROR_EXPECTED_END_OF_INPUT: return "expected end of input";
708  case ERROR_UNEXPECTED_COMMA: return "unexpected comma";
709  case ERROR_EXPECTED_VALUE: return "expected value";
710  case ERROR_EXPECTED_NULL: return "expected 'null'";
711  case ERROR_EXPECTED_FALSE: return "expected 'false'";
712  case ERROR_EXPECTED_TRUE: return "expected 'true'";
713  case ERROR_INVALID_NUMBER: return "invalid number";
714  case ERROR_MISSING_EXPONENT: return "missing exponent";
715  case ERROR_ILLEGAL_CODEPOINT: return "illegal unprintable codepoint in string";
716  case ERROR_INVALID_UNICODE_ESCAPE: return "invalid character in unicode escape";
717  case ERROR_UNEXPECTED_END_OF_UTF16: return "unexpected end of input during UTF-16 surrogate pair";
718  case ERROR_EXPECTED_U: return "expected \\u";
719  case ERROR_INVALID_UTF16_TRAIL_SURROGATE: return "invalid UTF-16 trail surrogate";
720  case ERROR_UNKNOWN_ESCAPE: return "unknown escape";
721  case ERROR_INVALID_UTF8: return "invalid UTF-8";
722  }
723 
725  }
726  }
727 
737  class document {
738  public:
740  : input(rhs.input)
741  , structure(std::move(rhs.structure))
742  , root_type(rhs.root_type)
743  , root(rhs.root)
744  , error_line(rhs.error_line)
745  , error_column(rhs.error_column)
746  , error_code(rhs.error_code)
747  , error_arg(rhs.error_arg)
748  {
749  // Yikes... but strcpy is okay here because formatted_error is
750  // guaranteed to be null-terminated.
751  strcpy(formatted_error_message, rhs.formatted_error_message);
752  // should rhs's fields be zeroed too?
753  }
754 
761  bool is_valid() const {
762  return root_type == TYPE_ARRAY || root_type == TYPE_OBJECT;
763  }
764 
766  value get_root() const {
767  return value(root_type, root, input.get_data());
768  }
769 
771  size_t get_error_line() const {
772  return error_line;
773  }
774 
776  size_t get_error_column() const {
777  return error_column;
778  }
779 
780 #ifndef SAJSON_NO_STD_STRING
781  std::string get_error_message_as_string() const {
783  return formatted_error_message;
784  }
785 #endif
786 
788  const char* get_error_message_as_cstring() const {
789  return formatted_error_message;
790  }
791 
793 
794  // WARNING: Internal function which is subject to change
795  error _internal_get_error_code() const {
796  return error_code;
797  }
798 
799  // WARNING: Internal function which is subject to change
800  int _internal_get_error_argument() const {
801  return error_arg;
802  }
803 
804  // WARNING: Internal function which is subject to change
805  const char* _internal_get_error_text() const {
806  return internal::get_error_text(error_code);
807  }
808 
809  // WARNING: Internal function exposed only for high-performance language bindings.
810  type _internal_get_root_type() const {
811  return root_type;
812  }
813 
814  // WARNING: Internal function exposed only for high-performance language bindings.
815  const size_t* _internal_get_root() const {
816  return root;
817  }
818 
819  // WARNING: Internal function exposed only for high-performance language bindings.
820  const mutable_string_view& _internal_get_input() const {
821  return input;
822  }
823 
825 
826  private:
827  document(const document&) = delete;
828  void operator=(const document&) = delete;
829 
830  explicit document(const mutable_string_view& input_, internal::ownership&& structure_, type root_type_, const size_t* root_)
831  : input(input_)
832  , structure(std::move(structure_))
833  , root_type(root_type_)
834  , root(root_)
835  , error_line(0)
836  , error_column(0)
837  , error_code(ERROR_NO_ERROR)
838  , error_arg(0)
839  {
840  formatted_error_message[0] = 0;
841  }
842 
843  explicit document(const mutable_string_view& input_, size_t error_line_, size_t error_column_, const error error_code_, int error_arg_)
844  : input(input_)
845  , structure(0)
846  , root_type(TYPE_NULL)
847  , root(0)
848  , error_line(error_line_)
849  , error_column(error_column_)
850  , error_code(error_code_)
851  , error_arg(error_arg_)
852  {
853  formatted_error_message[ERROR_BUFFER_LENGTH - 1] = 0;
854  int written = has_significant_error_arg()
855  ? SAJSON_snprintf(formatted_error_message, ERROR_BUFFER_LENGTH - 1, "%s: %d", _internal_get_error_text(), error_arg)
856  : SAJSON_snprintf(formatted_error_message, ERROR_BUFFER_LENGTH - 1, "%s", _internal_get_error_text());
857  (void)written;
858  assert(written >= 0 && written < ERROR_BUFFER_LENGTH);
859  }
860 
862  return error_code == ERROR_ILLEGAL_CODEPOINT;
863  }
864 
868  const size_t* const root;
869  const size_t error_line;
870  const size_t error_column;
872  const int error_arg;
873 
874  enum { ERROR_BUFFER_LENGTH = 128 };
875  char formatted_error_message[ERROR_BUFFER_LENGTH];
876 
877  template<typename AllocationStrategy, typename StringType>
878  friend document parse(const AllocationStrategy& strategy, const StringType& string);
879  template<typename Allocator>
880  friend class parser;
881  };
882 
887  public:
889 
// Stack cursor over a buffer it does not own. No capacity checks are made
// here: push() and reserve() always report success.
class stack_head {
public:
    stack_head(stack_head&& other)
        : stack_bottom(other.stack_bottom)
        , stack_top(other.stack_top)
    {}

    // Appends one word; always returns true.
    bool push(size_t element) {
        *stack_top = element;
        ++stack_top;
        return true;
    }

    // Claims `amount` words and returns a pointer to the first of them;
    // *success is always set to true.
    size_t* reserve(size_t amount, bool* success) {
        size_t* const claimed = stack_top;
        stack_top = claimed + amount;
        *success = true;
        return claimed;
    }

    // The compiler does not see the stack_head (stored in a local)
    // and the allocator (stored as a field) have the same stack_bottom
    // values, so it does a bit of redundant work.
    // So there's a microoptimization available here: introduce a type
    // "stack_mark" and make it polymorphic on the allocator. For
    // single_allocation, it merely needs to be a single pointer.

    // Moves the top so the stack holds exactly `new_top` words.
    void reset(size_t new_top) {
        stack_top = stack_bottom + new_top;
    }

    // Number of words currently on the stack.
    size_t get_size() {
        return stack_top - stack_bottom;
    }

    size_t* get_top() {
        return stack_top;
    }

    // Converts a word offset from the bottom of the stack into a pointer.
    size_t* get_pointer_from_offset(size_t offset) {
        return stack_bottom + offset;
    }

private:
    stack_head() = delete;
    stack_head(const stack_head&) = delete;
    void operator=(const stack_head&) = delete;

    // Starts an empty stack at `base`.
    explicit stack_head(size_t* base)
        : stack_bottom(base)
        , stack_top(base)
    {}

    size_t* const stack_bottom;
    size_t* stack_top;

    friend class single_allocation;
};
947 
948  class allocator {
949  public:
950  allocator() = delete;
951  allocator(const allocator&) = delete;
952  void operator=(const allocator&) = delete;
953 
954  explicit allocator(size_t* buffer, size_t input_size, bool should_deallocate_)
955  : structure(buffer)
956  , structure_end(buffer ? buffer + input_size : 0)
957  , write_cursor(structure_end)
958  , should_deallocate(should_deallocate_)
959  {}
960 
961  explicit allocator(std::nullptr_t)
962  : structure(0)
963  , structure_end(0)
964  , write_cursor(0)
965  , should_deallocate(false)
966  {}
967 
968  allocator(allocator&& other)
969  : structure(other.structure)
970  , structure_end(other.structure_end)
971  , write_cursor(other.write_cursor)
972  , should_deallocate(other.should_deallocate)
973  {
974  other.structure = 0;
975  other.structure_end = 0;
976  other.write_cursor = 0;
977  other.should_deallocate = false;
978  }
979 
980  ~allocator() {
981  if (should_deallocate) {
982  delete[] structure;
983  }
984  }
985 
986  stack_head get_stack_head(bool* success) {
987  *success = true;
988  return stack_head(structure);
989  }
990 
991  size_t get_write_offset() {
992  return structure_end - write_cursor;
993  }
994 
995  size_t* get_write_pointer_of(size_t v) {
996  return structure_end - v;
997  }
998 
999  size_t* reserve(size_t size, bool* success) {
1000  *success = true;
1001  write_cursor -= size;
1002  return write_cursor;
1003  }
1004 
1005  size_t* get_ast_root() {
1006  return write_cursor;
1007  }
1008 
1009  internal::ownership transfer_ownership() {
1010  auto p = structure;
1011  structure = 0;
1012  structure_end = 0;
1013  write_cursor = 0;
1014  if (should_deallocate) {
1015  return internal::ownership(p);
1016  } else {
1017  return internal::ownership(0);
1018  }
1019  }
1020 
1021  private:
1022  size_t* structure;
1023  size_t* structure_end;
1024  size_t* write_cursor;
1025  bool should_deallocate;
1026  };
1027 
1029 
1033  : has_existing_buffer(false)
1034  , existing_buffer(0)
1035  , existing_buffer_size(0)
1036  {}
1037 
1042  single_allocation(size_t* existing_buffer_, size_t size_in_words)
1043  : has_existing_buffer(true)
1044  , existing_buffer(existing_buffer_)
1045  , existing_buffer_size(size_in_words)
1046  {}
1047 
1050  template<size_t N>
1051  explicit single_allocation(size_t (&existing_buffer_)[N])
1052  : single_allocation(existing_buffer_, N)
1053  {}
1054 
1056 
1057  allocator make_allocator(size_t input_document_size_in_bytes, bool* succeeded) const {
1058  if (has_existing_buffer) {
1059  if (existing_buffer_size < input_document_size_in_bytes) {
1060  *succeeded = false;
1061  return allocator(nullptr);
1062  }
1063  *succeeded = true;
1064  return allocator(existing_buffer, input_document_size_in_bytes, false);
1065  } else {
1066  size_t* buffer = new(std::nothrow) size_t[input_document_size_in_bytes];
1067  if (!buffer) {
1068  *succeeded = false;
1069  return allocator(nullptr);
1070  }
1071  *succeeded = true;
1072  return allocator(buffer, input_document_size_in_bytes, true);
1073  }
1074  }
1075 
1077 
1078  private:
1082  };
1083 
1088  public:
1090 
1091  class stack_head {
1092  public:
1093  stack_head(stack_head&& other)
1094  : stack_top(other.stack_top)
1095  , stack_bottom(other.stack_bottom)
1096  , stack_limit(other.stack_limit)
1097  {
1098  other.stack_top = 0;
1099  other.stack_bottom = 0;
1100  other.stack_limit = 0;
1101  }
1102 
1103  ~stack_head() {
1104  delete[] stack_bottom;
1105  }
1106 
1107  bool push(size_t element) {
1108  if (can_grow(1)) {
1109  *stack_top++ = element;
1110  return true;
1111  } else {
1112  return false;
1113  }
1114  }
1115 
1116  size_t* reserve(size_t amount, bool* success) {
1117  if (can_grow(amount)) {
1118  size_t* rv = stack_top;
1119  stack_top += amount;
1120  *success = true;
1121  return rv;
1122  } else {
1123  *success = false;
1124  return 0;
1125  }
1126  }
1127 
1128  void reset(size_t new_top) {
1129  stack_top = stack_bottom + new_top;
1130  }
1131 
1132  size_t get_size() {
1133  return stack_top - stack_bottom;
1134  }
1135 
1136  size_t* get_top() {
1137  return stack_top;
1138  }
1139 
1140  size_t* get_pointer_from_offset(size_t offset) {
1141  return stack_bottom + offset;
1142  }
1143 
1144  private:
1145  stack_head(const stack_head&) = delete;
1146  void operator=(const stack_head&) = delete;
1147 
1148  explicit stack_head(size_t initial_capacity, bool* success) {
1149  assert(initial_capacity);
1150  stack_bottom = new(std::nothrow) size_t[initial_capacity];
1151  stack_top = stack_bottom;
1152  if (stack_bottom) {
1153  stack_limit = stack_bottom + initial_capacity;
1154  } else {
1155  stack_limit = 0;
1156  }
1157  *success = !!stack_bottom;
1158  }
1159 
1160  bool can_grow(size_t amount) {
1161  if (SAJSON_LIKELY(amount <= static_cast<size_t>(stack_limit - stack_top))) {
1162  return true;
1163  }
1164 
1165  size_t current_size = stack_top - stack_bottom;
1166  size_t old_capacity = stack_limit - stack_bottom;
1167  size_t new_capacity = old_capacity * 2;
1168  while (new_capacity < amount + current_size) {
1169  new_capacity *= 2;
1170  }
1171  size_t* new_stack = new(std::nothrow) size_t[new_capacity];
1172  if (!new_stack) {
1173  stack_top = 0;
1174  stack_bottom = 0;
1175  stack_limit = 0;
1176  return false;
1177  }
1178 
1179  memcpy(new_stack, stack_bottom, current_size * sizeof(size_t));
1180  delete[] stack_bottom;
1181  stack_top = new_stack + current_size;
1182  stack_bottom = new_stack;
1183  stack_limit = stack_bottom + new_capacity;
1184  return true;
1185  }
1186 
1187  size_t* stack_top; // stack grows up: stack_top >= stack_bottom
1188  size_t* stack_bottom;
1189  size_t* stack_limit;
1190 
1191  friend class dynamic_allocation;
1192  };
1193 
1194  class allocator {
1195  public:
1196  allocator() = delete;
1197  allocator(const allocator&) = delete;
1198  void operator=(const allocator&) = delete;
1199 
1200  explicit allocator(size_t* buffer_, size_t current_capacity, size_t initial_stack_capacity_)
1201  : ast_buffer_bottom(buffer_)
1202  , ast_buffer_top(buffer_ + current_capacity)
1203  , ast_write_head(ast_buffer_top)
1204  , initial_stack_capacity(initial_stack_capacity_)
1205  {}
1206 
1207  explicit allocator(std::nullptr_t)
1208  : ast_buffer_bottom(0)
1209  , ast_buffer_top(0)
1210  , ast_write_head(0)
1211  , initial_stack_capacity(0)
1212  {}
1213 
1214  allocator(allocator&& other)
1215  : ast_buffer_bottom(other.ast_buffer_bottom)
1216  , ast_buffer_top(other.ast_buffer_top)
1217  , ast_write_head(other.ast_write_head)
1218  , initial_stack_capacity(other.initial_stack_capacity)
1219  {
1220  other.ast_buffer_bottom = 0;
1221  other.ast_buffer_top = 0;
1222  other.ast_write_head = 0;
1223  }
1224 
1225  ~allocator() {
1226  delete[] ast_buffer_bottom;
1227  }
1228 
1229  stack_head get_stack_head(bool* success) {
1230  return stack_head(initial_stack_capacity, success);
1231  }
1232 
1233  size_t get_write_offset() {
1234  return ast_buffer_top - ast_write_head;
1235  }
1236 
1237  size_t* get_write_pointer_of(size_t v) {
1238  return ast_buffer_top - v;
1239  }
1240 
1241  size_t* reserve(size_t size, bool* success) {
1242  if (can_grow(size)) {
1243  ast_write_head -= size;
1244  *success = true;
1245  return ast_write_head;
1246  } else {
1247  *success = false;
1248  return 0;
1249  }
1250  }
1251 
1252  size_t* get_ast_root() {
1253  return ast_write_head;
1254  }
1255 
1256  internal::ownership transfer_ownership() {
1257  auto p = ast_buffer_bottom;
1258  ast_buffer_bottom = 0;
1259  ast_buffer_top = 0;
1260  ast_write_head = 0;
1261  return internal::ownership(p);
1262  }
1263 
1264  private:
1265  bool can_grow(size_t amount) {
1266  if (SAJSON_LIKELY(amount <= static_cast<size_t>(ast_write_head - ast_buffer_bottom))) {
1267  return true;
1268  }
1269  size_t current_capacity = ast_buffer_top - ast_buffer_bottom;
1270 
1271  size_t current_size = ast_buffer_top - ast_write_head;
1272  size_t new_capacity = current_capacity * 2;
1273  while (new_capacity < amount + current_size) {
1274  new_capacity *= 2;
1275  }
1276 
1277  size_t* old_buffer = ast_buffer_bottom;
1278  size_t* new_buffer = new(std::nothrow) size_t[new_capacity];
1279  if (!new_buffer) {
1280  ast_buffer_bottom = 0;
1281  ast_buffer_top = 0;
1282  ast_write_head = 0;
1283  return false;
1284  }
1285 
1286  size_t* old_write_head = ast_write_head;
1287  ast_buffer_bottom = new_buffer;
1288  ast_buffer_top = new_buffer + new_capacity;
1289  ast_write_head = ast_buffer_top - current_size;
1290  memcpy(ast_write_head, old_write_head, current_size * sizeof(size_t));
1291  delete[] old_buffer;
1292 
1293  return true;
1294  }
1295 
1296  size_t* ast_buffer_bottom; // base address of the ast buffer - it grows down
1297  size_t* ast_buffer_top;
1298  size_t* ast_write_head;
1299  size_t initial_stack_capacity;
1300  };
1301 
1303 
1306  dynamic_allocation(size_t initial_ast_capacity_ = 0, size_t initial_stack_capacity_ = 0)
1307  : initial_ast_capacity(initial_ast_capacity_)
1308  , initial_stack_capacity(initial_stack_capacity_)
1309  {}
1310 
1312 
1313  allocator make_allocator(size_t, bool* succeeded) const {
1314  size_t capacity = initial_ast_capacity;
1315  if (!capacity) {
1316  // TODO: guess based on input document size
1317  capacity = 1024;
1318  }
1319 
1320  size_t* buffer = new(std::nothrow) size_t[capacity];
1321  if (!buffer) {
1322  *succeeded = false;
1323  return allocator(nullptr);
1324  }
1325 
1326  size_t stack_capacity = initial_stack_capacity;
1327  if (!stack_capacity) {
1328  stack_capacity = 256;
1329  }
1330 
1331  *succeeded = true;
1332  return allocator(buffer, capacity, stack_capacity);
1333  }
1334 
1336 
1337  private:
1340  };
1341 
1345  // of memory that can be used.
1347  public:
1349 
1350  class allocator;
1351 
1352  class stack_head {
1353  public:
1354  stack_head(stack_head&& other)
1355  : source_allocator(other.source_allocator)
1356  {
1357  other.source_allocator = 0;
1358  }
1359 
1360  bool push(size_t element) {
1361  if (SAJSON_LIKELY(source_allocator->can_grow(1))) {
1362  *(source_allocator->stack_top)++ = element;
1363  return true;
1364  } else {
1365  return false;
1366  }
1367  }
1368 
1369  size_t* reserve(size_t amount, bool* success) {
1370  if (SAJSON_LIKELY(source_allocator->can_grow(amount))) {
1371  size_t* rv = source_allocator->stack_top;
1372  source_allocator->stack_top += amount;
1373  *success = true;
1374  return rv;
1375  } else {
1376  *success = false;
1377  return 0;
1378  }
1379  }
1380 
1381  void reset(size_t new_top) {
1382  source_allocator->stack_top = source_allocator->structure + new_top;
1383  }
1384 
1385  size_t get_size() {
1386  return source_allocator->stack_top - source_allocator->structure;
1387  }
1388 
1389  size_t* get_top() {
1390  return source_allocator->stack_top;
1391  }
1392 
1393  size_t* get_pointer_from_offset(size_t offset) {
1394  return source_allocator->structure + offset;
1395  }
1396 
1397  private:
1398  stack_head(const stack_head&) = delete;
1399  void operator=(const stack_head&) = delete;
1400 
1401  explicit stack_head(allocator* source_allocator_)
1402  : source_allocator(source_allocator_)
1403  {}
1404 
1405  allocator* source_allocator;
1406 
1407  friend class bounded_allocation;
1408  };
1409 
1410  class allocator {
1411  public:
1412  allocator() = delete;
1413  allocator(const allocator&) = delete;
1414  void operator=(const allocator&) = delete;
1415 
1416  explicit allocator(size_t* existing_buffer, size_t existing_buffer_size)
1417  : structure(existing_buffer)
1418  , structure_end(existing_buffer + existing_buffer_size)
1419  , write_cursor(structure_end)
1420  , stack_top(structure)
1421  {}
1422 
1423  allocator(allocator&& other)
1424  : structure(other.structure)
1425  , structure_end(other.structure_end)
1426  , write_cursor(other.write_cursor)
1427  , stack_top(other.stack_top)
1428  {
1429  other.structure = 0;
1430  other.structure_end = 0;
1431  other.write_cursor = 0;
1432  other.stack_top = 0;
1433  }
1434 
1435  stack_head get_stack_head(bool* success) {
1436  *success = true;
1437  return stack_head(this);
1438  }
1439 
1440  size_t get_write_offset() {
1441  return structure_end - write_cursor;
1442  }
1443 
1444  size_t* get_write_pointer_of(size_t v) {
1445  return structure_end - v;
1446  }
1447 
1448  size_t* reserve(size_t size, bool* success) {
1449  if (can_grow(size)) {
1450  write_cursor -= size;
1451  *success = true;
1452  return write_cursor;
1453  } else {
1454  *success = false;
1455  return 0;
1456  }
1457  }
1458 
1459  size_t* get_ast_root() {
1460  return write_cursor;
1461  }
1462 
1463  internal::ownership transfer_ownership() {
1464  structure = 0;
1465  structure_end = 0;
1466  write_cursor = 0;
1467  return internal::ownership(0);
1468  }
1469 
1470  private:
1471  bool can_grow(size_t amount) {
1472  // invariant: stack_top <= write_cursor
1473  // thus: write_cursor - stack_top is positive
1474  return static_cast<size_t>(write_cursor - stack_top) >= amount;
1475  }
1476 
1477  size_t* structure;
1478  size_t* structure_end;
1479  size_t* write_cursor;
1480  size_t* stack_top;
1481 
1482  friend class bounded_allocation;
1483  };
1484 
1486 
1490  bounded_allocation(size_t* existing_buffer_, size_t size_in_words)
1491  : existing_buffer(existing_buffer_)
1492  , existing_buffer_size(size_in_words)
1493  {}
1494 
1497  template<size_t N>
1498  explicit bounded_allocation(size_t (&existing_buffer_)[N])
1499  : bounded_allocation(existing_buffer_, N)
1500  {}
1501 
1503 
1504  allocator make_allocator(size_t, bool* succeeded) const {
1505  *succeeded = true;
1506  return allocator(existing_buffer, existing_buffer_size);
1507  }
1508 
1510 
1511  private:
1514  };
1515 
1516  // I thought about putting parser in the internal namespace but I don't
1517  // want to indent it further...
1519  template<typename Allocator>
1520  class parser {
1521  public:
1522  parser(const mutable_string_view& msv, Allocator&& allocator_)
1523  : input(msv)
1524  , input_end(input.get_data() + input.length())
1525  , allocator(std::move(allocator_))
1526  , root_type(TYPE_NULL)
1527  , error_line(0)
1528  , error_column(0)
1529  {}
1530 
1531  document get_document() {
1532  if (parse()) {
1533  size_t* ast_root = allocator.get_ast_root();
1534  return document(input, allocator.transfer_ownership(), root_type, ast_root);
1535  } else {
1536  return document(input, error_line, error_column, error_code, error_arg);
1537  }
1538  }
1539 
1540  private:
/// Universal "failure" value: converts to `false` where a bool is
/// expected and to a null pointer where a `char*` is expected, so a
/// function of either return type can `return make_error(...)`.
struct error_result {
    operator bool() const { return false; }
    operator char*() const { return nullptr; }
};
1549 
1550  bool at_eof(const char* p) {
1551  return p == input_end;
1552  }
1553 
1554  char* skip_whitespace(char* p) {
1555  // There is an opportunity to make better use of superscalar
1556  // hardware here* but if someone cares about JSON parsing
1557  // performance the first thing they do is minify, so prefer
1558  // to optimize for code size here.
1559  // * https://github.com/chadaustin/Web-Benchmarks/blob/master/json/third-party/pjson/pjson.h#L1873
1560  for (;;) {
1561  if (SAJSON_UNLIKELY(p == input_end)) {
1562  return 0;
1563  } else if (internal::is_whitespace(*p)) {
1564  ++p;
1565  } else {
1566  return p;
1567  }
1568  }
1569  }
1570 
1571  error_result oom(char* p) {
1572  return make_error(p, ERROR_OUT_OF_MEMORY);
1573  }
1574 
1575  error_result unexpected_end() {
1576  return make_error(0, ERROR_UNEXPECTED_END);
1577  }
1578 
1579  error_result unexpected_end(char* p) {
1580  return make_error(p, ERROR_UNEXPECTED_END);
1581  }
1582 
1583  error_result make_error(char* p, error code, int arg = 0) {
1584  if (!p) {
1585  p = input_end;
1586  }
1587 
1588  error_line = 1;
1589  error_column = 1;
1590 
1591  char* c = input.get_data();
1592  while (c < p) {
1593  if (*c == '\r') {
1594  if (c + 1 < p && c[1] == '\n') {
1595  ++error_line;
1596  error_column = 1;
1597  ++c;
1598  } else {
1599  ++error_line;
1600  error_column = 1;
1601  }
1602  } else if (*c == '\n') {
1603  ++error_line;
1604  error_column = 1;
1605  } else {
1606  // TODO: count UTF-8 characters
1607  ++error_column;
1608  }
1609  ++c;
1610  }
1611 
1612  error_code = code;
1613  error_arg = arg;
1614  return error_result();
1615  }
1616 
// Parses the whole input buffer into the allocator's AST storage.
//
// Returns true on success, after storing the root structure's type in
// root_type. Every failure path returns a false-converting value after
// make_error() has recorded the error (directly, via oom()/unexpected_end(),
// or inside the parse_* helper that returned null).
//
// The function is a goto-driven state machine. The labelled states live
// inside an `if (0)` block so that they are entered only by jumping to them.
// Values seen so far for the structure being parsed are kept on `stack`;
// when a structure closes, install_array()/install_object() copy them into
// the AST.
1617  bool parse() {
1618  using namespace internal;
1619 
1620  // p points to the character currently being parsed
1621  char* p = input.get_data();
1622 
1623  bool success;
1624  auto stack = allocator.get_stack_head(&success);
1625  if (SAJSON_UNLIKELY(!success)) {
1626  return oom(p);
1627  }
1628 
1629  p = skip_whitespace(p);
1630  if (SAJSON_UNLIKELY(!p)) {
1631  return make_error(p, ERROR_MISSING_ROOT_ELEMENT);
1632  }
1633 
// The root value must be an array or an object; anything else is
// ERROR_BAD_ROOT. The root's stack slot carries ROOT_MARKER where nested
// structures carry the offset of their parent's base.
1634  // current_base is an offset to the first element of the current structure (object or array)
1635  size_t current_base = stack.get_size();
1636  type current_structure_type;
1637  if (*p == '[') {
1638  current_structure_type = TYPE_ARRAY;
1639  bool s = stack.push(make_element(current_structure_type, ROOT_MARKER));
1640  if (SAJSON_UNLIKELY(!s)) {
1641  return oom(p);
1642  }
1643  goto array_close_or_element;
1644  } else if (*p == '{') {
1645  current_structure_type = TYPE_OBJECT;
1646  bool s = stack.push(make_element(current_structure_type, ROOT_MARKER));
1647  if (SAJSON_UNLIKELY(!s)) {
1648  return oom(p);
1649  }
1650  goto object_close_or_element;
1651  } else {
1652  return make_error(p, ERROR_BAD_ROOT);
1653  }
1654 
1655  // BEGIN STATE MACHINE
1656 
1657  size_t pop_element; // used as an argument into the `pop` routine
1658 
1659  if (0) { // purely for structure
1660 
// State: just consumed '['. Either the array closes immediately or a
// value follows.
1661  // ASSUMES: byte at p SHOULD be skipped
1662  array_close_or_element:
1663  p = skip_whitespace(p + 1);
1664  if (SAJSON_UNLIKELY(!p)) {
1665  return unexpected_end();
1666  }
1667  if (*p == ']') {
1668  goto pop_array;
1669  } else {
1670  goto next_element;
1671  }
1673 
// State: just consumed '{'. Either the object closes immediately or a
// key follows.
1674  // ASSUMES: byte at p SHOULD be skipped
1675  object_close_or_element:
1676  p = skip_whitespace(p + 1);
1677  if (SAJSON_UNLIKELY(!p)) {
1678  return unexpected_end();
1679  }
1680  if (*p == '}') {
1681  goto pop_object;
1682  } else {
1683  goto object_key;
1684  }
1686 
// State: a value has just been pushed. Expect the closing bracket of the
// current structure or a comma introducing the next value/key.
1687  // ASSUMES: byte at p SHOULD NOT be skipped
1688  structure_close_or_comma:
1689  p = skip_whitespace(p);
1690  if (SAJSON_UNLIKELY(!p)) {
1691  return unexpected_end();
1692  }
1693 
1694  if (current_structure_type == TYPE_ARRAY) {
1695  if (*p == ']') {
1696  goto pop_array;
1697  } else {
1698  if (SAJSON_UNLIKELY(*p != ',')) {
1699  return make_error(p, ERROR_EXPECTED_COMMA);
1700  }
1701  ++p;
1702  goto next_element;
1703  }
1704  } else {
1705  assert(current_structure_type == TYPE_OBJECT);
1706  if (*p == '}') {
1707  goto pop_object;
1708  } else {
1709  if (SAJSON_UNLIKELY(*p != ',')) {
1710  return make_error(p, ERROR_EXPECTED_COMMA);
1711  }
1712  ++p;
1713  goto object_key;
1714  }
1715  }
1717 
// State: close the current object. The word at current_base is the slot
// pushed when the object was opened; the words after it, up to the stack
// top, are handed to install_object().
1718  // ASSUMES: *p == '}'
1719  pop_object: {
1720  ++p;
1721  size_t* base_ptr = stack.get_pointer_from_offset(current_base);
1722  pop_element = *base_ptr;
1723  if (SAJSON_UNLIKELY(!install_object(base_ptr + 1, stack.get_top()))) {
1724  return oom(p);
1725  }
1726  goto pop;
1727  }
1728 
// State: close the current array; same shape as pop_object but the
// collected words go to install_array().
1729  // ASSUMES: *p == ']'
1730  pop_array: {
1731  ++p;
1732  size_t* base_ptr = stack.get_pointer_from_offset(current_base);
1733  pop_element = *base_ptr;
1734  if (SAJSON_UNLIKELY(!install_array(base_ptr + 1, stack.get_top()))) {
1735  return oom(p);
1736  }
1737  goto pop;
1738  }
1739 
// State: expect a quoted object key followed by ':'. Two stack words are
// reserved for the key and filled in by parse_string().
1740  // ASSUMES: byte at p SHOULD NOT be skipped
1741  object_key: {
1742  p = skip_whitespace(p);
1743  if (SAJSON_UNLIKELY(!p)) {
1744  return unexpected_end();
1745  }
1746  if (SAJSON_UNLIKELY(*p != '"')) {
1747  return make_error(p, ERROR_MISSING_OBJECT_KEY);
1748  }
1749  bool success_;
1750  size_t* out = stack.reserve(2, &success_);
1751  if (SAJSON_UNLIKELY(!success_)) {
1752  return oom(p);
1753  }
1754  p = parse_string(p, out);
1755  if (SAJSON_UNLIKELY(!p)) {
1756  return false;
1757  }
1758  p = skip_whitespace(p);
1759  if (SAJSON_UNLIKELY(!p || *p != ':')) {
1760  return make_error(p, ERROR_EXPECTED_COLON);
1761  }
1762  ++p;
1763  goto next_element;
1764  }
1765 
// State: parse one value, dispatching on its first byte. Scalar cases
// `break` out of the switch with value_type_result set; '[' and '{' open a
// nested structure and jump away instead.
1766  // ASSUMES: byte at p SHOULD NOT be skipped
1767  next_element:
1768  p = skip_whitespace(p);
1769  if (SAJSON_UNLIKELY(!p)) {
1770  return unexpected_end();
1771  }
1772 
1773  type value_type_result;
1774  switch (*p) {
// A NUL byte inside the input is reported as an unexpected end at p.
1775  case 0:
1776  return unexpected_end(p);
1777  case 'n':
1778  p = parse_null(p);
1779  if (!p) {
1780  return false;
1781  }
1782  value_type_result = TYPE_NULL;
1783  break;
1784  case 'f':
1785  p = parse_false(p);
1786  if (!p) {
1787  return false;
1788  }
1789  value_type_result = TYPE_FALSE;
1790  break;
1791  case 't':
1792  p = parse_true(p);
1793  if (!p) {
1794  return false;
1795  }
1796  value_type_result = TYPE_TRUE;
1797  break;
1798  case '0':
1799  case '1':
1800  case '2':
1801  case '3':
1802  case '4':
1803  case '5':
1804  case '6':
1805  case '7':
1806  case '8':
1807  case '9':
1808  case '-': {
1809  auto result = parse_number(p);
1810  p = result.first;
1811  if (!p) {
1812  return false;
1813  }
1814  value_type_result = result.second;
1815  break;
1816  }
// String values reserve their two tag words in AST storage (object keys,
// by contrast, reserve theirs on the stack).
1817  case '"': {
1818  bool success_;
1819  size_t* string_tag = allocator.reserve(2, &success_);
1820  if (SAJSON_UNLIKELY(!success_)) {
1821  return oom(p);
1822  }
1823  p = parse_string(p, string_tag);
1824  if (!p) {
1825  return false;
1826  }
1827  value_type_result = TYPE_STRING;
1828  break;
1829  }
1830 
// Opening a nested structure: the new base slot remembers the enclosing
// structure's type and base offset so that `pop` can restore them.
1831  case '[': {
1832  size_t previous_base = current_base;
1833  current_base = stack.get_size();
1834  bool s = stack.push(make_element(current_structure_type, previous_base));
1835  if (SAJSON_UNLIKELY(!s)) {
1836  return oom(p);
1837  }
1838  current_structure_type = TYPE_ARRAY;
1839  goto array_close_or_element;
1840  }
1841  case '{': {
1842  size_t previous_base = current_base;
1843  current_base = stack.get_size();
1844  bool s = stack.push(make_element(current_structure_type, previous_base));
1845  if (SAJSON_UNLIKELY(!s)) {
1846  return oom(p);
1847  }
1848  current_structure_type = TYPE_OBJECT;
1849  goto object_close_or_element;
1850  }
// `pop` is a label placed inside the switch (not a case) so that its
// `break` falls into the shared "push the finished value" code below.
// At the root, only whitespace may remain in the input. Otherwise the
// stack is cut back to the closed structure's base and the parent's base
// and type are restored from pop_element.
1851  pop: {
1852  size_t parent = get_element_value(pop_element);
1853  if (parent == ROOT_MARKER) {
1854  root_type = current_structure_type;
1855  p = skip_whitespace(p);
1856  if (SAJSON_UNLIKELY(p)) {
1857  return make_error(p, ERROR_EXPECTED_END_OF_INPUT);
1858  }
1859  return true;
1860  }
1861  stack.reset(current_base);
1862  current_base = parent;
1863  value_type_result = current_structure_type;
1864  current_structure_type = get_element_type(pop_element);
1865  break;
1866  }
1867 
1868  case ',':
1869  return make_error(p, ERROR_UNEXPECTED_COMMA);
1870  default:
1871  return make_error(p, ERROR_EXPECTED_VALUE);
1872  }
1873 
// Record the value just parsed (or the structure just closed) on the
// stack as its type plus the allocator's current write offset.
1874  bool s = stack.push(make_element(
1875  value_type_result,
1876  allocator.get_write_offset()));
1877  if (SAJSON_UNLIKELY(!s)) {
1878  return oom(p);
1879  }
1880 
1881  goto structure_close_or_comma;
1882  }
1883 
1885  }
1886 
1887  bool has_remaining_characters(char* p, ptrdiff_t remaining) {
1888  return input_end - p >= remaining;
1889  }
1890 
1891  char* parse_null(char* p) {
1892  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
1893  make_error(p, ERROR_UNEXPECTED_END);
1894  return 0;
1895  }
1896  char p1 = p[1];
1897  char p2 = p[2];
1898  char p3 = p[3];
1899  if (SAJSON_UNLIKELY(p1 != 'u' || p2 != 'l' || p3 != 'l')) {
1900  make_error(p, ERROR_EXPECTED_NULL);
1901  return 0;
1902  }
1903  return p + 4;
1904  }
1905 
1906  char* parse_false(char* p) {
1907  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 5))) {
1908  return make_error(p, ERROR_UNEXPECTED_END);
1909  }
1910  char p1 = p[1];
1911  char p2 = p[2];
1912  char p3 = p[3];
1913  char p4 = p[4];
1914  if (SAJSON_UNLIKELY(p1 != 'a' || p2 != 'l' || p3 != 's' || p4 != 'e')) {
1915  return make_error(p, ERROR_EXPECTED_FALSE);
1916  }
1917  return p + 5;
1918  }
1919 
1920  char* parse_true(char* p) {
1921  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
1922  return make_error(p, ERROR_UNEXPECTED_END);
1923  }
1924  char p1 = p[1];
1925  char p2 = p[2];
1926  char p3 = p[3];
1927  if (SAJSON_UNLIKELY(p1 != 'r' || p2 != 'u' || p3 != 'e')) {
1928  return make_error(p, ERROR_EXPECTED_TRUE);
1929  }
1930  return p + 4;
1931  }
1932 
1933  static double pow10(int64_t exponent) {
1934  if (SAJSON_UNLIKELY(exponent > 308)) {
1935  return std::numeric_limits<double>::infinity();
1936  } else if (SAJSON_UNLIKELY(exponent < -323)) {
1937  return 0.0;
1938  }
1939  static const double constants[] = {
1940  1e-323,1e-322,1e-321,1e-320,1e-319,1e-318,1e-317,1e-316,1e-315,1e-314,
1941  1e-313,1e-312,1e-311,1e-310,1e-309,1e-308,1e-307,1e-306,1e-305,1e-304,
1942  1e-303,1e-302,1e-301,1e-300,1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,
1943  1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,
1944  1e-283,1e-282,1e-281,1e-280,1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,
1945  1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,
1946  1e-263,1e-262,1e-261,1e-260,1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,
1947  1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,
1948  1e-243,1e-242,1e-241,1e-240,1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,
1949  1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,
1950  1e-223,1e-222,1e-221,1e-220,1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,
1951  1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,
1952  1e-203,1e-202,1e-201,1e-200,1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,
1953  1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,
1954  1e-183,1e-182,1e-181,1e-180,1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,
1955  1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,
1956  1e-163,1e-162,1e-161,1e-160,1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,
1957  1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,
1958  1e-143,1e-142,1e-141,1e-140,1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,
1959  1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,
1960  1e-123,1e-122,1e-121,1e-120,1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,
1961  1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,
1962  1e-103,1e-102,1e-101,1e-100,1e-99,1e-98,1e-97,1e-96,1e-95,1e-94,1e-93,
1963  1e-92,1e-91,1e-90,1e-89,1e-88,1e-87,1e-86,1e-85,1e-84,1e-83,1e-82,1e-81,
1964  1e-80,1e-79,1e-78,1e-77,1e-76,1e-75,1e-74,1e-73,1e-72,1e-71,1e-70,1e-69,
1965  1e-68,1e-67,1e-66,1e-65,1e-64,1e-63,1e-62,1e-61,1e-60,1e-59,1e-58,1e-57,
1966  1e-56,1e-55,1e-54,1e-53,1e-52,1e-51,1e-50,1e-49,1e-48,1e-47,1e-46,1e-45,
1967  1e-44,1e-43,1e-42,1e-41,1e-40,1e-39,1e-38,1e-37,1e-36,1e-35,1e-34,1e-33,
1968  1e-32,1e-31,1e-30,1e-29,1e-28,1e-27,1e-26,1e-25,1e-24,1e-23,1e-22,1e-21,
1969  1e-20,1e-19,1e-18,1e-17,1e-16,1e-15,1e-14,1e-13,1e-12,1e-11,1e-10,1e-9,
1970  1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2,1e3,1e4,1e5,1e6,1e7,
1971  1e8,1e9,1e10,1e11,1e12,1e13,1e14,1e15,1e16,1e17,1e18,1e19,1e20,1e21,
1972  1e22,1e23,1e24,1e25,1e26,1e27,1e28,1e29,1e30,1e31,1e32,1e33,1e34,1e35,
1973  1e36,1e37,1e38,1e39,1e40,1e41,1e42,1e43,1e44,1e45,1e46,1e47,1e48,1e49,
1974  1e50,1e51,1e52,1e53,1e54,1e55,1e56,1e57,1e58,1e59,1e60,1e61,1e62,1e63,
1975  1e64,1e65,1e66,1e67,1e68,1e69,1e70,1e71,1e72,1e73,1e74,1e75,1e76,1e77,
1976  1e78,1e79,1e80,1e81,1e82,1e83,1e84,1e85,1e86,1e87,1e88,1e89,1e90,1e91,
1977  1e92,1e93,1e94,1e95,1e96,1e97,1e98,1e99,1e100,1e101,1e102,1e103,1e104,
1978  1e105,1e106,1e107,1e108,1e109,1e110,1e111,1e112,1e113,1e114,1e115,1e116,
1979  1e117,1e118,1e119,1e120,1e121,1e122,1e123,1e124,1e125,1e126,1e127,1e128,
1980  1e129,1e130,1e131,1e132,1e133,1e134,1e135,1e136,1e137,1e138,1e139,1e140,
1981  1e141,1e142,1e143,1e144,1e145,1e146,1e147,1e148,1e149,1e150,1e151,1e152,
1982  1e153,1e154,1e155,1e156,1e157,1e158,1e159,1e160,1e161,1e162,1e163,1e164,
1983  1e165,1e166,1e167,1e168,1e169,1e170,1e171,1e172,1e173,1e174,1e175,1e176,
1984  1e177,1e178,1e179,1e180,1e181,1e182,1e183,1e184,1e185,1e186,1e187,1e188,
1985  1e189,1e190,1e191,1e192,1e193,1e194,1e195,1e196,1e197,1e198,1e199,1e200,
1986  1e201,1e202,1e203,1e204,1e205,1e206,1e207,1e208,1e209,1e210,1e211,1e212,
1987  1e213,1e214,1e215,1e216,1e217,1e218,1e219,1e220,1e221,1e222,1e223,1e224,
1988  1e225,1e226,1e227,1e228,1e229,1e230,1e231,1e232,1e233,1e234,1e235,1e236,
1989  1e237,1e238,1e239,1e240,1e241,1e242,1e243,1e244,1e245,1e246,1e247,1e248,
1990  1e249,1e250,1e251,1e252,1e253,1e254,1e255,1e256,1e257,1e258,1e259,1e260,
1991  1e261,1e262,1e263,1e264,1e265,1e266,1e267,1e268,1e269,1e270,1e271,1e272,
1992  1e273,1e274,1e275,1e276,1e277,1e278,1e279,1e280,1e281,1e282,1e283,1e284,
1993  1e285,1e286,1e287,1e288,1e289,1e290,1e291,1e292,1e293,1e294,1e295,1e296,
1994  1e297,1e298,1e299,1e300,1e301,1e302,1e303,1e304,1e305,1e306,1e307,1e308
1995  };
1996  return constants[exponent + 323];
1997  }
1998 
// Parses a JSON number starting at p (a digit or '-').
//
// Returns {position after the number, TYPE_INTEGER or TYPE_DOUBLE} on
// success, with the value stored in storage reserved from the allocator.
// On failure the first member is null (make_error has been called) and the
// second is TYPE_NULL.
//
// The value is accumulated in an int until it might overflow or until a
// fraction/exponent is seen; from then on it is accumulated in a double
// (try_double). Reaching end of input anywhere inside or directly after the
// number is reported as ERROR_UNEXPECTED_END.
1999  std::pair<char*, type> parse_number(char* p) {
2000  bool negative = false;
2001  if ('-' == *p) {
2002  ++p;
2003  negative = true;
2004 
2005  if (SAJSON_UNLIKELY(at_eof(p))) {
2006  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2007  }
2008  }
2009 
2010  bool try_double = false;
2011 
2012  int i = 0;
2013  double d = 0.0; // gcc complains that d might be used uninitialized which isn't true. appease the warning anyway.
// Integer part: a single '0', or a run of digits. After a leading '0' no
// further integer digits are consumed here.
2014  if (*p == '0') {
2015  ++p;
2016  if (SAJSON_UNLIKELY(at_eof(p))) {
2017  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2018  }
2019  } else {
2020  unsigned char c = *p;
2021  if (c < '0' || c > '9') {
2022  return std::make_pair(make_error(p, ERROR_INVALID_NUMBER), TYPE_NULL);
2023  }
2024 
2025  do {
2026  ++p;
2027  if (SAJSON_UNLIKELY(at_eof(p))) {
2028  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2029  }
2030 
2031  unsigned char digit = c - '0';
2032 
// Switch to double accumulation before `10 * i + digit` could exceed
// INT_MAX; the threshold is conservative (it allows for any digit 0-9).
2033  if (SAJSON_UNLIKELY(!try_double && i > INT_MAX / 10 - 9)) {
2034  // TODO: could split this into two loops
2035  try_double = true;
2036  d = i;
2037  }
2038  if (SAJSON_UNLIKELY(try_double)) {
2039  d = 10.0 * d + digit;
2040  } else {
2041  i = 10 * i + digit;
2042  }
2043 
2044  c = *p;
2045  } while (c >= '0' && c <= '9');
2046  }
2047 
// `exponent` is the power of ten still to be applied to d: each fraction
// digit decrements it, and an explicit exponent is added to it afterwards.
2048  int64_t exponent = 0;
2049 
// Fraction: '.' must be followed by at least one digit.
2050  if ('.' == *p) {
2051  if (!try_double) {
2052  try_double = true;
2053  d = i;
2054  }
2055  ++p;
2056  if (SAJSON_UNLIKELY(at_eof(p))) {
2057  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2058  }
2059  char c = *p;
2060  if (c < '0' || c > '9') {
2061  return std::make_pair(make_error(p, ERROR_INVALID_NUMBER), TYPE_NULL);
2062  }
2063 
2064  do {
2065  ++p;
2066  if (SAJSON_UNLIKELY(at_eof(p))) {
2067  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2068  }
2069  d = d * 10 + (c - '0');
2070  // One option to avoid underflow would be to clamp
2071  // to INT_MIN, but int64 subtraction is cheap and
2072  // in the absurd case of parsing 2 GB of digits
2073  // with an extremely high exponent, this will
2074  // produce accurate results. Instead, we just
2075  // leave exponent as int64_t and it will never
2076  // underflow.
2077  --exponent;
2078 
2079  c = *p;
2080  } while (c >= '0' && c <= '9');
2081  }
2082 
// Exponent: 'e' or 'E', an optional sign, then at least one digit.
2083  char e = *p;
2084  if ('e' == e || 'E' == e) {
2085  if (!try_double) {
2086  try_double = true;
2087  d = i;
2088  }
2089  ++p;
2090  if (SAJSON_UNLIKELY(at_eof(p))) {
2091  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2092  }
2093 
2094  bool negativeExponent = false;
2095  if ('-' == *p) {
2096  negativeExponent = true;
2097  ++p;
2098  if (SAJSON_UNLIKELY(at_eof(p))) {
2099  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2100  }
2101  } else if ('+' == *p) {
2102  ++p;
2103  if (SAJSON_UNLIKELY(at_eof(p))) {
2104  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2105  }
2106  }
2107 
2108  int exp = 0;
2109 
2110  char c = *p;
2111  if (SAJSON_UNLIKELY(c < '0' || c > '9')) {
2112  return std::make_pair(make_error(p, ERROR_MISSING_EXPONENT), TYPE_NULL);
2113  }
2114  for (;;) {
2115  // c guaranteed to be between '0' and '9', inclusive
2116  unsigned char digit = c - '0';
2117  if (exp > (INT_MAX - digit) / 10) {
2118  // The exponent overflowed. Keep parsing, but
2119  // it will definitely be out of range when
2120  // pow10 is called.
2121  exp = INT_MAX;
2122  } else {
2123  exp = 10 * exp + digit;
2124  }
2125 
2126  ++p;
2127  if (SAJSON_UNLIKELY(at_eof(p))) {
2128  return std::make_pair(make_error(p, ERROR_UNEXPECTED_END), TYPE_NULL);
2129  }
2130 
2131  c = *p;
2132  if (c < '0' || c > '9') {
2133  break;
2134  }
2135  }
2136  static_assert(-INT_MAX >= INT_MIN, "exp can be negated without loss or UB");
2137  exponent += (negativeExponent ? -exp : exp);
2138  }
2139 
// Apply the combined power of ten. A zero net exponent skips scaling.
2140  if (exponent) {
2141  assert(try_double);
2142  // If d is zero but the exponent is huge, don't
2143  // multiply zero by inf which gives nan.
2144  if (d != 0.0) {
2145  d *= pow10(exponent);
2146  }
2147  }
2148 
2149  if (negative) {
2150  if (try_double) {
2151  d = -d;
2152  } else {
2153  i = -i;
2154  }
2155  }
// Store the result in freshly reserved AST storage; running out of space
// is reported through oom().
2156  if (try_double) {
2157  bool success;
2158  size_t* out = allocator.reserve(double_storage::word_length, &success);
2159  if (SAJSON_UNLIKELY(!success)) {
2160  return std::make_pair(oom(p), TYPE_NULL);
2161  }
2162  double_storage::store(out, d);
2163  return std::make_pair(p, TYPE_DOUBLE);
2164  } else {
2165  bool success;
2166  size_t* out = allocator.reserve(integer_storage::word_length, &success);
2167  if (SAJSON_UNLIKELY(!success)) {
2168  return std::make_pair(oom(p), TYPE_NULL);
2169  }
2170  integer_storage::store(out, i);
2171  return std::make_pair(p, TYPE_INTEGER);
2172  }
2173  }
2174 
2175  bool install_array(size_t* array_base, size_t* array_end) {
2176  using namespace sajson::internal;
2177 
2178  const size_t length = array_end - array_base;
2179  bool success;
2180  size_t* const new_base = allocator.reserve(length + 1, &success);
2181  if (SAJSON_UNLIKELY(!success)) {
2182  return false;
2183  }
2184  size_t* out = new_base + length + 1;
2185  size_t* const structure_end = allocator.get_write_pointer_of(0);
2186 
2187  while (array_end > array_base) {
2188  size_t element = *--array_end;
2189  type element_type = get_element_type(element);
2190  size_t element_value = get_element_value(element);
2191  size_t* element_ptr = structure_end - element_value;
2192  *--out = make_element(element_type, element_ptr - new_base);
2193  }
2194  *--out = length;
2195  return true;
2196  }
2197 
2198  bool install_object(size_t* object_base, size_t* object_end) {
2199  using namespace internal;
2200 
2201  assert((object_end - object_base) % 3 == 0);
2202  const size_t length_times_3 = object_end - object_base;
2203 #ifndef SAJSON_UNSORTED_OBJECT_KEYS
2204  std::sort(
2205  reinterpret_cast<object_key_record*>(object_base),
2206  reinterpret_cast<object_key_record*>(object_end),
2207  object_key_comparator(input.get_data()));
2208 #endif
2209 
2210  bool success;
2211  size_t* const new_base = allocator.reserve(length_times_3 + 1, &success);
2212  if (SAJSON_UNLIKELY(!success)) {
2213  return false;
2214  }
2215  size_t* out = new_base + length_times_3 + 1;
2216  size_t* const structure_end = allocator.get_write_pointer_of(0);
2217 
2218  while (object_end > object_base) {
2219  size_t element = *--object_end;
2220  type element_type = get_element_type(element);
2221  size_t element_value = get_element_value(element);
2222  size_t* element_ptr = structure_end - element_value;
2223 
2224  *--out = make_element(element_type, element_ptr - new_base);
2225  *--out = *--object_end;
2226  *--out = *--object_end;
2227  }
2228  *--out = length_times_3 / 3;
2229  return true;
2230  }
2231 
// Parses a string whose opening quote is at p (fast path).
//
// Scans forward while is_plain_string_character() holds. If the first
// other character is the closing quote, the string needed no unescaping:
// tag[0]/tag[1] receive the start and end offsets of the contents within
// the input buffer, the closing quote is overwritten with NUL, and the
// position after it is returned. Otherwise a control character is an
// error and anything else is handed to parse_string_slow().
// Returns null after make_error() on failure.
2232  char* parse_string(char* p, size_t* tag) {
2233  using namespace internal;
2234 
2235  ++p; // "
2236  size_t start = p - input.get_data();
2237  char* input_end_local = input_end;
// Unrolled scan: test four characters per iteration while at least four
// remain, leaving p on the first non-plain character found.
2238  while (input_end_local - p >= 4) {
2239  if (!is_plain_string_character(p[0])) { goto found; }
2240  if (!is_plain_string_character(p[1])) { p += 1; goto found; }
2241  if (!is_plain_string_character(p[2])) { p += 2; goto found; }
2242  if (!is_plain_string_character(p[3])) { p += 3; goto found; }
2243  p += 4;
2244  }
// Tail scan, one character at a time, with an end-of-input check.
2245  for (;;) {
2246  if (SAJSON_UNLIKELY(p >= input_end_local)) {
2247  return make_error(p, ERROR_UNEXPECTED_END);
2248  }
2249 
2250  if (!is_plain_string_character(*p)) {
2251  break;
2252  }
2253 
2254  ++p;
2255  }
2256  found:
2257  if (SAJSON_LIKELY(*p == '"')) {
2258  tag[0] = start;
2259  tag[1] = p - input.get_data();
2260  *p = '\0';
2261  return p + 1;
2262  }
2263 
// The `*p >= 0` test keeps bytes above 0x7f out of the control-character
// branch where plain char is signed.
2264  if (*p >= 0 && *p < 0x20) {
2265  return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast<int>(*p));
2266  } else {
2267  // backslash or >0x7f
2268  return parse_string_slow(p, tag, start);
2269  }
2270  }
2271 
2272  char* read_hex(char* p, unsigned& u) {
2273  unsigned v = 0;
2274  int i = 4;
2275  while (i--) {
2276  unsigned char c = *p++;
2277  if (c >= '0' && c <= '9') {
2278  c -= '0';
2279  } else if (c >= 'a' && c <= 'f') {
2280  c = c - 'a' + 10;
2281  } else if (c >= 'A' && c <= 'F') {
2282  c = c - 'A' + 10;
2283  } else {
2284  return make_error(p, ERROR_INVALID_UNICODE_ESCAPE);
2285  }
2286  v = (v << 4) + c;
2287  }
2288 
2289  u = v;
2290  return p;
2291  }
2292 
/// Appends the UTF-8 encoding of `codepoint` at `end` and advances `end`
/// past the bytes written (1 to 4, depending on the code point's size).
void write_utf8(unsigned codepoint, char*& end) {
    char* out = end;
    if (codepoint < 0x80) {
        // One byte: 0xxxxxxx
        *out++ = static_cast<char>(codepoint);
    } else if (codepoint < 0x800) {
        // Two bytes: 110xxxxx 10xxxxxx
        *out++ = static_cast<char>(0xC0 | (codepoint >> 6));
        *out++ = static_cast<char>(0x80 | (codepoint & 0x3F));
    } else if (codepoint < 0x10000) {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        *out++ = static_cast<char>(0xE0 | (codepoint >> 12));
        *out++ = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        *out++ = static_cast<char>(0x80 | (codepoint & 0x3F));
    } else {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        assert(codepoint < 0x200000);
        *out++ = static_cast<char>(0xF0 | (codepoint >> 18));
        *out++ = static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F));
        *out++ = static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F));
        *out++ = static_cast<char>(0x80 | (codepoint & 0x3F));
    }
    end = out;
}
2311 
// Slow path of parse_string(): handles escapes and bytes above 0x7f.
//
// p is the first character the fast scan could not accept and `start` is
// the offset of the string's first character. The string is rewritten in
// place: `end` is the write position and p the read position, and every
// step writes no more bytes than it reads. On the closing quote, tag[0] and
// tag[1] receive the start offset and the offset of `end`, a NUL is written
// at `end`, and the position after the quote is returned. Returns null
// after make_error() on failure.
2312  char* parse_string_slow(char* p, size_t* tag, size_t start) {
2313  char* end = p;
2314  char* input_end_local = input_end;
2315 
2316  for (;;) {
2317  if (SAJSON_UNLIKELY(p >= input_end_local)) {
2318  return make_error(p, ERROR_UNEXPECTED_END);
2319  }
2320 
2321  if (SAJSON_UNLIKELY(*p >= 0 && *p < 0x20)) {
2322  return make_error(p, ERROR_ILLEGAL_CODEPOINT, static_cast<int>(*p));
2323  }
2324 
2325  switch (*p) {
2326  case '"':
2327  tag[0] = start;
2328  tag[1] = end - input.get_data();
2329  *end = '\0';
2330  return p + 1;
2331 
2332  case '\\':
2333  ++p;
2334  if (SAJSON_UNLIKELY(p >= input_end_local)) {
2335  return make_error(p, ERROR_UNEXPECTED_END);
2336  }
2337 
// Single-character escapes share the `replace` label, which writes the
// replacement byte and steps past the escape letter.
2338  char replacement;
2339  switch (*p) {
2340  case '"': replacement = '"'; goto replace;
2341  case '\\': replacement = '\\'; goto replace;
2342  case '/': replacement = '/'; goto replace;
2343  case 'b': replacement = '\b'; goto replace;
2344  case 'f': replacement = '\f'; goto replace;
2345  case 'n': replacement = '\n'; goto replace;
2346  case 'r': replacement = '\r'; goto replace;
2347  case 't': replacement = '\t'; goto replace;
2348  replace:
2349  *end++ = replacement;
2350  ++p;
2351  break;
// \uXXXX escape. A lead surrogate (0xD800-0xDBFF) must be followed by a
// second \uXXXX holding a trail surrogate; the pair is combined into one
// code point before being written as UTF-8.
// NOTE(review): a trail surrogate (0xDC00-0xDFFF) appearing on its own is
// not rejected here; it is passed straight to write_utf8().
2352  case 'u': {
2353  ++p;
2354  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
2355  return make_error(p, ERROR_UNEXPECTED_END);
2356  }
2357  unsigned u = 0; // gcc's complaining that this could be used uninitialized. wrong.
2358  p = read_hex(p, u);
2359  if (!p) {
2360  return 0;
2361  }
2362  if (u >= 0xD800 && u <= 0xDBFF) {
2363  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 6))) {
2364  return make_error(p, ERROR_UNEXPECTED_END_OF_UTF16);
2365  }
2366  char p0 = p[0];
2367  char p1 = p[1];
2368  if (p0 != '\\' || p1 != 'u') {
2369  return make_error(p, ERROR_EXPECTED_U);
2370  }
2371  p += 2;
2372  unsigned v = 0; // gcc's complaining that this could be used uninitialized. wrong.
2373  p = read_hex(p, v);
2374  if (!p) {
2375  return p;
2376  }
2377 
2378  if (v < 0xDC00 || v > 0xDFFF) {
2379  return make_error(p, ERROR_INVALID_UTF16_TRAIL_SURROGATE);
2380  }
2381  u = 0x10000 + (((u - 0xD800) << 10) | (v - 0xDC00));
2382  }
2383  write_utf8(u, end);
2384  break;
2385  }
2386  default:
2387  return make_error(p, ERROR_UNKNOWN_ESCAPE);
2388  }
2389  break;
2390 
// Unescaped character: copy it through, checking only that each multi-byte
// sequence has the expected number of continuation bytes (0x80-0xBF).
// NOTE(review): every lead byte from 0x80 to 0xDF takes the two-byte
// branch, so a stray continuation byte or a 0xC0/0xC1 lead is accepted when
// followed by a continuation byte; decoded code point ranges are not
// checked.
2391  default:
2392  // validate UTF-8
2393  unsigned char c0 = p[0];
2394  if (c0 < 128) {
2395  *end++ = *p++;
2396  } else if (c0 < 224) {
2397  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 2))) {
2398  return unexpected_end(p);
2399  }
2400  unsigned char c1 = p[1];
2401  if (c1 < 128 || c1 >= 192) {
2402  return make_error(p + 1, ERROR_INVALID_UTF8);
2403  }
2404  end[0] = c0;
2405  end[1] = c1;
2406  end += 2;
2407  p += 2;
2408  } else if (c0 < 240) {
2409  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 3))) {
2410  return unexpected_end(p);
2411  }
2412  unsigned char c1 = p[1];
2413  if (c1 < 128 || c1 >= 192) {
2414  return make_error(p + 1, ERROR_INVALID_UTF8);
2415  }
2416  unsigned char c2 = p[2];
2417  if (c2 < 128 || c2 >= 192) {
2418  return make_error(p + 2, ERROR_INVALID_UTF8);
2419  }
2420  end[0] = c0;
2421  end[1] = c1;
2422  end[2] = c2;
2423  end += 3;
2424  p += 3;
2425  } else if (c0 < 248) {
2426  if (SAJSON_UNLIKELY(!has_remaining_characters(p, 4))) {
2427  return unexpected_end(p);
2428  }
2429  unsigned char c1 = p[1];
2430  if (c1 < 128 || c1 >= 192) {
2431  return make_error(p + 1, ERROR_INVALID_UTF8);
2432  }
2433  unsigned char c2 = p[2];
2434  if (c2 < 128 || c2 >= 192) {
2435  return make_error(p + 2, ERROR_INVALID_UTF8);
2436  }
2437  unsigned char c3 = p[3];
2438  if (c3 < 128 || c3 >= 192) {
2439  return make_error(p + 3, ERROR_INVALID_UTF8);
2440  }
2441  end[0] = c0;
2442  end[1] = c1;
2443  end[2] = c2;
2444  end[3] = c3;
2445  end += 4;
2446  p += 4;
2447  } else {
2448  return make_error(p, ERROR_INVALID_UTF8);
2449  }
2450  break;
2451  }
2452  }
2453  }
2454 
// Parser state.
// `input` is the buffer being parsed; string parsing writes NUL terminators
// and unescaped text back into it. `input_end` is one past its last byte.
2455  mutable_string_view input;
2456  char* const input_end;
// Source of the parse stack and of AST storage.
2457  Allocator allocator;
2458 
// Type of the root structure; set by parse() on success.
2459  type root_type;
// Error position and details; written by make_error().
2460  size_t error_line;
2461  size_t error_column;
2462  error error_code;
2463  int error_arg; // optional argument for the error
2464  };
2466 
2478  template<typename AllocationStrategy, typename StringType>
2479  document parse(const AllocationStrategy& strategy, const StringType& string) {
2480  mutable_string_view input(string);
2481 
2482  bool success;
2483  auto allocator = strategy.make_allocator(input.length(), &success);
2484  if (!success) {
2485  return document(input, 1, 1, ERROR_OUT_OF_MEMORY, 0);
2486  }
2487 
2488  return parser<typename AllocationStrategy::allocator>(
2489  input,
2490  std::move(allocator)
2491  ).get_document();
2492  }
2493 }
d
const char *const text
Definition: sajson.h:637
literal(const char(&text_)[sz])
Definition: sajson.h:251
type
Tag indicating a JSON value's type.
Definition: sajson.h:71
bounded_allocation(size_t *existing_buffer_, size_t size_in_words)
Definition: sajson.h:1490
string(const char *text_, size_t length)
Definition: sajson.h:220
bounded_allocation(size_t(&existing_buffer_)[N])
Definition: sajson.h:1498
document(const mutable_string_view &input_, internal::ownership &&structure_, type root_type_, const size_t *root_)
Definition: sajson.h:830
bool operator()(const object_key_record &lhs, const object_key_record &rhs)
Definition: sajson.h:379
document(document &&rhs)
Definition: sajson.h:739
mutable_string_view(const mutable_string_view &that)
Definition: sajson.h:302
size_t get_string_length() const
Definition: sajson.h:585
ROSCPP_DECL void start()
ownership(ownership &&p_)
Definition: sajson.h:680
document(const mutable_string_view &input_, size_t error_line_, size_t error_column_, const error error_code_, int error_arg_)
Definition: sajson.h:843
allocated_buffer(size_t length)
Definition: sajson.h:149
const int error_arg
Definition: sajson.h:872
bool is_plain_string_character(char c)
Definition: sajson.h:133
size_t length() const
Definition: sajson.h:338
const char * get_error_text(error error_code)
Definition: sajson.h:697
char * get_data() const
Definition: sajson.h:342
internal::allocated_buffer buffer
Definition: sajson.h:349
value get_value_of_key(const string &key) const
Definition: sajson.h:493
XmlRpcServer s
std::string as_string() const
Definition: sajson.h:234
static const unsigned char parse_flags[256]
Definition: sajson.h:107
size_t find_object_key(const string &key) const
Definition: sajson.h:507
allocated_buffer & operator=(allocated_buffer &&that)
Definition: sajson.h:181
object_key_comparator(const char *object_data)
Definition: sajson.h:360
static const size_t ROOT_MARKER
Definition: sajson.h:87
void store(size_t *location, int value)
Definition: sajson.h:412
static const size_t TYPE_BITS
Definition: sajson.h:83
int get_integer_value() const
Definition: sajson.h:527
const char * as_cstring() const
Definition: sajson.h:596
GLenum GLuint GLenum GLsizei length
Definition: gl.h:1033
const char * get_error_message_as_cstring() const
If not is_valid(), returns a null-terminated C string indicating why the parse failed.
Definition: sajson.h:788
mutable_string_view(size_t length, char *data_)
Definition: sajson.h:273
allocated_buffer & operator=(const allocated_buffer &that)
Definition: sajson.h:172
mutable_string_view & operator=(const mutable_string_view &that)
Definition: sajson.h:329
const char * data() const
Definition: sajson.h:225
value get_array_element(size_t index) const
Definition: sajson.h:465
bool get_int53_value(int64_t *out) const
Definition: sajson.h:557
bool operator()(const string &lhs, const object_key_record &rhs) const
Definition: sajson.h:375
size_t length() const
Definition: sajson.h:229
size_t get_length() const
Definition: sajson.h:457
mutable_string_view & operator=(mutable_string_view &&that)
Definition: sajson.h:318
void assert_type(type expected) const
Definition: sajson.h:623
string get_object_key(size_t index) const
Definition: sajson.h:475
internal::ownership structure
Definition: sajson.h:866
const error error_code
Definition: sajson.h:871
single_allocation(size_t(&existing_buffer_)[N])
Definition: sajson.h:1051
size_t get_element_value(size_t s)
Definition: sajson.h:93
const type value_type
Definition: sajson.h:635
const size_t error_line
Definition: sajson.h:869
size_t get_error_line() const
If not is_valid(), returns the one-based line number where the parse failed.
Definition: sajson.h:771
globals_struct globals
Definition: sajson.h:109
const size_t error_column
Definition: sajson.h:870
bool is_valid() const
Definition: sajson.h:689
const size_t *const payload
Definition: sajson.h:636
bool operator()(const object_key_record &lhs, const string &rhs) const
Definition: sajson.h:364
mutable_string_view input
Definition: sajson.h:865
value get_root() const
If is_valid(), returns the document's root value.
Definition: sajson.h:766
size_t get_error_column() const
If not is_valid(), returns the one-based column number where the parse failed.
Definition: sajson.h:776
mutable_string_view(const literal &s)
Definition: sajson.h:281
mutable_string_view()
Creates an empty, zero-sized view.
Definition: sajson.h:263
dynamic_allocation(size_t initial_ast_capacity_=0, size_t initial_stack_capacity_=0)
Definition: sajson.h:1306
allocated_buffer(allocated_buffer &&that)
Definition: sajson.h:162
#define SAJSON_snprintf
Definition: sajson.h:62
bool is_whitespace(char c)
Definition: sajson.h:138
Definition: sajson.h:68
typedef void(GLAD_API_PTR *GLDEBUGPROC)(GLenum source
const size_t _length
Definition: sajson.h:241
INLINE Rall1d< T, V, S > exp(const Rall1d< T, V, S > &arg)
void assert_in_bounds(size_t i) const
Definition: sajson.h:631
void assert_type_2(type e1, type e2) const
Definition: sajson.h:627
type get_element_type(size_t s)
Definition: sajson.h:89
mutable_string_view(const string &s)
Definition: sajson.h:291
value get_object_value(size_t index) const
Definition: sajson.h:483
const type root_type
Definition: sajson.h:867
mutable_string_view(mutable_string_view &&that)
Move constructor - neuters the old mutable_string_view.
Definition: sajson.h:309
#define SAJSON_LIKELY(x)
Definition: sajson.h:58
bool is_valid() const
Definition: sajson.h:761
double get_double_value() const
Definition: sajson.h:534
value(type value_type_, const size_t *payload_, const char *text_)
Definition: sajson.h:617
allocated_buffer(const allocated_buffer &that)
Definition: sajson.h:156
static const size_t VALUE_MASK
Definition: sajson.h:85
parser
Definition: sajson.h:880
bool has_significant_error_arg() const
Definition: sajson.h:861
double get_number_value() const
Definition: sajson.h:541
const size_t *const root
Definition: sajson.h:868
size_t make_element(type t, size_t value)
Definition: sajson.h:97
void store(size_t *location, double value)
Definition: sajson.h:434
static const size_t TYPE_MASK
Definition: sajson.h:84
int load(const size_t *location)
Definition: sajson.h:406
error
Error code indicating why parse failed.
Definition: sajson.h:643
type get_type() const
Returns the JSON value's type.
Definition: sajson.h:451
document parse(const AllocationStrategy &strategy, const StringType &string)
Definition: sajson.h:2479
#define SAJSON_UNLIKELY(x)
Definition: sajson.h:59
const char *const text
Definition: sajson.h:240
#define SAJSON_UNREACHABLE()
Definition: sajson.h:61
single_allocation(size_t *existing_buffer_, size_t size_in_words)
Definition: sajson.h:1042
double load(const size_t *location)
Definition: sajson.h:428


mvsim
Author(s):
autogenerated on Tue Jul 4 2023 03:08:21