tokenizer.cc
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Here we have a hand-written lexer. At first you might ask yourself,
36 // "Hand-written text processing? Is Kenton crazy?!" Well, first of all,
37 // yes I am crazy, but that's beside the point. There are actually reasons
38 // why I ended up writing this this way.
39 //
40 // The traditional approach to lexing is to use lex to generate a lexer for
41 // you. Unfortunately, lex's output is ridiculously ugly and difficult to
42 // integrate cleanly with C++ code, especially abstract code or code meant
43 // as a library. Better parser-generators exist but would add dependencies
44 // which most users won't already have, which we'd like to avoid. (GNU flex
45 // has a C++ output option, but it's still ridiculously ugly, non-abstract,
46 // and not library-friendly.)
47 //
48 // The next approach that any good software engineer should look at is to
49 // use regular expressions. And, indeed, I did. I have code which
50 // implements this same class using regular expressions. It's about 200
51 // lines shorter. However:
52 // - Rather than error messages telling you "This string has an invalid
53 // escape sequence at line 5, column 45", you get error messages like
54 // "Parse error on line 5". Giving more precise errors requires adding
55 // a lot of code that ends up basically as complex as the hand-coded
56 // version anyway.
57 // - The regular expression to match a string literal looks like this:
58 // kString = new RE("(\"([^\"\\\\]|" // non-escaped
59 // "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape
60 // "\\\\x[0-9a-fA-F])*\"|" // hex escape
61 // "\'([^\'\\\\]|" // Also support single-quotes.
62 // "\\\\[abfnrtv?\"'\\\\0-7]|"
63 // "\\\\x[0-9a-fA-F])*\')");
64 // Verifying the correctness of this line noise is actually harder than
65 // verifying the correctness of ConsumeString(), defined below. I'm not
66 // even confident that the above is correct, after staring at it for some
67 // time.
68 // - PCRE is fast, but there's still more overhead involved than the code
69 // below.
70 // - Sadly, regular expressions are not part of the C standard library, so
71 // using them would require depending on some other library. For the
72 // open source release, this could be really annoying. Nobody likes
73 // downloading one piece of software just to find that they need to
74 // download something else to make it work, and in all likelihood
75 // people downloading Protocol Buffers will already be doing so just
76 // to make something else work. We could include a copy of PCRE with
77 // our code, but that obligates us to keep it up-to-date and just seems
78 // like a big waste just to save 200 lines of code.
79 //
80 // On a similar but unrelated note, I'm even scared to use ctype.h.
81 // Apparently functions like isalpha() are locale-dependent. So, if we used
82 // that, then if this code is being called from some program that doesn't
83 // have its locale set to "C", it would behave strangely. We can't just set
84 // the locale to "C" ourselves since we might break the calling program that
85 // way, particularly if it is multi-threaded. WTF? Someone please let me
86 // (Kenton) know if I'm missing something here...
87 //
88 // I'd love to hear about other alternatives, though, as this code isn't
89 // exactly pretty.
90 
91 #include <google/protobuf/io/tokenizer.h>
92 
93 #include <google/protobuf/stubs/common.h>
94 #include <google/protobuf/stubs/logging.h>
95 #include <google/protobuf/stubs/stringprintf.h>
96 #include <google/protobuf/io/strtod.h>
97 #include <google/protobuf/io/zero_copy_stream.h>
98 #include <google/protobuf/stubs/strutil.h>
99 #include <google/protobuf/stubs/stl_util.h>
100 namespace google {
101 namespace protobuf {
102 namespace io {
103 namespace {
104 
105 // As mentioned above, I don't trust ctype.h due to the presence of "locales".
106 // So, I have written replacement functions here. Someone please smack me if
107 // this is a bad idea or if there is some way around this.
108 //
109 // These "character classes" are designed to be used in template methods.
110 // For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat
111 // whitespace.
112 
113 // Note: No class is allowed to contain '\0', since this is used to mark end-
114 // of-input and is handled specially.
115 
116 #define CHARACTER_CLASS(NAME, EXPRESSION) \
117  class NAME { \
118  public: \
119  static inline bool InClass(char c) { return EXPRESSION; } \
120  }
121 
122 CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' || c == '\r' ||
123  c == '\v' || c == '\f');
124 CHARACTER_CLASS(WhitespaceNoNewline,
125  c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f');
126 
127 CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
128 
129 CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
130 CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
131 CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') ||
132  ('A' <= c && c <= 'F'));
133 
134 CHARACTER_CLASS(Letter,
135  ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_'));
136 
137 CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') ||
138  ('A' <= c && c <= 'Z') ||
139  ('0' <= c && c <= '9') || (c == '_'));
140 
141 CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
142  c == 'r' || c == 't' || c == 'v' || c == '\\' ||
143  c == '?' || c == '\'' || c == '\"');
144 
145 #undef CHARACTER_CLASS
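
// For illustration, a few membership checks implied by the class definitions
// above (sample values only, assuming the expressions as written):
//   Whitespace::InClass(' ')  -> true      Whitespace::InClass('x') -> false
//   HexDigit::InClass('b')    -> true      HexDigit::InClass('g')   -> false
//   Letter::InClass('_')      -> true      Letter::InClass('9')     -> false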
146 
147 // Given a char, interpret it as a numeric digit and return its value.
148 // This supports any number base up to 36.
149 inline int DigitValue(char digit) {
150  if ('0' <= digit && digit <= '9') return digit - '0';
151  if ('a' <= digit && digit <= 'z') return digit - 'a' + 10;
152  if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10;
153  return -1;
154 }
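
// For illustration, sample values implied by DigitValue() above:
//   DigitValue('7') == 7, DigitValue('a') == 10, DigitValue('F') == 15,
//   and DigitValue('!') == -1 (not a digit in any supported base).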
155 
156 // Inline because it's only used in one place.
157 inline char TranslateEscape(char c) {
158  switch (c) {
159  case 'a':
160  return '\a';
161  case 'b':
162  return '\b';
163  case 'f':
164  return '\f';
165  case 'n':
166  return '\n';
167  case 'r':
168  return '\r';
169  case 't':
170  return '\t';
171  case 'v':
172  return '\v';
173  case '\\':
174  return '\\';
175  case '?':
176  return '\?'; // Trigraphs = :(
177  case '\'':
178  return '\'';
179  case '"':
180  return '\"';
181 
182  // We expect escape sequences to have been validated separately.
183  default:
184  return '?';
185  }
186 }
187 
188 } // anonymous namespace
189 
190 ErrorCollector::~ErrorCollector() {}
191 
192 // ===================================================================
193 
194 Tokenizer::Tokenizer(ZeroCopyInputStream* input,
195  ErrorCollector* error_collector)
196  : input_(input),
197  error_collector_(error_collector),
198  buffer_(NULL),
199  buffer_size_(0),
200  buffer_pos_(0),
201  read_error_(false),
202  line_(0),
203  column_(0),
204  record_target_(NULL),
205  record_start_(-1),
206  allow_f_after_float_(false),
207  comment_style_(CPP_COMMENT_STYLE),
208  require_space_after_number_(true),
209  allow_multiline_strings_(false) {
210  current_.line = 0;
211  current_.column = 0;
212  current_.end_column = 0;
213  current_.type = TYPE_START;
214 
215  Refresh();
216 }
217 
218 Tokenizer::~Tokenizer() {
219  // If we had any buffer left unread, return it to the underlying stream
220  // so that someone else can read it.
221  if (buffer_size_ > buffer_pos_) {
222  input_->BackUp(buffer_size_ - buffer_pos_);
223  }
224 }
225 
226 // -------------------------------------------------------------------
227 // Internal helpers.
228 
229 void Tokenizer::NextChar() {
230  // Update our line and column counters based on the character being
231  // consumed.
232  if (current_char_ == '\n') {
233  ++line_;
234  column_ = 0;
235  } else if (current_char_ == '\t') {
236  column_ += kTabWidth - column_ % kTabWidth;
237  } else {
238  ++column_;
239  }
240 
241  // Advance to the next character.
242  ++buffer_pos_;
243  if (buffer_pos_ < buffer_size_) {
244  current_char_ = buffer_[buffer_pos_];
245  } else {
246  Refresh();
247  }
248 }
249 
250 void Tokenizer::Refresh() {
251  if (read_error_) {
252  current_char_ = '\0';
253  return;
254  }
255 
256  // If we're in a token, append the rest of the buffer to it.
257  if (record_target_ != NULL && record_start_ < buffer_size_) {
258  record_target_->append(buffer_ + record_start_,
259  buffer_size_ - record_start_);
260  record_start_ = 0;
261  }
262 
263  const void* data = NULL;
264  buffer_ = NULL;
265  buffer_pos_ = 0;
266  do {
267  if (!input_->Next(&data, &buffer_size_)) {
268  // end of stream (or read error)
269  buffer_size_ = 0;
270  read_error_ = true;
271  current_char_ = '\0';
272  return;
273  }
274  } while (buffer_size_ == 0);
275 
276  buffer_ = static_cast<const char*>(data);
277 
278  current_char_ = buffer_[0];
279 }
280 
281 inline void Tokenizer::RecordTo(std::string* target) {
282  record_target_ = target;
283  record_start_ = buffer_pos_;
284 }
285 
286 inline void Tokenizer::StopRecording() {
287  // Note: The if() is necessary because some STL implementations crash when
288  // you call string::append(NULL, 0), presumably because they are trying to
289  // be helpful by detecting the NULL pointer, even though there's nothing
290  // wrong with reading zero bytes from NULL.
291  if (buffer_pos_ != record_start_) {
292  record_target_->append(buffer_ + record_start_,
293  buffer_pos_ - record_start_);
294  }
295  record_target_ = NULL;
296  record_start_ = -1;
297 }
298 
299 inline void Tokenizer::StartToken() {
300  current_.type = TYPE_START; // Just for the sake of initializing it.
301  current_.text.clear();
302  current_.line = line_;
303  current_.column = column_;
304  RecordTo(&current_.text);
305 }
306 
307 inline void Tokenizer::EndToken() {
308  StopRecording();
309  current_.end_column = column_;
310 }
311 
312 // -------------------------------------------------------------------
313 // Helper methods that consume characters.
314 
315 template <typename CharacterClass>
316 inline bool Tokenizer::LookingAt() {
317  return CharacterClass::InClass(current_char_);
318 }
319 
320 template <typename CharacterClass>
321 inline bool Tokenizer::TryConsumeOne() {
322  if (CharacterClass::InClass(current_char_)) {
323  NextChar();
324  return true;
325  } else {
326  return false;
327  }
328 }
329 
330 inline bool Tokenizer::TryConsume(char c) {
331  if (current_char_ == c) {
332  NextChar();
333  return true;
334  } else {
335  return false;
336  }
337 }
338 
339 template <typename CharacterClass>
340 inline void Tokenizer::ConsumeZeroOrMore() {
341  while (CharacterClass::InClass(current_char_)) {
342  NextChar();
343  }
344 }
345 
346 template <typename CharacterClass>
347 inline void Tokenizer::ConsumeOneOrMore(const char* error) {
348  if (!CharacterClass::InClass(current_char_)) {
349  AddError(error);
350  } else {
351  do {
352  NextChar();
353  } while (CharacterClass::InClass(current_char_));
354  }
355 }
356 
357 // -------------------------------------------------------------------
358 // Methods that read whole patterns matching certain kinds of tokens
359 // or comments.
360 
361 void Tokenizer::ConsumeString(char delimiter) {
362  while (true) {
363  switch (current_char_) {
364  case '\0':
365  AddError("Unexpected end of string.");
366  return;
367 
368  case '\n': {
369  if (!allow_multiline_strings_) {
370  AddError("String literals cannot cross line boundaries.");
371  return;
372  }
373  NextChar();
374  break;
375  }
376 
377  case '\\': {
378  // An escape sequence.
379  NextChar();
380  if (TryConsumeOne<Escape>()) {
381  // Valid escape sequence.
382  } else if (TryConsumeOne<OctalDigit>()) {
383  // Possibly followed by two more octal digits, but these will
384  // just be consumed by the main loop anyway so we don't need
385  // to do so explicitly here.
386  } else if (TryConsume('x')) {
387  if (!TryConsumeOne<HexDigit>()) {
388  AddError("Expected hex digits for escape sequence.");
389  }
390  // Possibly followed by another hex digit, but again we don't care.
391  } else if (TryConsume('u')) {
392  if (!TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>() ||
393  !TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>()) {
394  AddError("Expected four hex digits for \\u escape sequence.");
395  }
396  } else if (TryConsume('U')) {
397  // We expect 8 hex digits; but only the range up to 0x10ffff is
398  // legal.
399  if (!TryConsume('0') || !TryConsume('0') ||
400  !(TryConsume('0') || TryConsume('1')) ||
401  !TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>() ||
402  !TryConsumeOne<HexDigit>() || !TryConsumeOne<HexDigit>() ||
403  !TryConsumeOne<HexDigit>()) {
404  AddError(
405  "Expected eight hex digits up to 10ffff for \\U escape "
406  "sequence");
407  }
408  } else {
409  AddError("Invalid escape sequence in string literal.");
410  }
411  break;
412  }
413 
414  default: {
415  if (current_char_ == delimiter) {
416  NextChar();
417  return;
418  }
419  NextChar();
420  break;
421  }
422  }
423  }
424 }
425 
426 Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
427  bool started_with_dot) {
428  bool is_float = false;
429 
430  if (started_with_zero && (TryConsume('x') || TryConsume('X'))) {
431  // A hex number (started with "0x").
432  ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");
433 
434  } else if (started_with_zero && LookingAt<Digit>()) {
435  // An octal number (had a leading zero).
436  ConsumeZeroOrMore<OctalDigit>();
437  if (LookingAt<Digit>()) {
438  AddError("Numbers starting with leading zero must be in octal.");
439  ConsumeZeroOrMore<Digit>();
440  }
441 
442  } else {
443  // A decimal number.
444  if (started_with_dot) {
445  is_float = true;
446  ConsumeZeroOrMore<Digit>();
447  } else {
448  ConsumeZeroOrMore<Digit>();
449 
450  if (TryConsume('.')) {
451  is_float = true;
452  ConsumeZeroOrMore<Digit>();
453  }
454  }
455 
456  if (TryConsume('e') || TryConsume('E')) {
457  is_float = true;
458  TryConsume('-') || TryConsume('+');
459  ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
460  }
461 
462  if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
463  is_float = true;
464  }
465  }
466 
467  if (LookingAt<Letter>() && require_space_after_number_) {
468  AddError("Need space between number and identifier.");
469  } else if (current_char_ == '.') {
470  if (is_float) {
471  AddError(
472  "Already saw decimal point or exponent; can't have another one.");
473  } else {
474  AddError("Hex and octal numbers must be integers.");
475  }
476  }
477 
478  return is_float ? TYPE_FLOAT : TYPE_INTEGER;
479 }
480 
481 void Tokenizer::ConsumeLineComment(std::string* content) {
482  if (content != NULL) RecordTo(content);
483 
484  while (current_char_ != '\0' && current_char_ != '\n') {
485  NextChar();
486  }
487  TryConsume('\n');
488 
489  if (content != NULL) StopRecording();
490 }
491 
492 void Tokenizer::ConsumeBlockComment(std::string* content) {
493  int start_line = line_;
494  int start_column = column_ - 2;
495 
496  if (content != NULL) RecordTo(content);
497 
498  while (true) {
499  while (current_char_ != '\0' && current_char_ != '*' &&
500  current_char_ != '/' && current_char_ != '\n') {
501  NextChar();
502  }
503 
504  if (TryConsume('\n')) {
505  if (content != NULL) StopRecording();
506 
507  // Consume leading whitespace and asterisk;
508  ConsumeZeroOrMore<WhitespaceNoNewline>();
509  if (TryConsume('*')) {
510  if (TryConsume('/')) {
511  // End of comment.
512  break;
513  }
514  }
515 
516  if (content != NULL) RecordTo(content);
517  } else if (TryConsume('*') && TryConsume('/')) {
518  // End of comment.
519  if (content != NULL) {
520  StopRecording();
521  // Strip trailing "*/".
522  content->erase(content->size() - 2);
523  }
524  break;
525  } else if (TryConsume('/') && current_char_ == '*') {
526  // Note: We didn't consume the '*' because if there is a '/' after it
527  // we want to interpret that as the end of the comment.
528  AddError(
529  "\"/*\" inside block comment. Block comments cannot be nested.");
530  } else if (current_char_ == '\0') {
531  AddError("End-of-file inside block comment.");
532  error_collector_->AddError(start_line, start_column,
533  " Comment started here.");
534  if (content != NULL) StopRecording();
535  break;
536  }
537  }
538 }
539 
540 Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() {
541  if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
542  if (TryConsume('/')) {
543  return LINE_COMMENT;
544  } else if (TryConsume('*')) {
545  return BLOCK_COMMENT;
546  } else {
547  // Oops, it was just a slash. Return it.
548  current_.type = TYPE_SYMBOL;
549  current_.text = "/";
550  current_.line = line_;
551  current_.column = column_ - 1;
552  current_.end_column = column_;
553  return SLASH_NOT_COMMENT;
554  }
555  } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
556  return LINE_COMMENT;
557  } else {
558  return NO_COMMENT;
559  }
560 }
561 
562 // -------------------------------------------------------------------
563 
564 bool Tokenizer::Next() {
565  previous_ = current_;
566 
567  while (!read_error_) {
568  ConsumeZeroOrMore<Whitespace>();
569 
570  switch (TryConsumeCommentStart()) {
571  case LINE_COMMENT:
572  ConsumeLineComment(NULL);
573  continue;
574  case BLOCK_COMMENT:
575  ConsumeBlockComment(NULL);
576  continue;
577  case SLASH_NOT_COMMENT:
578  return true;
579  case NO_COMMENT:
580  break;
581  }
582 
583  // Check for EOF before continuing.
584  if (read_error_) break;
585 
586  if (LookingAt<Unprintable>() || current_char_ == '\0') {
587  AddError("Invalid control characters encountered in text.");
588  NextChar();
589  // Skip more unprintable characters, too. But, remember that '\0' is
590  // also what current_char_ is set to after EOF / read error. We have
591  // to be careful not to go into an infinite loop of trying to consume
592  // it, so make sure to check read_error_ explicitly before consuming
593  // '\0'.
594  while (TryConsumeOne<Unprintable>() ||
595  (!read_error_ && TryConsume('\0'))) {
596  // Ignore.
597  }
598 
599  } else {
600  // Reading some sort of token.
601  StartToken();
602 
603  if (TryConsumeOne<Letter>()) {
604  ConsumeZeroOrMore<Alphanumeric>();
605  current_.type = TYPE_IDENTIFIER;
606  } else if (TryConsume('0')) {
607  current_.type = ConsumeNumber(true, false);
608  } else if (TryConsume('.')) {
609  // This could be the beginning of a floating-point number, or it could
610  // just be a '.' symbol.
611 
612  if (TryConsumeOne<Digit>()) {
613  // It's a floating-point number.
614  if (previous_.type == TYPE_IDENTIFIER &&
615  current_.line == previous_.line &&
616  current_.column == previous_.end_column) {
617  // We don't accept syntax like "blah.123".
618  error_collector_->AddError(
619  line_, column_ - 2,
620  "Need space between identifier and decimal point.");
621  }
622  current_.type = ConsumeNumber(false, true);
623  } else {
624  current_.type = TYPE_SYMBOL;
625  }
626  } else if (TryConsumeOne<Digit>()) {
627  current_.type = ConsumeNumber(false, false);
628  } else if (TryConsume('\"')) {
629  ConsumeString('\"');
630  current_.type = TYPE_STRING;
631  } else if (TryConsume('\'')) {
632  ConsumeString('\'');
633  current_.type = TYPE_STRING;
634  } else {
635  // Check if the high order bit is set.
636  if (current_char_ & 0x80) {
637  error_collector_->AddError(
638  line_, column_,
639  StringPrintf("Interpreting non ascii codepoint %d.",
640  static_cast<unsigned char>(current_char_)));
641  }
642  NextChar();
643  current_.type = TYPE_SYMBOL;
644  }
645 
646  EndToken();
647  return true;
648  }
649  }
650 
651  // EOF
652  current_.type = TYPE_END;
653  current_.text.clear();
654  current_.line = line_;
655  current_.column = column_;
656  current_.end_column = column_;
657  return false;
658 }
659 
660 namespace {
661 
662 // Helper class for collecting comments and putting them in the right places.
663 //
664 // This basically just buffers the most recent comment until it can be decided
665 // exactly where that comment should be placed. When Flush() is called, the
666 // current comment goes into either prev_trailing_comments or detached_comments.
667 // When the CommentCollector is destroyed, the last buffered comment goes into
668 // next_leading_comments.
669 class CommentCollector {
670  public:
671  CommentCollector(std::string* prev_trailing_comments,
672  std::vector<std::string>* detached_comments,
673  std::string* next_leading_comments)
674  : prev_trailing_comments_(prev_trailing_comments),
675  detached_comments_(detached_comments),
676  next_leading_comments_(next_leading_comments),
677  has_comment_(false),
678  is_line_comment_(false),
679  can_attach_to_prev_(true) {
680  if (prev_trailing_comments != NULL) prev_trailing_comments->clear();
681  if (detached_comments != NULL) detached_comments->clear();
682  if (next_leading_comments != NULL) next_leading_comments->clear();
683  }
684 
685  ~CommentCollector() {
686  // Whatever is in the buffer is a leading comment.
687  if (next_leading_comments_ != NULL) {
688  *next_leading_comments_ = comment_buffer_;
689  }
690  }
691 
692  // About to read a line comment. Get the comment buffer pointer in order to
693  // read into it.
694  std::string* GetBufferForLineComment() {
695  // We want to combine with previous line comments, but not block comments.
696  if (has_comment_ && !is_line_comment_) {
697  Flush();
698  }
699  has_comment_ = true;
700  is_line_comment_ = true;
701  return &comment_buffer_;
702  }
703 
704  // About to read a block comment. Get the comment buffer pointer in order to
705  // read into it.
706  std::string* GetBufferForBlockComment() {
707  if (has_comment_) {
708  Flush();
709  }
710  has_comment_ = true;
711  is_line_comment_ = false;
712  return &comment_buffer_;
713  }
714 
715  void ClearBuffer() {
716  comment_buffer_.clear();
717  has_comment_ = false;
718  }
719 
720  // Called once we know that the comment buffer is complete and is *not*
721  // connected to the next token.
722  void Flush() {
723  if (has_comment_) {
724  if (can_attach_to_prev_) {
725  if (prev_trailing_comments_ != NULL) {
726  prev_trailing_comments_->append(comment_buffer_);
727  }
728  can_attach_to_prev_ = false;
729  } else {
730  if (detached_comments_ != NULL) {
731  detached_comments_->push_back(comment_buffer_);
732  }
733  }
734  ClearBuffer();
735  }
736  }
737 
738  void DetachFromPrev() { can_attach_to_prev_ = false; }
739 
740  private:
741  std::string* prev_trailing_comments_;
742  std::vector<std::string>* detached_comments_;
743  std::string* next_leading_comments_;
744 
745  std::string comment_buffer_;
746 
747  // True if any comments were read into comment_buffer_. This can be true even
748  // if comment_buffer_ is empty, namely if the comment was "/**/".
749  bool has_comment_;
750 
751  // Is the comment in the comment buffer a line comment?
752  bool is_line_comment_;
753 
754  // Is it still possible that we could be reading a comment attached to the
755  // previous token?
756  bool can_attach_to_prev_;
757 };
758 
759 } // namespace
760 
761 bool Tokenizer::NextWithComments(std::string* prev_trailing_comments,
762  std::vector<std::string>* detached_comments,
763  std::string* next_leading_comments) {
764  CommentCollector collector(prev_trailing_comments, detached_comments,
765  next_leading_comments);
766 
767  if (current_.type == TYPE_START) {
768  // Ignore unicode byte order mark(BOM) if it appears at the file
769  // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
770  if (TryConsume((char)0xEF)) {
771  if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
772  AddError(
773  "Proto file starts with 0xEF but not UTF-8 BOM. "
774  "Only UTF-8 is accepted for proto file.");
775  return false;
776  }
777  }
778  collector.DetachFromPrev();
779  } else {
780  // A comment appearing on the same line must be attached to the previous
781  // declaration.
782  ConsumeZeroOrMore<WhitespaceNoNewline>();
783  switch (TryConsumeCommentStart()) {
784  case LINE_COMMENT:
785  ConsumeLineComment(collector.GetBufferForLineComment());
786 
787  // Don't allow comments on subsequent lines to be attached to a trailing
788  // comment.
789  collector.Flush();
790  break;
791  case BLOCK_COMMENT:
792  ConsumeBlockComment(collector.GetBufferForBlockComment());
793 
794  ConsumeZeroOrMore<WhitespaceNoNewline>();
795  if (!TryConsume('\n')) {
796  // Oops, the next token is on the same line. If we recorded a comment
797  // we really have no idea which token it should be attached to.
798  collector.ClearBuffer();
799  return Next();
800  }
801 
802  // Don't allow comments on subsequent lines to be attached to a trailing
803  // comment.
804  collector.Flush();
805  break;
806  case SLASH_NOT_COMMENT:
807  return true;
808  case NO_COMMENT:
809  if (!TryConsume('\n')) {
810  // The next token is on the same line. There are no comments.
811  return Next();
812  }
813  break;
814  }
815  }
816 
817  // OK, we are now on the line *after* the previous token.
818  while (true) {
819  ConsumeZeroOrMore<WhitespaceNoNewline>();
820 
821  switch (TryConsumeCommentStart()) {
822  case LINE_COMMENT:
823  ConsumeLineComment(collector.GetBufferForLineComment());
824  break;
825  case BLOCK_COMMENT:
826  ConsumeBlockComment(collector.GetBufferForBlockComment());
827 
828  // Consume the rest of the line so that we don't interpret it as a
829  // blank line the next time around the loop.
830  ConsumeZeroOrMore<WhitespaceNoNewline>();
831  TryConsume('\n');
832  break;
833  case SLASH_NOT_COMMENT:
834  return true;
835  case NO_COMMENT:
836  if (TryConsume('\n')) {
837  // Completely blank line.
838  collector.Flush();
839  collector.DetachFromPrev();
840  } else {
841  bool result = Next();
842  if (!result || current_.text == "}" || current_.text == "]" ||
843  current_.text == ")") {
844  // It looks like we're at the end of a scope. In this case it
845  // makes no sense to attach a comment to the following token.
846  collector.Flush();
847  }
848  return result;
849  }
850  break;
851  }
852  }
853 }
854 
855 // -------------------------------------------------------------------
856 // Token-parsing helpers. Remember that these don't need to report
857 // errors since any errors should already have been reported while
858 // tokenizing. Also, these can assume that whatever text they
859 // are given is text that the tokenizer actually parsed as a token
860 // of the given type.
861 
862 bool Tokenizer::ParseInteger(const std::string& text, uint64 max_value,
863  uint64* output) {
864  // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull()
865  // is non-standard. I hate the C standard library. :(
866 
867  // return strtoull(text.c_str(), NULL, 0);
868 
869  const char* ptr = text.c_str();
870  int base = 10;
871  if (ptr[0] == '0') {
872  if (ptr[1] == 'x' || ptr[1] == 'X') {
873  // This is hex.
874  base = 16;
875  ptr += 2;
876  } else {
877  // This is octal.
878  base = 8;
879  }
880  }
881 
882  uint64 result = 0;
883  for (; *ptr != '\0'; ptr++) {
884  int digit = DigitValue(*ptr);
885  if (digit < 0 || digit >= base) {
886  // The token provided by Tokenizer is invalid. i.e., 099 is an invalid
887  // token, but Tokenizer still thinks it's an integer.
888  return false;
889  }
890  if (digit > max_value || result > (max_value - digit) / base) {
891  // Overflow.
892  return false;
893  }
894  result = result * base + digit;
895  }
896 
897  *output = result;
898  return true;
899 }
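
// For illustration, assuming the definition above, ParseInteger() behaves
// roughly as follows on a few hypothetical calls:
//   uint64 value;
//   ParseInteger("0x1f", kuint64max, &value);  // true, value == 31 (hex)
//   ParseInteger("010", kuint64max, &value);   // true, value == 8 (octal)
//   ParseInteger("099", kuint64max, &value);   // false: '9' is not octal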
900 
901 double Tokenizer::ParseFloat(const std::string& text) {
902  const char* start = text.c_str();
903  char* end;
904  double result = NoLocaleStrtod(start, &end);
905 
906  // "1e" is not a valid float, but if the tokenizer reads it, it will
907  // report an error but still return it as a valid token. We need to
908  // accept anything the tokenizer could possibly return, error or not.
909  if (*end == 'e' || *end == 'E') {
910  ++end;
911  if (*end == '-' || *end == '+') ++end;
912  }
913 
914  // If the Tokenizer had allow_f_after_float_ enabled, the float may be
915  // suffixed with the letter 'f'.
916  if (*end == 'f' || *end == 'F') {
917  ++end;
918  }
919 
920  GOOGLE_LOG_IF(DFATAL, end - start != text.size() || *start == '-')
921  << " Tokenizer::ParseFloat() passed text that could not have been"
922  " tokenized as a float: "
923  << CEscape(text);
924  return result;
925 }
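
// For illustration, ParseFloat("1.5e3") returns 1500.0, and the trailing 'f'
// in ParseFloat("2.5f") is skipped so the result is 2.5 (hypothetical inputs;
// any such text must already have been produced by the tokenizer itself).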
926 
927 // Helper to append a Unicode code point to a string as UTF8, without bringing
928 // in any external dependencies.
929 static void AppendUTF8(uint32 code_point, std::string* output) {
930  uint32 tmp = 0;
931  int len = 0;
932  if (code_point <= 0x7f) {
933  tmp = code_point;
934  len = 1;
935  } else if (code_point <= 0x07ff) {
936  tmp = 0x0000c080 | ((code_point & 0x07c0) << 2) | (code_point & 0x003f);
937  len = 2;
938  } else if (code_point <= 0xffff) {
939  tmp = 0x00e08080 | ((code_point & 0xf000) << 4) |
940  ((code_point & 0x0fc0) << 2) | (code_point & 0x003f);
941  len = 3;
942  } else if (code_point <= 0x1fffff) {
943  tmp = 0xf0808080 | ((code_point & 0x1c0000) << 6) |
944  ((code_point & 0x03f000) << 4) | ((code_point & 0x000fc0) << 2) |
945  (code_point & 0x003f);
946  len = 4;
947  } else {
948  // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
949  // normally only defined up to there as well.
950  StringAppendF(output, "\\U%08x", code_point);
951  return;
952  }
953  tmp = ghtonl(tmp);
954  output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
955 }
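
// For illustration, assuming the bit-packing above: AppendUTF8(0x41, &s)
// appends the single byte 0x41 ("A"), while AppendUTF8(0x20ac, &s) appends
// the three bytes 0xe2 0x82 0xac, the UTF-8 encoding of U+20AC (euro sign).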
956 
957 // Try to read <len> hex digits from ptr, and stuff the numeric result into
958 // *result. Returns true if that many digits were successfully consumed.
959 static bool ReadHexDigits(const char* ptr, int len, uint32* result) {
960  *result = 0;
961  if (len == 0) return false;
962  for (const char* end = ptr + len; ptr < end; ++ptr) {
963  if (*ptr == '\0') return false;
964  *result = (*result << 4) + DigitValue(*ptr);
965  }
966  return true;
967 }
968 
969 // Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range
970 // 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail
971 // surrogate. These numbers are in a reserved range of Unicode code points, so
972 // if we encounter such a pair we know how to parse it and convert it into a
973 // single code point.
974 static const uint32 kMinHeadSurrogate = 0xd800;
975 static const uint32 kMaxHeadSurrogate = 0xdc00;
976 static const uint32 kMinTrailSurrogate = 0xdc00;
977 static const uint32 kMaxTrailSurrogate = 0xe000;
978 
979 static inline bool IsHeadSurrogate(uint32 code_point) {
980  return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate);
981 }
982 
983 static inline bool IsTrailSurrogate(uint32 code_point) {
984  return (code_point >= kMinTrailSurrogate) &&
985  (code_point < kMaxTrailSurrogate);
986 }
987 
988 // Combine a head and trail surrogate into a single Unicode code point.
989 static uint32 AssembleUTF16(uint32 head_surrogate, uint32 trail_surrogate) {
990  GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate));
991  GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate));
992  return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) |
993  (trail_surrogate - kMinTrailSurrogate));
994 }
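
// For illustration, the surrogate pair 0xd83d / 0xde00 assembles to
// 0x10000 + ((0x003d << 10) | 0x0200) == 0x1f600, i.e. code point U+1F600.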
995 
996 // Convert the escape sequence parameter to a number of expected hex digits.
997 static inline int UnicodeLength(char key) {
998  if (key == 'u') return 4;
999  if (key == 'U') return 8;
1000  return 0;
1001 }
1002 
1003 // Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt
1004 // to parse that sequence. On success, returns a pointer to the first char
1005 // beyond that sequence, and fills in *code_point. On failure, returns ptr
1006 // itself.
1007 static const char* FetchUnicodePoint(const char* ptr, uint32* code_point) {
1008  const char* p = ptr;
1009  // Fetch the code point.
1010  const int len = UnicodeLength(*p++);
1011  if (!ReadHexDigits(p, len, code_point)) return ptr;
1012  p += len;
1013 
1014  // Check if the code point we read is a "head surrogate." If so, then we
1015  // expect it to be immediately followed by another code point which is a valid
1016  // "trail surrogate," and together they form a UTF-16 pair which decodes into
1017  // a single Unicode point. Trail surrogates may only use \u, not \U.
1018  if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') {
1019  uint32 trail_surrogate;
1020  if (ReadHexDigits(p + 2, 4, &trail_surrogate) &&
1021  IsTrailSurrogate(trail_surrogate)) {
1022  *code_point = AssembleUTF16(*code_point, trail_surrogate);
1023  p += 6;
1024  }
1025  // If this failed, then we just emit the head surrogate as a code point.
1026  // It's bogus, but so is the string.
1027  }
1028 
1029  return p;
1030 }
1031 
1032 // The text string must begin and end with single or double quote
1033 // characters.
1034 void Tokenizer::ParseStringAppend(const std::string& text,
1035  std::string* output) {
1036  // Reminder: text[0] is always a quote character. (If text is
1037  // empty, it's invalid, so we'll just return).
1038  const size_t text_size = text.size();
1039  if (text_size == 0) {
1040  GOOGLE_LOG(DFATAL) << " Tokenizer::ParseStringAppend() passed text that could not"
1041  " have been tokenized as a string: "
1042  << CEscape(text);
1043  return;
1044  }
1045 
1046  // Reserve room for new string. The branch is necessary because if
1047  // there is already space available the reserve() call might
1048  // downsize the output.
1049  const size_t new_len = text_size + output->size();
1050  if (new_len > output->capacity()) {
1051  output->reserve(new_len);
1052  }
1053 
1054  // Loop through the string copying characters to "output" and
1055  // interpreting escape sequences. Note that any invalid escape
1056  // sequences or other errors were already reported while tokenizing.
1057  // In this case we do not need to produce valid results.
1058  for (const char* ptr = text.c_str() + 1; *ptr != '\0'; ptr++) {
1059  if (*ptr == '\\' && ptr[1] != '\0') {
1060  // An escape sequence.
1061  ++ptr;
1062 
1063  if (OctalDigit::InClass(*ptr)) {
1064  // An octal escape. May have one, two, or three digits.
1065  int code = DigitValue(*ptr);
1066  if (OctalDigit::InClass(ptr[1])) {
1067  ++ptr;
1068  code = code * 8 + DigitValue(*ptr);
1069  }
1070  if (OctalDigit::InClass(ptr[1])) {
1071  ++ptr;
1072  code = code * 8 + DigitValue(*ptr);
1073  }
1074  output->push_back(static_cast<char>(code));
1075 
1076  } else if (*ptr == 'x') {
1077  // A hex escape. May have zero, one, or two digits. (The zero case
1078  // will have been caught as an error earlier.)
1079  int code = 0;
1080  if (HexDigit::InClass(ptr[1])) {
1081  ++ptr;
1082  code = DigitValue(*ptr);
1083  }
1084  if (HexDigit::InClass(ptr[1])) {
1085  ++ptr;
1086  code = code * 16 + DigitValue(*ptr);
1087  }
1088  output->push_back(static_cast<char>(code));
1089 
1090  } else if (*ptr == 'u' || *ptr == 'U') {
1091  uint32 unicode;
1092  const char* end = FetchUnicodePoint(ptr, &unicode);
1093  if (end == ptr) {
1094  // Failure: Just dump out what we saw, don't try to parse it.
1095  output->push_back(*ptr);
1096  } else {
1097  AppendUTF8(unicode, output);
1098  ptr = end - 1; // Because we're about to ++ptr.
1099  }
1100  } else {
1101  // Some other escape code.
1102  output->push_back(TranslateEscape(*ptr));
1103  }
1104 
1105  } else if (*ptr == text[0] && ptr[1] == '\0') {
1106  // Ignore final quote matching the starting quote.
1107  } else {
1108  output->push_back(*ptr);
1109  }
1110  }
1111 }
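
// For illustration, a hypothetical call based on the loop above:
// ParseStringAppend("\"a\\n\\x41\"", &out) appends the three characters
// 'a', '\n', 'A' to out; the surrounding quotes are dropped.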
1112 
1113 template <typename CharacterClass>
1114 static bool AllInClass(const std::string& s) {
1115  for (int i = 0; i < s.size(); ++i) {
1116  if (!CharacterClass::InClass(s[i])) return false;
1117  }
1118  return true;
1119 }
1120 
1121 bool Tokenizer::IsIdentifier(const std::string& text) {
1122  // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
1123  if (text.size() == 0) return false;
1124  if (!Letter::InClass(text.at(0))) return false;
1125  if (!AllInClass<Alphanumeric>(text.substr(1))) return false;
1126  return true;
1127 }
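
// For illustration, IsIdentifier("foo_bar1") is true, while IsIdentifier("1foo")
// and IsIdentifier("") are false, matching the IDENTIFIER rule in Next().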
1128 
1129 } // namespace io
1130 } // namespace protobuf
1131 } // namespace google