#include <google/protobuf/io/tokenizer.h>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/logging.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/stubs/substitute.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
#define TEST_1D(FIXTURE, NAME, CASES)                                      \
  class FIXTURE##_##NAME##_DD : public FIXTURE {                           \
   protected:                                                              \
    template <typename CaseType>                                           \
    void DoSingleCase(const CaseType& CASES##_case);                       \
  };                                                                       \
                                                                            \
  TEST_F(FIXTURE##_##NAME##_DD, NAME) {                                    \
    for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) {                    \
      SCOPED_TRACE(testing::Message()                                      \
                   << #CASES " case #" << i << ": " << CASES[i]);          \
      DoSingleCase(CASES[i]);                                              \
    }                                                                      \
  }                                                                        \
                                                                            \
  template <typename CaseType>                                             \
  void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
#define TEST_2D(FIXTURE, NAME, CASES1, CASES2)                              \
  class FIXTURE##_##NAME##_DD : public FIXTURE {                           \
   protected:                                                              \
    template <typename CaseType1, typename CaseType2>                      \
    void DoSingleCase(const CaseType1& CASES1##_case,                      \
                      const CaseType2& CASES2##_case);                     \
  };                                                                       \
                                                                            \
  TEST_F(FIXTURE##_##NAME##_DD, NAME) {                                    \
    for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) {                   \
      for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) {                 \
        SCOPED_TRACE(testing::Message()                                    \
                     << #CASES1 " case #" << i << ": " << CASES1[i] << ", "\
                     << #CASES2 " case #" << j << ": " << CASES2[j]);      \
        DoSingleCase(CASES1[i], CASES2[j]);                                \
      }                                                                    \
    }                                                                      \
  }                                                                        \
                                                                            \
  template <typename CaseType1, typename CaseType2>                        \
  void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
                                           const CaseType2& CASES2##_case)
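
// Editor's note: an illustrative usage sketch, not part of the original
// suite. TEST_1D synthesizes a fixture subclass plus a TEST_F that calls
// DoSingleCase() once per element of the case array, exposing the current
// element under the name <CASES>_case. The fixture and case table below are
// hypothetical, introduced only for this example.
class MacroUsageSketch : public testing::Test {};

const char* const kSketchWords[] = {"foo", "bar", "baz"};

TEST_1D(MacroUsageSketch, Words, kSketchWords) {
  // kSketchWords_case is the array element currently under test.
  EXPECT_TRUE(kSketchWords_case != NULL);
}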
  TestInputStream(const void* data, int size, int block_size)
  ~TestInputStream() {}
class TestErrorCollector : public ErrorCollector {
 public:
  TestErrorCollector() {}
  ~TestErrorCollector() {}
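
  // Editor's sketch (an assumption, not the original implementation): the
  // remaining members of this class are elided here. The error tests below
  // compare error_collector.text_ against strings of the form
  // "line:column: message\n", so the collector presumably overrides
  // ErrorCollector::AddError and appends each report to one string, roughly:
  std::string text_;

  void AddError(int line, int column, const std::string& message) {
    // Matches the expected format in kErrorCases, e.g. "0:2: Invalid ...\n".
    text_ += std::to_string(line) + ":" + std::to_string(column) + ": " +
             message + "\n";
  }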
const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
struct SimpleTokenCase {
inline std::ostream& operator<<(std::ostream& out,
                                const SimpleTokenCase& test_case) {
SimpleTokenCase kSimpleTokenCases[] = {
  TestInputStream input(kSimpleTokenCases_case.input.data(),
                        kSimpleTokenCases_case.input.size(), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  EXPECT_EQ(0, tokenizer.current().column);
  EXPECT_EQ(0, tokenizer.current().end_column);

  EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type);
  EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text);

  EXPECT_EQ(0, tokenizer.current().column);
  EXPECT_EQ(kSimpleTokenCases_case.input.size(),
            tokenizer.current().end_column);

  EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);
  EXPECT_EQ(kSimpleTokenCases_case.input.size(),
            tokenizer.current().end_column);
  const char* text = "1f 2.5f 6e3f 7F";
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);
  tokenizer.set_allow_f_after_float(true);

  EXPECT_EQ(tokenizer.current().text, "1f");
  EXPECT_EQ(tokenizer.current().text, "2.5f");
  EXPECT_EQ(tokenizer.current().text, "6e3f");
  EXPECT_EQ(tokenizer.current().text, "7F");
SimpleTokenCase kWhitespaceTokenCases[] = {
    TestInputStream input(kWhitespaceTokenCases_case.input.data(),
                          kWhitespaceTokenCases_case.input.size(),
                          kBlockSizes_case);
    TestErrorCollector error_collector;
    Tokenizer tokenizer(&input, &error_collector);

    TestInputStream input(kWhitespaceTokenCases_case.input.data(),
                          kWhitespaceTokenCases_case.input.size(),
                          kBlockSizes_case);
    TestErrorCollector error_collector;
    Tokenizer tokenizer(&input, &error_collector);
    tokenizer.set_report_whitespace(true);
    tokenizer.set_report_newlines(true);

    EXPECT_EQ(tokenizer.current().text, kWhitespaceTokenCases_case.input);
    EXPECT_EQ(tokenizer.current().type, kWhitespaceTokenCases_case.type);
struct MultiTokenCase {
inline std::ostream& operator<<(std::ostream& out,
                                const MultiTokenCase& test_case) {
MultiTokenCase kMultiTokenCases[] = {
    {"foo 1 1.2 + 'bar'",

    {"foo // This is a comment\n"
     "bar // This is another comment",

    {"foo /* This is a block comment */ bar",
  TestInputStream input(kMultiTokenCases_case.input.data(),
                        kMultiTokenCases_case.input.size(), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  EXPECT_EQ(0, tokenizer.current().column);
  EXPECT_EQ(0, tokenizer.current().end_column);

  Tokenizer::Token token;
    token = kMultiTokenCases_case.output[i++];

    Tokenizer::Token previous = tokenizer.current();

    EXPECT_EQ(token.type, tokenizer.current().type);
    EXPECT_EQ(token.text, tokenizer.current().text);
    EXPECT_EQ(token.line, tokenizer.current().line);
    EXPECT_EQ(token.column, tokenizer.current().column);
    EXPECT_EQ(token.end_column, tokenizer.current().end_column);
MultiTokenCase kMultiWhitespaceTokenCases[] = {
    {"foo 1 \t1.2 \n +\v'bar'",
TEST_2D(TokenizerTest, MultipleWhitespaceTokens, kMultiWhitespaceTokenCases,
        kBlockSizes) {
  TestInputStream input(kMultiWhitespaceTokenCases_case.input.data(),
                        kMultiWhitespaceTokenCases_case.input.size(),
                        kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);
  tokenizer.set_report_whitespace(true);
  tokenizer.set_report_newlines(true);

  EXPECT_EQ(0, tokenizer.current().column);
  EXPECT_EQ(0, tokenizer.current().end_column);

  Tokenizer::Token token;
    token = kMultiWhitespaceTokenCases_case.output[i++];

    Tokenizer::Token previous = tokenizer.current();

    EXPECT_EQ(token.type, tokenizer.current().type);
    EXPECT_EQ(token.text, tokenizer.current().text);
    EXPECT_EQ(token.line, tokenizer.current().line);
    EXPECT_EQ(token.column, tokenizer.current().column);
    EXPECT_EQ(token.end_column, tokenizer.current().end_column);
#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
      "corge /* grault */\n"

  const char* const kTokens[] = {"foo", "baz", "/", "/", "qux", "corge",
                                 "/", "*", "grault", "*", "/", "garply"};

  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

    EXPECT_EQ(tokenizer.current().text, kTokens[i]);
struct DocCommentCase {
inline std::ostream& operator<<(std::ostream& out,
                                const DocCommentCase& test_case) {
DocCommentCase kDocCommentCases[] = {
    {"prev /* ignored */ next",
    {"prev // trailing comment\n"
     " trailing comment\n",
     "// leading comment\n"
     "// trailing comment\n"
     " trailing comment\n"
    {"prev // trailing comment\n"
     "// leading comment\n"
     " trailing comment\n",
    {"prev /* trailing block comment */\n"
     "/* leading block comment\n"
     " trailing block comment ",
     " leading block comment\n"
     "/* trailing block comment\n"
     "/* leading block comment\n"
     " trailing block comment\n"
     " leading block comment\n"
     "// trailing comment\n"
     "// detached comment\n"
     "// second detached comment\n"
     "/* third detached comment\n"
     "// leading comment\n"
     " trailing comment\n",
     {" detached comment\n"
      " second detached comment\n",
      " third detached comment\n"
     " leading comment\n"},
     "// detached comment\n"
     "// leading comment\n"
     {" detached comment\n"},
     " leading comment\n"},
     "// leading comment\n"
     " leading comment\n"},
  TestInputStream input(kDocCommentCases_case.input.data(),
                        kDocCommentCases_case.input.size(), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  TestInputStream input2(kDocCommentCases_case.input.data(),
                         kDocCommentCases_case.input.size(), kBlockSizes_case);
  Tokenizer tokenizer2(&input2, &error_collector);

  EXPECT_EQ("prev", tokenizer.current().text);
  EXPECT_EQ("prev", tokenizer2.current().text);

  tokenizer2.NextWithComments(NULL, NULL, NULL);
  EXPECT_EQ("next", tokenizer.current().text);
  EXPECT_EQ("next", tokenizer2.current().text);

  EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,

    ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL);
#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
                     "passed text that could not have been tokenized as a float");
                     "passed text that could not have been tokenized as a float");
                     "passed text that could not have been tokenized as a float");
#endif  // PROTOBUF_HAS_DEATH_TEST
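  // Editor's sketch (an illustration added here; the passing-case assertions
  // of this test are elided above): Tokenizer::ParseFloat converts the text
  // of a FLOAT token into a double, roughly:
  EXPECT_DOUBLE_EQ(1.5, Tokenizer::ParseFloat("1.5"));
  EXPECT_DOUBLE_EQ(5000.0, Tokenizer::ParseFloat("5e3"));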
TEST_F(TokenizerTest, ParseString) {
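  // Editor's sketch (an illustration added here; the original assertions of
  // this test are elided): Tokenizer::ParseString unescapes the text of a
  // STRING token into *output, so a simple round trip looks like:
  std::string sketch_output;
  Tokenizer::ParseString("'hello'", &sketch_output);  // strips quotes/escapes
  EXPECT_EQ("hello", sketch_output);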
#ifdef PROTOBUF_HAS_DEATH_TEST  // death tests do not work on Windows yet
                     "passed text that could not have been tokenized as a string");
#endif  // PROTOBUF_HAS_DEATH_TEST
TEST_F(TokenizerTest, ParseStringAppend) {
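  // Editor's sketch (an illustration added here; the original assertions of
  // this test are elided): unlike ParseString(), ParseStringAppend() keeps
  // whatever *output already contains and appends the parsed characters:
  std::string sketch_output("stuff+");
  Tokenizer::ParseStringAppend("'hello'", &sketch_output);
  EXPECT_EQ("stuff+hello", sketch_output);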
inline std::ostream& operator<<(std::ostream& out,
                                const ErrorCase& test_case) {
ErrorCase kErrorCases[] = {
    {"'\\l' foo", true, "0:2: Invalid escape sequence in string literal.\n"},
    {"'\\X' foo", true, "0:2: Invalid escape sequence in string literal.\n"},
    {"'\\x' foo", true, "0:3: Expected hex digits for escape sequence.\n"},
    {"'foo", false, "0:4: Unexpected end of string.\n"},
    {"'bar\nfoo", true, "0:4: String literals cannot cross line boundaries.\n"},
    {"'\\u01' foo", true,
     "0:5: Expected four hex digits for \\u escape sequence.\n"},
    {"'\\u01' foo", true,
     "0:5: Expected four hex digits for \\u escape sequence.\n"},
    {"'\\uXYZ' foo", true,
     "0:3: Expected four hex digits for \\u escape sequence.\n"},

    {"123foo", true, "0:3: Need space between number and identifier.\n"},

    {"0x foo", true, "0:2: \"0x\" must be followed by hex digits.\n"},
    {"0541823 foo", true,
     "0:4: Numbers starting with leading zero must be in octal.\n"},
    {"0x123z foo", true, "0:5: Need space between number and identifier.\n"},
    {"0x123.4 foo", true, "0:5: Hex and octal numbers must be integers.\n"},
    {"0123.4 foo", true, "0:4: Hex and octal numbers must be integers.\n"},

    {"1e foo", true, "0:2: \"e\" must be followed by exponent.\n"},
    {"1e- foo", true, "0:3: \"e\" must be followed by exponent.\n"},
     "0:3: Already saw decimal point or exponent; can't have another one.\n"},
     "0:3: Already saw decimal point or exponent; can't have another one.\n"},
     "0:1: Need space between identifier and decimal point.\n"},
    {"1.0f foo", true, "0:3: Need space between number and identifier.\n"},

     "0:2: End-of-file inside block comment.\n"
     "0:0: Comment started here.\n"},
     "0:3: \"/*\" inside block comment. Block comments cannot be nested.\n"},

    {"\b foo", true, "0:0: Invalid control characters encountered in text.\n"},
     "0:0: Invalid control characters encountered in text.\n"},

    {"\b", false, "0:0: Invalid control characters encountered in text.\n"},

     "0:0: Invalid control characters encountered in text.\n"},
     "0:0: Invalid control characters encountered in text.\n"},

    {"\300foo", true, "0:0: Interpreting non ascii codepoint 192.\n"},
  TestInputStream input(kErrorCases_case.input.data(),
                        kErrorCases_case.input.size(), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);

  bool last_was_foo = false;
  while (tokenizer.Next()) {
    last_was_foo = tokenizer.current().text == "foo";
  }

  EXPECT_EQ(kErrorCases_case.errors, error_collector.text_);
  if (kErrorCases_case.recoverable) {
  TestInputStream input(text.data(), text.size(), kBlockSizes_case);
  TestErrorCollector error_collector;
  Tokenizer tokenizer(&input, &error_collector);