37 #include <google/protobuf/compiler/parser.h>
43 #include <unordered_map>
44 #include <unordered_set>
46 #include <google/protobuf/stubs/casts.h>
47 #include <google/protobuf/stubs/logging.h>
48 #include <google/protobuf/stubs/common.h>
49 #include <google/protobuf/descriptor.pb.h>
50 #include <google/protobuf/io/tokenizer.h>
51 #include <google/protobuf/descriptor.h>
52 #include <google/protobuf/wire_format.h>
53 #include <google/protobuf/stubs/strutil.h>
54 #include <google/protobuf/stubs/map_util.h>
55 #include <google/protobuf/stubs/hash.h>
61 using internal::WireFormat;
65 typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
67 TypeNameMap MakeTypeNameTable() {
91 const TypeNameMap kTypeNames = MakeTypeNameTable();
97 static const char kSuffix[] =
"Entry";
98 result.reserve(field_name.size() +
sizeof(kSuffix));
100 for (
const char field_name_char : field_name) {
101 if (field_name_char ==
'_') {
103 }
else if (cap_next) {
105 if (
'a' <= field_name_char && field_name_char <=
'z') {
106 result.push_back(field_name_char -
'a' +
'A');
108 result.push_back(field_name_char);
112 result.push_back(field_name_char);
// Returns true when `c` lies in the inclusive character range 'A'..'Z'.
bool IsUppercase(char c) {
  // Reject anything outside the range; what is left is an uppercase letter.
  return !(c < 'A' || c > 'Z');
}
// Returns true when `c` lies in the inclusive character range 'a'..'z'.
bool IsLowercase(char c) {
  // Reject anything outside the range; what is left is a lowercase letter.
  return !(c < 'a' || c > 'z');
}
// Returns true when `c` lies in the inclusive character range '0'..'9'.
bool IsNumber(char c) {
  // Reject anything outside the range; what is left is a decimal digit.
  return !(c < '0' || c > '9');
}
130 if (!IsUppercase(
name[0])) {
134 for (
const char c :
name) {
143 for (
const char c :
name) {
144 if (!IsUppercase(c) && c !=
'_' && !IsNumber(c)) {
152 for (
const char c :
name) {
153 if (!IsLowercase(c) && c !=
'_' && !IsNumber(c)) {
161 for (
int i = 1;
i <
name.length();
i++) {
163 if (IsNumber(c) &&
name[i - 1] ==
'_') {
175 #define DO(STATEMENT) \
185 source_location_table_(NULL),
188 stop_after_syntax_identifier_(
false) {
195 inline bool Parser::LookingAt(
const char*
text) {
199 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
200 return input_->current().type == token_type;
203 inline bool Parser::AtEnd() {
return LookingAtType(io::Tokenizer::TYPE_END); }
205 bool Parser::TryConsume(
const char*
text) {
206 if (LookingAt(
text)) {
215 if (TryConsume(
text)) {
224 if (TryConsume(
text)) {
233 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
243 bool Parser::ConsumeInteger(
int*
output,
const char*
error) {
244 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
249 AddError(
"Integer out of range.");
261 bool Parser::ConsumeSignedInteger(
int*
output,
const char*
error) {
262 bool is_negative =
false;
264 if (TryConsume(
"-")) {
270 if (is_negative)
value *= -1;
277 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
280 AddError(
"Integer out of range.");
292 bool Parser::ConsumeNumber(
double*
output,
const char*
error) {
293 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
297 }
else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
303 AddError(
"Integer out of range.");
309 }
else if (LookingAt(
"inf")) {
310 *
output = std::numeric_limits<double>::infinity();
313 }
else if (LookingAt(
"nan")) {
314 *
output = std::numeric_limits<double>::quiet_NaN();
324 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
325 io::Tokenizer::ParseString(
input_->current().text,
output);
328 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
329 io::Tokenizer::ParseStringAppend(
input_->current().text,
output);
339 bool Parser::TryConsumeEndOfDeclaration(
const char*
text,
340 const LocationRecorder* location) {
341 if (LookingAt(
text)) {
343 std::vector<std::string> detached;
348 leading.swap(upcoming_doc_comments_);
350 if (location != NULL) {
351 upcoming_detached_comments_.swap(detached);
352 location->AttachComments(&leading, &
trailing, &detached);
353 }
else if (strcmp(
text,
"}") == 0) {
356 upcoming_detached_comments_.swap(detached);
360 upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
361 detached.begin(), detached.end());
370 bool Parser::ConsumeEndOfDeclaration(
const char*
text,
371 const LocationRecorder* location) {
372 if (TryConsumeEndOfDeclaration(
text, location)) {
402 Parser::LocationRecorder::LocationRecorder(
Parser*
parser)
404 source_code_info_(
parser->source_code_info_),
405 location_(
parser_->source_code_info_->add_location()) {
406 location_->add_span(
parser_->input_->current().line);
407 location_->add_span(
parser_->input_->current().column);
411 Init(parent, parent.source_code_info_);
417 Init(parent, source_code_info);
423 Init(parent, parent.source_code_info_);
428 int path1,
int path2) {
429 Init(parent, parent.source_code_info_);
440 location_->mutable_path()->CopyFrom(parent.location_->path());
442 location_->add_span(
parser_->input_->current().line);
443 location_->add_span(
parser_->input_->current().column);
447 if (location_->span_size() <= 2) {
448 EndAt(
parser_->input_->previous());
453 location_->add_path(path_component);
457 location_->set_span(0, token.line);
458 location_->set_span(1, token.column);
462 location_->set_span(0, other.location_->span(0));
463 location_->set_span(1, other.location_->span(1));
467 if (token.line != location_->span(0)) {
468 location_->add_span(token.line);
470 location_->add_span(token.end_column);
476 if (
parser_->source_location_table_ != NULL) {
477 parser_->source_location_table_->Add(
478 descriptor, location, location_->span(0), location_->span(1));
484 if (
parser_->source_location_table_ !=
nullptr) {
485 parser_->source_location_table_->AddImport(
491 return location_->path_size();
500 if (!leading->empty()) {
501 location_->mutable_leading_comments()->swap(*leading);
504 location_->mutable_trailing_comments()->swap(*
trailing);
550 bool has_allow_alias =
false;
551 bool allow_alias =
false;
553 for (
int i = 0;
i < proto->
options().uninterpreted_option_size();
i++) {
555 if (
option.name_size() > 1) {
558 if (!
option.name(0).is_extension() &&
559 option.name(0).name_part() ==
"allow_alias") {
560 has_allow_alias =
true;
561 if (
option.identifier_value() ==
"true") {
568 if (has_allow_alias && !allow_alias) {
570 "\"" + proto->
name() +
571 "\" declares 'option allow_alias = false;' which has no effect. "
572 "Please remove the declaration.";
578 std::set<int> used_values;
579 bool has_duplicates =
false;
582 if (used_values.find(enum_value.
number()) != used_values.end()) {
583 has_duplicates =
true;
586 used_values.insert(enum_value.
number());
589 if (allow_alias && !has_duplicates) {
591 "\"" + proto->
name() +
592 "\" declares support for enum aliases but no enum values share field "
593 "numbers. Please remove the unnecessary 'option allow_alias = true;' "
604 for (
const auto& enum_value : proto->
value()) {
605 if (!IsUpperUnderscore(enum_value.
name())) {
607 "Enum constant should be in UPPER_CASE. Found: " +
609 ". See https://developers.google.com/protocol-buffers/docs/style");
636 LocationRecorder root_location(
this);
637 root_location.RecordLegacyLocation(
file,
650 <<
". Please use 'syntax = \"proto2\";' "
651 <<
"or 'syntax = \"proto3\";' to specify a syntax "
652 <<
"version. (Defaulted to proto2 syntax.)";
676 assert(
file != NULL);
677 source_code_info.
Swap(
file->mutable_source_code_info());
682 LocationRecorder syntax_location(parent,
686 "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
697 AddError(syntax_token.line, syntax_token.column,
698 "Unrecognized syntax identifier \"" +
syntax +
700 "only recognizes \"proto2\" and \"proto3\".");
708 const LocationRecorder& root_location) {
713 LocationRecorder location(root_location,
715 file->message_type_size());
718 LocationRecorder location(root_location,
720 file->enum_type_size());
723 LocationRecorder location(root_location,
725 file->service_size());
728 LocationRecorder location(root_location,
731 file->mutable_extension(),
file->mutable_message_type(), root_location,
735 file->mutable_public_dependency(),
736 file->mutable_weak_dependency(), root_location,
file);
740 LocationRecorder location(root_location,
745 AddError(
"Expected top-level statement (e.g. \"message\").");
758 LocationRecorder location(message_location,
760 location.RecordLegacyLocation(
message,
763 if (!IsUpperCamelCase(
message->name())) {
765 "Message name should be in UpperCamelCase. Found: " +
767 ". See https://developers.google.com/protocol-buffers/docs/style");
778 std::unordered_set<std::string>
names;
782 for (
const auto& oneof :
message->oneof_decl()) {
783 names.insert(oneof.name());
787 if (
field.proto3_optional()) {
793 if (oneof_name.empty() || oneof_name[0] !=
'_') {
794 oneof_name =
'_' + oneof_name;
796 while (
names.count(oneof_name) > 0) {
797 oneof_name =
'X' + oneof_name;
800 names.insert(oneof_name);
813 const int kMaxRangeSentinel = -1;
817 for (
int i = 0;
i <
options.uninterpreted_option_size(); ++
i) {
820 uninterpreted.
name(0).name_part() ==
"message_set_wire_format" &&
832 const bool is_message_set = IsMessageSetWireFormatMessage(*
message);
833 const int max_extension_number = is_message_set
836 for (
int i = 0;
i <
message->extension_range_size(); ++
i) {
837 if (
message->extension_range(i).end() == kMaxRangeSentinel) {
838 message->mutable_extension_range(i)->set_end(max_extension_number);
847 const bool is_message_set = IsMessageSetWireFormatMessage(*
message);
848 const int max_field_number = is_message_set
851 for (
int i = 0;
i <
message->reserved_range_size(); ++
i) {
852 if (
message->reserved_range(i).end() == kMaxRangeSentinel) {
853 message->mutable_reserved_range(i)->set_end(max_field_number);
861 const LocationRecorder& message_location,
867 AddError(
"Reached end of input in message definition (missing '}').");
878 if (
message->extension_range_size() > 0) {
879 AdjustExtensionRangesWithMaxEndNumber(
message);
881 if (
message->reserved_range_size() > 0) {
882 AdjustReservedRangesWithMaxEndNumber(
message);
888 const LocationRecorder& message_location,
894 LocationRecorder location(message_location,
900 LocationRecorder location(message_location,
906 LocationRecorder location(message_location,
912 LocationRecorder location(message_location,
915 message->mutable_nested_type(), message_location,
919 LocationRecorder location(message_location,
924 int oneof_index =
message->oneof_decl_size();
925 LocationRecorder oneof_location(
929 oneof_location, message_location, containing_file);
931 LocationRecorder location(message_location,
935 message->add_field(),
message->mutable_nested_type(), message_location,
941 RepeatedPtrField<DescriptorProto>* messages,
942 const LocationRecorder& parent_location,
943 int location_field_number_for_nested_type,
944 const LocationRecorder& field_location,
952 field->set_proto3_optional(
true);
958 location_field_number_for_nested_type,
959 field_location, containing_file);
964 const LocationRecorder& parent_location,
965 int location_field_number_for_nested_type,
966 const LocationRecorder& field_location,
971 LocationRecorder location(field_location);
974 bool type_parsed =
false;
982 map_field.is_map_field =
true;
989 if (map_field.is_map_field) {
990 if (
field->has_oneof_index()) {
991 AddError(
"Map fields are not allowed in oneofs.");
994 if (
field->has_label()) {
996 "Field labels (required/optional/repeated) are not allowed on "
1000 if (
field->has_extendee()) {
1001 AddError(
"Map fields are not allowed to be extensions.");
1006 DO(
ParseType(&map_field.key_type, &map_field.key_type_name));
1008 DO(
ParseType(&map_field.value_type, &map_field.value_type_name));
1018 if (!
field->has_label()) {
1019 AddError(
"Expected \"required\", \"optional\", or \"repeated\".");
1043 LocationRecorder location(field_location,
1048 if (!IsLowerUnderscore(
field->name())) {
1050 "Field name should be lowercase. Found: " +
field->name() +
1051 ". See: https://developers.google.com/protocol-buffers/docs/style");
1053 if (IsNumberFollowUnderscore(
field->name())) {
1055 "Number should not come right after an underscore. Found: " +
1057 ". See: https://developers.google.com/protocol-buffers/docs/style");
1060 DO(
Consume(
"=",
"Missing field number."));
1064 LocationRecorder location(field_location,
1066 location.RecordLegacyLocation(
field,
1080 LocationRecorder group_location(parent_location);
1081 group_location.StartAt(field_location);
1082 group_location.AddPath(location_field_number_for_nested_type);
1083 group_location.AddPath(messages->size());
1090 LocationRecorder location(group_location,
1092 location.StartAt(name_token);
1093 location.EndAt(name_token);
1094 location.RecordLegacyLocation(
group,
1100 LocationRecorder location(field_location,
1102 location.StartAt(name_token);
1103 location.EndAt(name_token);
1109 if (
group->name()[0] <
'A' ||
'Z' <
group->name()[0]) {
1110 AddError(name_token.line, name_token.column,
1111 "Group names must start with a capital letter.");
1127 if (map_field.is_map_field) {
1136 RepeatedPtrField<DescriptorProto>* messages) {
1139 field->set_type_name(entry_name);
1146 if (map_field.key_type_name.empty()) {
1147 key_field->
set_type(map_field.key_type);
1155 if (map_field.value_type_name.empty()) {
1156 value_field->
set_type(map_field.value_type);
1180 for (
int i = 0;
i <
field->options().uninterpreted_option_size(); ++
i) {
1182 field->options().uninterpreted_option(i);
1183 if (
option.name_size() == 1 &&
1184 option.name(0).name_part() ==
"enforce_utf8" &&
1185 !
option.name(0).is_extension()) {
1199 const LocationRecorder& field_location,
1203 LocationRecorder location(field_location,
1230 if (
field->has_default_value()) {
1231 AddError(
"Already set option \"default\".");
1232 field->clear_default_value();
1238 LocationRecorder location(field_location,
1240 location.RecordLegacyLocation(
field,
1244 if (!
field->has_type()) {
1259 switch (
field->type()) {
1275 default_value->append(
"-");
1282 "Expected integer for field default value."));
1300 AddError(
"Unsigned field can't have negative default value.");
1305 "Expected integer for field default value."));
1315 default_value->append(
"-");
1327 default_value->assign(
"true");
1329 default_value->assign(
"false");
1331 AddError(
"Expected \"true\" or \"false\".");
1341 "Expected string for field default "
1347 *default_value =
CEscape(*default_value);
1352 "Expected enum identifier for field "
1358 AddError(
"Messages can't have default values.");
1366 const LocationRecorder& field_location,
1368 if (
field->has_json_name()) {
1369 AddError(
"Already set option \"json_name\".");
1370 field->clear_json_name();
1373 LocationRecorder location(field_location,
1375 location.RecordLegacyLocation(
field,
1381 LocationRecorder value_location(location);
1382 value_location.RecordLegacyLocation(
1386 "Expected string for JSON name."));
1391 const LocationRecorder& part_location,
1399 LocationRecorder location(
1405 name->mutable_name_part()->append(identifier);
1409 name->mutable_name_part()->append(
".");
1411 name->mutable_name_part()->append(identifier);
1416 name->set_is_extension(
true);
1418 LocationRecorder location(
1421 name->mutable_name_part()->append(identifier);
1422 name->set_is_extension(
false);
1432 int brace_depth = 1;
1438 if (brace_depth == 0) {
1448 AddError(
"Unexpected end of stream while parsing aggregate value.");
1455 const LocationRecorder& options_location,
1460 options->GetDescriptor()->FindFieldByName(
"uninterpreted_option");
1462 <<
"No field named \"uninterpreted_option\" in the Options proto.";
1464 const Reflection* reflection =
options->GetReflection();
1466 LocationRecorder location(
1467 options_location, uninterpreted_option_field->number(),
1468 reflection->FieldSize(*
options, uninterpreted_option_field));
1475 down_cast<UninterpretedOption*>(
options->GetReflection()->AddMessage(
1476 options, uninterpreted_option_field));
1480 LocationRecorder name_location(location,
1482 name_location.RecordLegacyLocation(
1486 LocationRecorder part_location(name_location,
1494 LocationRecorder part_location(name_location,
1504 LocationRecorder value_location(location);
1505 value_location.RecordLegacyLocation(
1514 GOOGLE_LOG(
FATAL) <<
"Trying to read value before any tokens have been read.";
1518 AddError(
"Unexpected end of stream while parsing option value.");
1524 <<
"Whitespace tokens were not requested.";
1529 value_location.AddPath(
1532 AddError(
"Invalid '-' symbol before identifier.");
1549 value_location.AddPath(
1554 value_location.AddPath(
1572 AddError(
"Invalid '-' symbol before string.");
1583 value_location.AddPath(
1588 AddError(
"Expected option value.");
1603 const LocationRecorder& extensions_location,
1608 int old_range_size =
message->extension_range_size();
1612 LocationRecorder location(extensions_location,
1613 message->extension_range_size());
1616 location.RecordLegacyLocation(
range,
1620 io::Tokenizer::Token start_token;
1623 LocationRecorder start_location(
1630 LocationRecorder end_location(
1636 end = kMaxRangeSentinel - 1;
1641 LocationRecorder end_location(
1643 end_location.StartAt(start_token);
1644 end_location.EndAt(start_token);
1657 int range_number_index = extensions_location.CurrentPathSize();
1662 message->mutable_extension_range(old_range_size)->mutable_options();
1665 LocationRecorder index_location(
1666 extensions_location, 0 ,
1668 LocationRecorder location(
1681 for (
int i = old_range_size + 1;
i <
message->extension_range_size();
i++) {
1682 message->mutable_extension_range(i)->mutable_options()->CopyFrom(
1686 for (
int i = old_range_size;
i <
message->extension_range_size();
i++) {
1688 if (info.
location(j).path_size() == range_number_index + 1) {
1695 dest->set_path(range_number_index, i);
1707 const LocationRecorder& message_location) {
1712 LocationRecorder location(message_location,
1714 location.StartAt(start_token);
1717 LocationRecorder location(message_location,
1719 location.StartAt(start_token);
1725 const LocationRecorder& parent_location) {
1727 LocationRecorder location(parent_location,
message->reserved_name_size());
1735 const LocationRecorder& parent_location) {
1738 LocationRecorder location(parent_location,
message->reserved_range_size());
1742 io::Tokenizer::Token start_token;
1744 LocationRecorder start_location(
1748 :
"Expected field number range.")));
1752 LocationRecorder end_location(
1758 end = kMaxRangeSentinel - 1;
1763 LocationRecorder end_location(
1765 end_location.StartAt(start_token);
1766 end_location.EndAt(start_token);
1784 const LocationRecorder& message_location) {
1789 LocationRecorder location(message_location,
1791 location.StartAt(start_token);
1794 LocationRecorder location(message_location,
1796 location.StartAt(start_token);
1802 const LocationRecorder& parent_location) {
1804 LocationRecorder location(parent_location,
message->reserved_name_size());
1812 const LocationRecorder& parent_location) {
1815 LocationRecorder location(parent_location,
message->reserved_range_size());
1818 message->add_reserved_range();
1820 io::Tokenizer::Token start_token;
1822 LocationRecorder start_location(
1826 (
first ?
"Expected enum value or number range."
1827 :
"Expected enum number range.")));
1831 LocationRecorder end_location(
1841 LocationRecorder end_location(
1843 end_location.StartAt(start_token);
1844 end_location.EndAt(start_token);
1858 RepeatedPtrField<DescriptorProto>* messages,
1859 const LocationRecorder& parent_location,
1860 int location_field_number_for_nested_type,
1861 const LocationRecorder& extend_location,
1874 bool is_first =
true;
1878 AddError(
"Reached end of input in extend definition (missing '}').");
1883 LocationRecorder location(extend_location,
extensions->size());
1888 LocationRecorder extendee_location(
1890 extendee_location.StartAt(extendee_start);
1891 extendee_location.EndAt(extendee_end);
1894 extendee_location.RecordLegacyLocation(
1900 field->set_extendee(extendee);
1903 location_field_number_for_nested_type, location,
1916 const LocationRecorder& oneof_location,
1917 const LocationRecorder& containing_type_location,
1922 LocationRecorder name_location(oneof_location,
1931 AddError(
"Reached end of input in oneof definition (missing '}').");
1936 LocationRecorder option_location(
1950 "Fields in oneofs must not have labels (required / optional "
1957 LocationRecorder field_location(containing_type_location,
1963 field->set_oneof_index(oneof_index);
1966 containing_type_location,
1968 field_location, containing_file)) {
1982 const LocationRecorder& enum_location,
1987 LocationRecorder location(enum_location,
1989 location.RecordLegacyLocation(
enum_type,
2002 const LocationRecorder& enum_location,
2008 AddError(
"Reached end of input in enum definition (missing '}').");
2023 const LocationRecorder& enum_location,
2029 LocationRecorder location(enum_location,
2036 LocationRecorder location(enum_location,
2044 const LocationRecorder& enum_value_location,
2048 LocationRecorder location(enum_value_location,
2050 location.RecordLegacyLocation(enum_value,
2053 "Expected enum constant name."));
2056 DO(
Consume(
"=",
"Missing numeric value for enum constant."));
2060 LocationRecorder location(enum_value_location,
2062 location.RecordLegacyLocation(enum_value,
2080 const LocationRecorder& enum_value_location,
2084 LocationRecorder location(enum_value_location,
2107 LocationRecorder location(service_location,
2109 location.RecordLegacyLocation(
service,
2119 const LocationRecorder& service_location,
2125 AddError(
"Reached end of input in service definition (missing '}').");
2140 const LocationRecorder& service_location,
2146 LocationRecorder location(service_location,
2151 LocationRecorder location(service_location,
2159 const LocationRecorder& method_location,
2164 LocationRecorder location(method_location,
2174 LocationRecorder location(
2176 location.RecordLegacyLocation(
method,
2178 method->set_client_streaming(
true);
2182 LocationRecorder location(method_location,
2184 location.RecordLegacyLocation(
method,
2195 LocationRecorder location(
2197 location.RecordLegacyLocation(
method,
2200 method->set_server_streaming(
true);
2203 LocationRecorder location(method_location,
2205 location.RecordLegacyLocation(
method,
2215 method->mutable_options()));
2225 const int optionsFieldNumber,
2231 AddError(
"Reached end of input in method options (missing '}').");
2238 LocationRecorder location(parent_location, optionsFieldNumber);
2239 if (!
ParseOption(mutable_options, location, containing_file,
2254 const LocationRecorder& field_location,
2260 LocationRecorder location(field_location,
2276 if (
iter != kTypeNames.end()) {
2289 if (
iter != kTypeNames.end()) {
2294 AddError(
"Expected message type.");
2323 const LocationRecorder& root_location,
2325 if (
file->has_package()) {
2326 AddError(
"Multiple package definitions.");
2329 file->clear_package();
2332 LocationRecorder location(root_location,
2341 file->mutable_package()->append(identifier);
2343 file->mutable_package()->append(
".");
2354 const LocationRecorder& root_location,
2356 LocationRecorder location(root_location,
2358 dependency->size());
2363 LocationRecorder public_location(
2365 public_dependency->
size());
2367 *public_dependency->Add() = dependency->
size();
2369 LocationRecorder weak_location(
2371 weak_dependency->
size());
2372 weak_location.RecordLegacyImportLocation(containing_file,
"weak");
2374 *weak_dependency->Add() = dependency->
size();
2379 "Expected a string naming the file to import."));
2380 *dependency->Add() = import_file;
2381 location.RecordLegacyImportLocation(containing_file, import_file);
2396 int* column)
const {
2397 const std::pair<int, int>*
result =
2405 *column =
result->second;
2412 int* column)
const {
2413 const std::pair<int, int>*
result =
2421 *column =
result->second;
2430 location_map_[std::make_pair(
descriptor, location)] =
2431 std::make_pair(
line, column);
2438 std::make_pair(
line, column);