31 """Contains routines for printing protocol messages in text format.
35 # Create a proto object and serialize it to a text proto string.
36 message = my_proto_pb2.MyMessage(foo='bar')
37 text_proto = text_format.MessageToString(message)
39 # Parse a text proto string.
40 message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
43 __author__ =
'kenton@google.com (Kenton Varda)'
59 __all__ = [
'MessageToString',
'Parse',
'PrintMessage',
'PrintField',
60 'PrintFieldValue',
'Merge',
'MessageToBytes']
62 _INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
63 type_checkers.Int32ValueChecker(),
64 type_checkers.Uint64ValueChecker(),
65 type_checkers.Int64ValueChecker())
66 _FLOAT_INFINITY = re.compile(
'-?inf(?:inity)?f?$', re.IGNORECASE)
67 _FLOAT_NAN = re.compile(
'nanf?$', re.IGNORECASE)
68 _QUOTES = frozenset((
"'",
'"'))
69 _ANY_FULL_TYPE_NAME =
'google.protobuf.Any'
73 """Top-level module error for text_format."""
77 """Thrown in case of text parsing or tokenizing error."""
79 def __init__(self, message=None, line=None, column=None):
80 if message
is not None and line
is not None:
82 if column
is not None:
83 loc +=
':{0}'.format(column)
84 message =
'{0} : {1}'.format(loc, message)
85 if message
is not None:
86 super(ParseError, self).
__init__(message)
109 if isinstance(val, six.text_type):
110 val = val.encode(
'utf-8')
123 use_short_repeated_primitives=False,
124 pointy_brackets=False,
125 use_index_order=False,
128 use_field_number=False,
129 descriptor_pool=None,
131 message_formatter=None,
132 print_unknown_fields=False):
134 """Convert protobuf message to text format.
136 Double values can be formatted compactly with 15 digits of
137 precision (which is the most that IEEE 754 "double" can guarantee)
138 using double_format='.15g'. To ensure that converting to text and back to a
139 proto will result in an identical value, double_format='.17g' should be used.
142 message: The protocol buffers message.
143 as_utf8: Return unescaped Unicode for non-ASCII characters.
144 In Python 3 actual Unicode characters may appear as is in strings.
145 In Python 2 the return value will be valid UTF-8 rather than only ASCII.
146 as_one_line: Don't introduce newlines between fields.
147 use_short_repeated_primitives: Use short repeated format for primitives.
148 pointy_brackets: If True, use angle brackets instead of curly braces for
150 use_index_order: If True, fields of a proto message will be printed using
151 the order defined in source code instead of the field number, extensions
152 will be printed at the end of the message and their relative order is
153 determined by the extension number. By default, use the field number
155 float_format: If set, use this to specify float field formatting
156 (per the "Format Specification Mini-Language"); otherwise, 8 valid digits
157 are used (default '.8g'). Also affects double fields if double_format is
158 not set but float_format is set.
159 double_format: If set, use this to specify double field formatting
160 (per the "Format Specification Mini-Language"); if it is not set but
161 float_format is set, use float_format. Otherwise, use str()
162 use_field_number: If True, print field numbers instead of names.
163 descriptor_pool: A DescriptorPool used to resolve Any types.
164 indent: The initial indent level, in terms of spaces, for pretty print.
165 message_formatter: A function(message, indent, as_one_line): unicode|None
166 to custom format selected sub-messages (usually based on message type).
167 Use to pretty print parts of the protobuf for easier diffing.
168 print_unknown_fields: If True, unknown fields will be printed.
171 A string of the text formatted protocol buffer message.
174 printer =
_Printer(out, indent, as_utf8, as_one_line,
175 use_short_repeated_primitives, pointy_brackets,
176 use_index_order, float_format, double_format,
178 descriptor_pool, message_formatter,
179 print_unknown_fields=print_unknown_fields)
180 printer.PrintMessage(message)
181 result = out.getvalue()
184 return result.rstrip()
190 """Convert protobuf message to encoded text format. See MessageToString."""
192 if isinstance(text, bytes):
194 codec =
'utf-8' if kwargs.get(
'as_utf8')
else 'ascii'
195 return text.encode(codec)
199 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE
and
200 field.message_type.has_options
and
201 field.message_type.GetOptions().map_entry)
209 use_short_repeated_primitives=False,
210 pointy_brackets=False,
211 use_index_order=False,
214 use_field_number=False,
215 descriptor_pool=None,
216 message_formatter=None,
217 print_unknown_fields=False):
219 out=out, indent=indent, as_utf8=as_utf8,
220 as_one_line=as_one_line,
221 use_short_repeated_primitives=use_short_repeated_primitives,
222 pointy_brackets=pointy_brackets,
223 use_index_order=use_index_order,
224 float_format=float_format,
225 double_format=double_format,
226 use_field_number=use_field_number,
227 descriptor_pool=descriptor_pool,
228 message_formatter=message_formatter,
229 print_unknown_fields=print_unknown_fields)
230 printer.PrintMessage(message)
239 use_short_repeated_primitives=False,
240 pointy_brackets=False,
241 use_index_order=False,
244 message_formatter=None,
245 print_unknown_fields=False):
246 """Print a single field name/value pair."""
247 printer =
_Printer(out, indent, as_utf8, as_one_line,
248 use_short_repeated_primitives, pointy_brackets,
249 use_index_order, float_format, double_format,
250 message_formatter=message_formatter,
251 print_unknown_fields=print_unknown_fields)
252 printer.PrintField(field, value)
261 use_short_repeated_primitives=False,
262 pointy_brackets=False,
263 use_index_order=False,
266 message_formatter=None,
267 print_unknown_fields=False):
268 """Print a single field value (not including name)."""
269 printer =
_Printer(out, indent, as_utf8, as_one_line,
270 use_short_repeated_primitives, pointy_brackets,
271 use_index_order, float_format, double_format,
272 message_formatter=message_formatter,
273 print_unknown_fields=print_unknown_fields)
274 printer.PrintFieldValue(field, value)
278 """Returns a protobuf message instance.
281 type_name: Fully-qualified protobuf message type name string.
282 descriptor_pool: DescriptorPool instance.
285 A Message instance of type matching type_name, or None if a Descriptor
286 wasn't found matching type_name.
289 if descriptor_pool
is None:
291 descriptor_pool = pool_mod.Default()
293 database = symbol_database.Default()
295 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
298 message_type = database.GetPrototype(message_descriptor)
303 WIRETYPE_LENGTH_DELIMITED = 2
304 WIRETYPE_START_GROUP = 3
308 """Text format printer for protocol message."""
315 use_short_repeated_primitives=False,
316 pointy_brackets=False,
317 use_index_order=False,
320 use_field_number=False,
321 descriptor_pool=None,
322 message_formatter=None,
323 print_unknown_fields=False):
324 """Initialize the Printer.
326 Double values can be formatted compactly with 15 digits of precision
327 (which is the most that IEEE 754 "double" can guarantee) using
328 double_format='.15g'. To ensure that converting to text and back to a proto
329 will result in an identical value, double_format='.17g' should be used.
332 out: To record the text format result.
333 indent: The initial indent level for pretty print.
334 as_utf8: Return unescaped Unicode for non-ASCII characters.
335 In Python 3 actual Unicode characters may appear as is in strings.
336 In Python 2 the return value will be valid UTF-8 rather than ASCII.
337 as_one_line: Don't introduce newlines between fields.
338 use_short_repeated_primitives: Use short repeated format for primitives.
339 pointy_brackets: If True, use angle brackets instead of curly braces for
341 use_index_order: If True, print fields of a proto message using the order
342 defined in source code instead of the field number. By default, use the
344 float_format: If set, use this to specify float field formatting
345 (per the "Format Specification Mini-Language"); otherwise, 8 valid
346 digits are used (default '.8g'). Also affects double fields if
347 double_format is not set but float_format is set.
348 double_format: If set, use this to specify double field formatting
349 (per the "Format Specification Mini-Language"); if it is not set but
350 float_format is set, use float_format. Otherwise, str() is used.
351 use_field_number: If True, print field numbers instead of names.
352 descriptor_pool: A DescriptorPool used to resolve Any types.
353 message_formatter: A function(message, indent, as_one_line): unicode|None
354 to custom format selected sub-messages (usually based on message type).
355 Use to pretty print parts of the protobuf for easier diffing.
356 print_unknown_fields: If True, unknown fields will be printed.
366 if double_format
is not None:
376 """Serializes if message is a google.protobuf.Any field."""
377 if '/' not in message.type_url:
382 packed_message.MergeFromString(message.value)
383 self.
out.write(
'%s[%s] ' % (self.
indent *
' ', message.type_url))
392 if formatted
is None:
396 out.write(
' ' * self.
indent)
402 """Convert protobuf message to text format.
405 message: The protocol buffers message.
409 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME
and
412 fields = message.ListFields()
415 key=
lambda x: x[0].number
if x[0].is_extension
else x[0].index)
416 for field, value
in fields:
418 for key
in sorted(value):
424 entry_submsg = value.GetEntryClass()(key=key, value=value[key])
426 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
428 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
429 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
432 for element
in value:
441 """Print unknown fields."""
443 for field
in unknown_fields:
444 out.write(
' ' * self.
indent)
445 out.write(
str(field.field_number))
446 if field.wire_type == WIRETYPE_START_GROUP:
460 elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
465 (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
466 memoryview(field.data), 0,
len(field.data))
470 if pos ==
len(field.data):
487 out.write(text_encoding.CEscape(field.data,
False))
492 out.write(
str(field.data))
496 """Print field name."""
498 out.write(
' ' * self.
indent)
500 out.write(
str(field.number))
502 if field.is_extension:
504 if (field.containing_type.GetOptions().message_set_wire_format
and
505 field.type == descriptor.FieldDescriptor.TYPE_MESSAGE
and
506 field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
507 out.write(field.message_type.full_name)
509 out.write(field.full_name)
511 elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
513 out.write(field.message_type.name)
515 out.write(field.name)
517 if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
523 """Print a single field name/value pair."""
533 for i
in six.moves.range(
len(value) - 1):
549 self.
out.write(
'%s ' % openb)
551 self.
out.write(closeb)
553 self.
out.write(
'%s\n' % openb)
557 self.
out.write(
' ' * self.
indent + closeb)
560 """Print a single field value (not including name).
562 For repeated fields, the value should be a single element.
565 field: The descriptor of the field to be printed.
566 value: The value of the field.
569 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
571 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
572 enum_value = field.enum_type.values_by_number.get(value,
None)
573 if enum_value
is not None:
574 out.write(enum_value.name)
576 out.write(
str(value))
577 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
579 if isinstance(value, six.text_type)
and (six.PY2
or not self.
as_utf8):
580 out_value = value.encode(
'utf-8')
583 if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
588 out.write(text_encoding.CEscape(out_value, out_as_utf8))
590 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
595 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
599 out.write(
str(float(format(value,
'.8g'))))
600 elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE
and
604 out.write(
str(value))
609 allow_unknown_extension=False,
610 allow_field_number=False,
611 descriptor_pool=None,
612 allow_unknown_field=False):
613 """Parses a text representation of a protocol message into a message.
615 NOTE: for historical reasons this function does not clear the input
616 message. This is different from what the binary msg.ParseFrom(...) does.
620 a.repeated_field.append('test')
623 text_format.Parse(repr(a), b)
624 text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]
627 b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"
629 Caller is responsible for clearing the message as needed.
632 text: Message text representation.
633 message: A protocol buffer message to merge into.
634 allow_unknown_extension: if True, skip over missing extensions and keep
636 allow_field_number: if True, both field number and field name are allowed.
637 descriptor_pool: A DescriptorPool used to resolve Any types.
638 allow_unknown_field: if True, skip over unknown field and keep
639 parsing. Avoid using this option if possible. It may hide some
640 errors (e.g. spelling error on field name)
643 The same message passed as argument.
646 ParseError: On text parsing problems.
648 return ParseLines(text.split(b
'\n' if isinstance(text, bytes)
else u'\n'),
650 allow_unknown_extension,
652 descriptor_pool=descriptor_pool,
653 allow_unknown_field=allow_unknown_field)
658 allow_unknown_extension=False,
659 allow_field_number=False,
660 descriptor_pool=None,
661 allow_unknown_field=False):
662 """Parses a text representation of a protocol message into a message.
664 Like Parse(), but allows repeated values for a non-repeated field, and uses
668 text: Message text representation.
669 message: A protocol buffer message to merge into.
670 allow_unknown_extension: if True, skip over missing extensions and keep
672 allow_field_number: if True, both field number and field name are allowed.
673 descriptor_pool: A DescriptorPool used to resolve Any types.
674 allow_unknown_field: if True, skip over unknown field and keep
675 parsing. Avoid using this option if possible. It may hide some
676 errors (e.g. spelling error on field name)
679 The same message passed as argument.
682 ParseError: On text parsing problems.
685 text.split(b
'\n' if isinstance(text, bytes)
else u'\n'),
687 allow_unknown_extension,
689 descriptor_pool=descriptor_pool,
690 allow_unknown_field=allow_unknown_field)
695 allow_unknown_extension=False,
696 allow_field_number=False,
697 descriptor_pool=None,
698 allow_unknown_field=False):
699 """Parses a text representation of a protocol message into a message.
702 lines: An iterable of lines of a message's text representation.
703 message: A protocol buffer message to merge into.
704 allow_unknown_extension: if True, skip over missing extensions and keep
706 allow_field_number: if True, both field number and field name are allowed.
707 descriptor_pool: A DescriptorPool used to resolve Any types.
708 allow_unknown_field: if True, skip over unknown field and keep
709 parsing. Avoid using this option if possible. It may hide some
710 errors (e.g. spelling error on field name)
713 The same message passed as argument.
716 ParseError: On text parsing problems.
718 parser =
_Parser(allow_unknown_extension,
720 descriptor_pool=descriptor_pool,
721 allow_unknown_field=allow_unknown_field)
722 return parser.ParseLines(lines, message)
727 allow_unknown_extension=False,
728 allow_field_number=False,
729 descriptor_pool=None,
730 allow_unknown_field=False):
731 """Parses a text representation of a protocol message into a message.
733 Like ParseLines(), but allows repeated values for a non-repeated field, and
737 lines: An iterable of lines of a message's text representation.
738 message: A protocol buffer message to merge into.
739 allow_unknown_extension: if True, skip over missing extensions and keep
741 allow_field_number: if True, both field number and field name are allowed.
742 descriptor_pool: A DescriptorPool used to resolve Any types.
743 allow_unknown_field: if True, skip over unknown field and keep
744 parsing. Avoid using this option if possible. It may hide some
745 errors (e.g. spelling error on field name)
748 The same message passed as argument.
751 ParseError: On text parsing problems.
753 parser =
_Parser(allow_unknown_extension,
755 descriptor_pool=descriptor_pool,
756 allow_unknown_field=allow_unknown_field)
757 return parser.MergeLines(lines, message)
761 """Text format parser for protocol message."""
764 allow_unknown_extension=False,
765 allow_field_number=False,
766 descriptor_pool=None,
767 allow_unknown_field=False):
774 """Parses a text representation of a protocol message into a message."""
780 """Merges a text representation of a protocol message into a message."""
786 """Converts a text representation of a protocol message into a message.
789 lines: Lines of a message's text representation.
790 message: A protocol buffer message to merge into.
793 ParseError: On text parsing problems.
797 str_lines = (line
if isinstance(line, str)
else line.encode(
'utf-8')
800 str_lines = (line
if isinstance(line, str)
else line.decode(
'utf-8')
803 while not tokenizer.AtEnd():
807 """Merges a single protocol message field into a message.
810 tokenizer: A tokenizer to parse the field name and values.
811 message: A protocol message to record the data.
814 ParseError: In case of text parsing problems.
816 message_descriptor = message.DESCRIPTOR
817 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME
and
818 tokenizer.TryConsume(
'[')):
820 tokenizer.Consume(
']')
821 tokenizer.TryConsume(
':')
822 if tokenizer.TryConsume(
'<'):
823 expanded_any_end_token =
'>'
825 tokenizer.Consume(
'{')
826 expanded_any_end_token =
'}'
829 if not expanded_any_sub_message:
830 raise ParseError(
'Type %s not found in descriptor pool' %
832 while not tokenizer.TryConsume(expanded_any_end_token):
833 if tokenizer.AtEnd():
834 raise tokenizer.ParseErrorPreviousToken(
'Expected "%s".' %
835 (expanded_any_end_token,))
836 self.
_MergeField(tokenizer, expanded_any_sub_message)
837 message.Pack(expanded_any_sub_message,
838 type_url_prefix=type_url_prefix)
841 if tokenizer.TryConsume(
'['):
842 name = [tokenizer.ConsumeIdentifier()]
843 while tokenizer.TryConsume(
'.'):
844 name.append(tokenizer.ConsumeIdentifier())
845 name =
'.'.join(name)
847 if not message_descriptor.is_extendable:
848 raise tokenizer.ParseErrorPreviousToken(
849 'Message type "%s" does not have extensions.' %
850 message_descriptor.full_name)
852 field = message.Extensions._FindExtensionByName(name)
858 raise tokenizer.ParseErrorPreviousToken(
859 'Extension "%s" not registered. '
860 'Did you import the _pb2 module which defines it? '
861 'If you are trying to place the extension in the MessageSet '
862 'field of another message that is in an Any or MessageSet field, '
863 'that message\'s _pb2 module must be imported as well' % name)
864 elif message_descriptor != field.containing_type:
865 raise tokenizer.ParseErrorPreviousToken(
866 'Extension "%s" does not extend message type "%s".' %
867 (name, message_descriptor.full_name))
869 tokenizer.Consume(
']')
872 name = tokenizer.ConsumeIdentifierOrNumber()
875 field = message_descriptor.fields_by_number.get(number,
None)
876 if not field
and message_descriptor.is_extendable:
877 field = message.Extensions._FindExtensionByNumber(number)
879 field = message_descriptor.fields_by_name.get(name,
None)
885 field = message_descriptor.fields_by_name.get(name.lower(),
None)
886 if field
and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
889 if (field
and field.type == descriptor.FieldDescriptor.TYPE_GROUP
and
890 field.message_type.name != name):
894 raise tokenizer.ParseErrorPreviousToken(
895 'Message type "%s" has no field named "%s".' %
896 (message_descriptor.full_name, name))
903 which_oneof = message.WhichOneof(field.containing_oneof.name)
904 if which_oneof
is not None and which_oneof != field.name:
905 raise tokenizer.ParseErrorPreviousToken(
906 'Field "%s" is specified along with field "%s", another member '
907 'of oneof "%s" for message type "%s".' %
908 (field.name, which_oneof, field.containing_oneof.name,
909 message_descriptor.full_name))
911 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
912 tokenizer.TryConsume(
':')
915 tokenizer.Consume(
':')
918 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED
and
919 tokenizer.TryConsume(
'[')):
921 if not tokenizer.TryConsume(
']'):
923 merger(tokenizer, message, field)
924 if tokenizer.TryConsume(
']'):
926 tokenizer.Consume(
',')
929 merger(tokenizer, message, field)
937 if not tokenizer.TryConsume(
','):
938 tokenizer.TryConsume(
';')
941 """Consumes a google.protobuf.Any type URL and returns the type name."""
943 prefix = [tokenizer.ConsumeIdentifier()]
944 tokenizer.Consume(
'.')
945 prefix.append(tokenizer.ConsumeIdentifier())
946 tokenizer.Consume(
'.')
947 prefix.append(tokenizer.ConsumeIdentifier())
948 tokenizer.Consume(
'/')
950 name = [tokenizer.ConsumeIdentifier()]
951 while tokenizer.TryConsume(
'.'):
952 name.append(tokenizer.ConsumeIdentifier())
953 return '.'.join(prefix),
'.'.join(name)
956 """Merges a single message field into a message.
959 tokenizer: A tokenizer to parse the field value.
960 message: The message of which field is a member.
961 field: The descriptor of the field to be merged.
964 ParseError: In case of text parsing problems.
968 if tokenizer.TryConsume(
'<'):
971 tokenizer.Consume(
'{')
974 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
975 if field.is_extension:
976 sub_message = message.Extensions[field].add()
980 sub_message =
getattr(message, field.name).add()
982 if field.is_extension:
984 message.HasExtension(field)):
985 raise tokenizer.ParseErrorPreviousToken(
986 'Message type "%s" should not have multiple "%s" extensions.' %
987 (message.DESCRIPTOR.full_name, field.full_name))
988 sub_message = message.Extensions[field]
993 message.HasField(field.name)):
994 raise tokenizer.ParseErrorPreviousToken(
995 'Message type "%s" should not have multiple "%s" fields.' %
996 (message.DESCRIPTOR.full_name, field.name))
997 sub_message =
getattr(message, field.name)
998 sub_message.SetInParent()
1000 while not tokenizer.TryConsume(end_token):
1001 if tokenizer.AtEnd():
1002 raise tokenizer.ParseErrorPreviousToken(
'Expected "%s".' % (end_token,))
1006 value_cpptype = field.message_type.fields_by_name[
'value'].cpp_type
1007 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
1008 value =
getattr(message, field.name)[sub_message.key]
1009 value.MergeFrom(sub_message.value)
1011 getattr(message, field.name)[sub_message.key] = sub_message.value
1015 message_descriptor = message.DESCRIPTOR
1016 return (hasattr(message_descriptor,
'syntax')
and
1017 message_descriptor.syntax ==
'proto3')
1020 """Merges a single scalar field into a message.
1023 tokenizer: A tokenizer to parse the field value.
1024 message: A protocol message to record the data.
1025 field: The descriptor of the field to be merged.
1028 ParseError: In case of text parsing problems.
1029 RuntimeError: On runtime errors.
1034 if field.type
in (descriptor.FieldDescriptor.TYPE_INT32,
1035 descriptor.FieldDescriptor.TYPE_SINT32,
1036 descriptor.FieldDescriptor.TYPE_SFIXED32):
1038 elif field.type
in (descriptor.FieldDescriptor.TYPE_INT64,
1039 descriptor.FieldDescriptor.TYPE_SINT64,
1040 descriptor.FieldDescriptor.TYPE_SFIXED64):
1042 elif field.type
in (descriptor.FieldDescriptor.TYPE_UINT32,
1043 descriptor.FieldDescriptor.TYPE_FIXED32):
1045 elif field.type
in (descriptor.FieldDescriptor.TYPE_UINT64,
1046 descriptor.FieldDescriptor.TYPE_FIXED64):
1048 elif field.type
in (descriptor.FieldDescriptor.TYPE_FLOAT,
1049 descriptor.FieldDescriptor.TYPE_DOUBLE):
1050 value = tokenizer.ConsumeFloat()
1051 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
1052 value = tokenizer.ConsumeBool()
1053 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
1054 value = tokenizer.ConsumeString()
1055 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
1056 value = tokenizer.ConsumeByteString()
1057 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
1058 value = tokenizer.ConsumeEnum(field)
1060 raise RuntimeError(
'Unknown field type %d' % field.type)
1062 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
1063 if field.is_extension:
1064 message.Extensions[field].append(value)
1066 getattr(message, field.name).append(value)
1068 if field.is_extension:
1071 message.HasExtension(field)):
1072 raise tokenizer.ParseErrorPreviousToken(
1073 'Message type "%s" should not have multiple "%s" extensions.' %
1074 (message.DESCRIPTOR.full_name, field.full_name))
1076 message.Extensions[field] = value
1078 duplicate_error =
False
1083 duplicate_error = bool(
getattr(message, field.name))
1085 duplicate_error = message.HasField(field.name)
1088 raise tokenizer.ParseErrorPreviousToken(
1089 'Message type "%s" should not have multiple "%s" fields.' %
1090 (message.DESCRIPTOR.full_name, field.name))
1092 setattr(message, field.name, value)
1096 """Skips over contents (value or message) of a field.
1099 tokenizer: A tokenizer to parse the field name and values.
1107 if tokenizer.TryConsume(
':')
and not tokenizer.LookingAt(
1108 '{')
and not tokenizer.LookingAt(
'<'):
1115 """Skips over a complete field (name and value/message).
1118 tokenizer: A tokenizer to parse the field name and values.
1120 if tokenizer.TryConsume(
'['):
1122 tokenizer.ConsumeIdentifier()
1123 while tokenizer.TryConsume(
'.'):
1124 tokenizer.ConsumeIdentifier()
1125 tokenizer.Consume(
']')
1127 tokenizer.ConsumeIdentifierOrNumber()
1133 if not tokenizer.TryConsume(
','):
1134 tokenizer.TryConsume(
';')
1138 """Skips over a field message.
1141 tokenizer: A tokenizer to parse the field name and values.
1144 if tokenizer.TryConsume(
'<'):
1147 tokenizer.Consume(
'{')
1150 while not tokenizer.LookingAt(
'>')
and not tokenizer.LookingAt(
'}'):
1153 tokenizer.Consume(delimiter)
1157 """Skips over a field value.
1160 tokenizer: A tokenizer to parse the field name and values.
1163 ParseError: In case an invalid field value is found.
1167 if tokenizer.TryConsumeByteString():
1168 while tokenizer.TryConsumeByteString():
1172 if (
not tokenizer.TryConsumeIdentifier()
and
1174 not tokenizer.TryConsumeFloat()):
1175 raise ParseError(
'Invalid field value: ' + tokenizer.token)
1179 """Protocol buffer text representation tokenizer.
1181 This class handles the lower level string parsing by splitting it into
1184 It was directly ported from the Java protocol buffer API.
1187 _WHITESPACE = re.compile(
r'\s+')
1188 _COMMENT = re.compile(
r'(\s*#.*$)', re.MULTILINE)
1189 _WHITESPACE_OR_COMMENT = re.compile(
r'(\s|(#.*$))+', re.MULTILINE)
1190 _TOKEN = re.compile(
'|'.join([
1191 r'[a-zA-Z_][0-9a-zA-Z_+-]*',
1192 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',
1195 r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.format(qt=mark)
1199 _IDENTIFIER = re.compile(
r'[^\d\W]\w*')
1200 _IDENTIFIER_OR_NUMBER = re.compile(
r'\w+')
1220 return self.
token == token
1223 """Checks the end of the text was reached.
1226 True iff the end was reached.
1228 return not self.
token
1234 except StopIteration:
1248 length =
len(match.group(0))
1252 """Tries to consume a given piece of text.
1255 token: Text to consume.
1258 True iff the text was consumed.
1260 if self.
token == token:
1266 """Consumes a piece of text.
1269 token: Text to consume.
1272 ParseError: If the text couldn't be consumed.
1275 raise self.
ParseError(
'Expected "%s".' % token)
1279 if not self.
_COMMENT.match(result):
1285 """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""
1296 and not just_started)
1298 return trailing, comment
1308 """Consumes protocol message field identifier.
1314 ParseError: If an identifier couldn't be consumed.
1318 raise self.
ParseError(
'Expected identifier.')
1330 """Consumes protocol message field identifier or number.
1336 ParseError: If an identifier or number couldn't be consumed.
1340 raise self.
ParseError(
'Expected identifier or number, got %s.' % result)
1353 """Consumes an integer number.
1356 is_long: True if the value should be returned as a long integer.
1361 ParseError: If an integer couldn't be consumed.
1365 except ValueError
as e:
1378 """Consumes a floating point number.
1384 ParseError: If a floating point number couldn't be consumed.
1388 except ValueError
as e:
1394 """Consumes a boolean value.
1400 ParseError: If a boolean value couldn't be consumed.
1404 except ValueError
as e:
1417 """Consumes a string value.
1423 ParseError: If a string value couldn't be consumed.
1427 return six.text_type(the_bytes,
'utf-8')
1428 except UnicodeDecodeError
as e:
1432 """Consumes a byte array value.
1435 The array parsed (as a string).
1438 ParseError: If a byte array value couldn't be consumed.
1441 while self.
token and self.
token[0]
in _QUOTES:
1443 return b
''.join(the_list)
1446 """Consume one token of a string literal.
1448 String literals (whether bytes or text) can come in multiple adjacent
1449 tokens which are automatically concatenated, like in C or Python. This
1450 method only consumes one token.
1455 ParseError: When the wrong format data is found.
1458 if len(text) < 1
or text[0]
not in _QUOTES:
1459 raise self.
ParseError(
'Expected string but found: %r' % (text,))
1461 if len(text) < 2
or text[-1] != text[0]:
1462 raise self.
ParseError(
'String missing ending quote: %r' % (text,))
1465 result = text_encoding.CUnescape(text[1:-1])
1466 except ValueError
as e:
1474 except ValueError
as e:
1480 """Creates and *returns* a ParseError for the previously read token.
1483 message: A message to set for the exception.
1486 A ParseError instance.
1492 """Creates and *returns* a ParseError for the current token."""
1500 """Reads the next meaningful token."""
1515 token = match.group(0)
1522 _Tokenizer = Tokenizer
1526 """Consumes a signed 32bit integer number from tokenizer.
1529 tokenizer: A tokenizer used to parse the number.
1535 ParseError: If a signed 32bit integer couldn't be consumed.
1541 """Consumes an unsigned 32bit integer number from tokenizer.
1544 tokenizer: A tokenizer used to parse the number.
1550 ParseError: If an unsigned 32bit integer couldn't be consumed.
1564 """Consumes a signed 64bit integer number from tokenizer.
1567 tokenizer: A tokenizer used to parse the number.
1573 ParseError: If a signed 64bit integer couldn't be consumed.
1587 """Consumes an unsigned 64bit integer number from tokenizer.
1590 tokenizer: A tokenizer used to parse the number.
1596 ParseError: If an unsigned 64bit integer couldn't be consumed.
1610 """Consumes an integer number from tokenizer.
1613 tokenizer: A tokenizer used to parse the number.
1614 is_signed: True if a signed integer must be parsed.
1615 is_long: True if a long integer must be parsed.
1621 ParseError: If an integer with given characteristics couldn't be consumed.
1624 result =
ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
1625 except ValueError
as e:
1626 raise tokenizer.ParseError(
str(e))
1627 tokenizer.NextToken()
1632 """Parses an integer.
1635 text: The text to parse.
1636 is_signed: True if a signed integer must be parsed.
1637 is_long: True if a long integer must be parsed.
1643 ValueError: Thrown iff the text is not a valid integer.
1649 checker = _INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
1650 checker.CheckValue(result)
1655 """Parses an integer without checking size/signedness.
1658 text: The text to parse.
1659 is_long: True if the value should be returned as a long integer.
1665 ValueError: Thrown iff the text is not a valid integer.
1669 c_octal_match = re.match(
r'(-?)0(\d+)$', text)
1673 text = c_octal_match.group(1) +
'0o' + c_octal_match.group(2)
1679 return long(text, 0)
1683 raise ValueError(
'Couldn\'t parse integer: %s' % orig_text)
1687 """Parse a floating point number.
1690 text: Text to parse.
1696 ValueError: If a floating point number couldn't be parsed.
1703 if _FLOAT_INFINITY.match(text):
1705 return float(
'-inf')
1708 elif _FLOAT_NAN.match(text):
1713 return float(text.rstrip(
'f'))
1715 raise ValueError(
'Couldn\'t parse float: %s' % text)
1719 """Parse a boolean value.
1722 text: Text to parse.
1725 Boolean values parsed
1728 ValueError: If text is not a valid boolean.
1730 if text
in (
'true',
't',
'1',
'True'):
1732 elif text
in (
'false',
'f',
'0',
'False'):
1735 raise ValueError(
'Expected "true" or "false".')
1739 """Parse an enum value.
1741 The value can be specified by a number (the enum value), or by
1742 a string literal (the enum name).
1745 field: Enum field descriptor.
1746 value: String value.
1752 ValueError: If the enum value could not be parsed.
1754 enum_descriptor = field.enum_type
1756 number = int(value, 0)
1759 enum_value = enum_descriptor.values_by_name.get(value,
None)
1760 if enum_value
is None:
1761 raise ValueError(
'Enum type "%s" has no value named %s.' %
1762 (enum_descriptor.full_name, value))
1765 if hasattr(field.file,
'syntax'):
1767 if field.file.syntax ==
'proto3':
1770 enum_value = enum_descriptor.values_by_number.get(number,
None)
1771 if enum_value
is None:
1772 raise ValueError(
'Enum type "%s" has no value with number %d.' %
1773 (enum_descriptor.full_name, number))
1774 return enum_value.number