"""Contains routines for printing protocol messages in text format.

Simple usage example::

  # Create a proto object and serialize it to a text proto string.
  message = my_proto_pb2.MyMessage(foo='bar')
  text_proto = text_format.MessageToString(message)

  # Parse a text proto string.
  message = text_format.Parse(text_proto, my_proto_pb2.MyMessage())
"""
# Original author contact for this module.
__author__ = 'kenton@google.com (Kenton Varda)'
46 import encodings.raw_unicode_escape
47 import encodings.unicode_escape
# Names exported by ``from <this module> import *``.
__all__ = [
    'MessageToString',
    'Parse',
    'PrintMessage',
    'PrintField',
    'PrintFieldValue',
    'Merge',
    'MessageToBytes',
]
# Range checkers for parsed integers. The order matters: ParseInteger selects
# an entry with ``2 * int(is_long) + int(is_signed)``, so the layout is
# (unsigned 32, signed 32, unsigned 64, signed 64).
_INTEGER_CHECKERS = (
    type_checkers.Uint32ValueChecker(),
    type_checkers.Int32ValueChecker(),
    type_checkers.Uint64ValueChecker(),
    type_checkers.Int64ValueChecker(),
)
# Case-insensitive spellings of infinity: "inf" or "infinity", with an
# optional leading minus sign and an optional trailing "f" suffix.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?$', re.IGNORECASE)

# Case-insensitive spelling of NaN, with an optional trailing "f" suffix.
_FLOAT_NAN = re.compile('nanf?$', re.IGNORECASE)

# Characters that may open (and must then close) a string literal token.
_QUOTES = frozenset(("'", '"'))

# Full name of the Any message type; messages with this descriptor name get
# the expanded ``[type_url] { ... }`` treatment when printing and parsing.
_ANY_FULL_TYPE_NAME = 'google.protobuf.Any'
class Error(Exception):
  """Top-level module error for text_format.

  Base class for the exceptions defined in this module (ParseError derives
  from it), so callers can catch every text_format failure with one clause.
  """
75 class ParseError(Error):
76 """Thrown in case of text parsing or tokenizing error."""
78 def __init__(self, message=None, line=None, column=None):
79 if message
is not None and line
is not None:
81 if column
is not None:
82 loc +=
':{0}'.
format(column)
83 message =
'{0} : {1}'.
format(loc, message)
84 if message
is not None:
85 super(ParseError, self).
__init__(message)
117 use_short_repeated_primitives=False,
118 pointy_brackets=False,
119 use_index_order=False,
122 use_field_number=False,
123 descriptor_pool=None,
125 message_formatter=None,
126 print_unknown_fields=False,
129 """Convert protobuf message to text format.
131 Double values can be formatted compactly with 15 digits of
132 precision (which is the most that IEEE 754 "double" can guarantee)
133 using double_format='.15g'. To ensure that converting to text and back to a
134 proto will result in an identical value, double_format='.17g' should be used.
137 message: The protocol buffers message.
138 as_utf8: Return unescaped Unicode for non-ASCII characters.
139 In Python 3 actual Unicode characters may appear as is in strings.
140 In Python 2 the return value will be valid UTF-8 rather than only ASCII.
141 as_one_line: Don't introduce newlines between fields.
142 use_short_repeated_primitives: Use short repeated format for primitives.
143 pointy_brackets: If True, use angle brackets instead of curly braces for
145 use_index_order: If True, fields of a proto message will be printed using
146 the order defined in source code instead of the field number, extensions
147 will be printed at the end of the message and their relative order is
148 determined by the extension number. By default, use the field number
150 float_format (str): If set, use this to specify float field formatting
151 (per the "Format Specification Mini-Language"); otherwise, shortest float
152 that has same value in wire will be printed. Also affect double field
153 if double_format is not set but float_format is set.
154 double_format (str): If set, use this to specify double field formatting
155 (per the "Format Specification Mini-Language"); if it is not set but
156 float_format is set, use float_format. Otherwise, use ``str()``
157 use_field_number: If True, print field numbers instead of names.
158 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
159 indent (int): The initial indent level, in terms of spaces, for pretty
161 message_formatter (function(message, indent, as_one_line) -> unicode|None):
162 Custom formatter for selected sub-messages (usually based on message
163 type). Use to pretty print parts of the protobuf for easier diffing.
164 print_unknown_fields: If True, unknown fields will be printed.
165 force_colon: If set, a colon will be added after the field name even if the
166 field is a proto message.
169 str: A string of the text formatted protocol buffer message.
177 use_short_repeated_primitives,
185 print_unknown_fields=print_unknown_fields,
186 force_colon=force_colon)
187 printer.PrintMessage(message)
188 result = out.getvalue()
191 return result.rstrip()
197 """Convert protobuf message to encoded text format. See MessageToString."""
199 if isinstance(text, bytes):
201 codec =
'utf-8' if kwargs.get(
'as_utf8')
else 'ascii'
202 return text.encode(codec)
206 return (field.type == descriptor.FieldDescriptor.TYPE_MESSAGE
and
207 field.message_type.has_options
and
208 field.message_type.GetOptions().map_entry)
216 use_short_repeated_primitives=False,
217 pointy_brackets=False,
218 use_index_order=False,
221 use_field_number=False,
222 descriptor_pool=None,
223 message_formatter=None,
224 print_unknown_fields=False,
227 out=out, indent=indent, as_utf8=as_utf8,
228 as_one_line=as_one_line,
229 use_short_repeated_primitives=use_short_repeated_primitives,
230 pointy_brackets=pointy_brackets,
231 use_index_order=use_index_order,
232 float_format=float_format,
233 double_format=double_format,
234 use_field_number=use_field_number,
235 descriptor_pool=descriptor_pool,
236 message_formatter=message_formatter,
237 print_unknown_fields=print_unknown_fields,
238 force_colon=force_colon)
239 printer.PrintMessage(message)
248 use_short_repeated_primitives=False,
249 pointy_brackets=False,
250 use_index_order=False,
253 message_formatter=None,
254 print_unknown_fields=False,
256 """Print a single field name/value pair."""
257 printer =
_Printer(out, indent, as_utf8, as_one_line,
258 use_short_repeated_primitives, pointy_brackets,
259 use_index_order, float_format, double_format,
260 message_formatter=message_formatter,
261 print_unknown_fields=print_unknown_fields,
262 force_colon=force_colon)
263 printer.PrintField(field, value)
272 use_short_repeated_primitives=False,
273 pointy_brackets=False,
274 use_index_order=False,
277 message_formatter=None,
278 print_unknown_fields=False,
280 """Print a single field value (not including name)."""
281 printer =
_Printer(out, indent, as_utf8, as_one_line,
282 use_short_repeated_primitives, pointy_brackets,
283 use_index_order, float_format, double_format,
284 message_formatter=message_formatter,
285 print_unknown_fields=print_unknown_fields,
286 force_colon=force_colon)
287 printer.PrintFieldValue(field, value)
291 """Returns a protobuf message instance.
294 type_name: Fully-qualified protobuf message type name string.
295 descriptor_pool: DescriptorPool instance.
298 A Message instance of type matching type_name, or None if a Descriptor
299 wasn't found matching type_name.
302 if descriptor_pool
is None:
304 descriptor_pool = pool_mod.Default()
306 database = symbol_database.Default()
308 message_descriptor = descriptor_pool.FindMessageTypeByName(type_name)
311 message_type = database.GetPrototype(message_descriptor)
# Wire type numbers compared against ``field.wire_type`` when printing
# unknown fields: length-delimited payloads and group starts get their own
# output branches.
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3
321 """Text format printer for protocol message."""
329 use_short_repeated_primitives=False,
330 pointy_brackets=False,
331 use_index_order=False,
334 use_field_number=False,
335 descriptor_pool=None,
336 message_formatter=None,
337 print_unknown_fields=False,
339 """Initialize the Printer.
341 Double values can be formatted compactly with 15 digits of precision
342 (which is the most that IEEE 754 "double" can guarantee) using
343 double_format='.15g'. To ensure that converting to text and back to a proto
344 will result in an identical value, double_format='.17g' should be used.
347 out: To record the text format result.
348 indent: The initial indent level for pretty print.
349 as_utf8: Return unescaped Unicode for non-ASCII characters.
350 In Python 3 actual Unicode characters may appear as is in strings.
351 In Python 2 the return value will be valid UTF-8 rather than ASCII.
352 as_one_line: Don't introduce newlines between fields.
353 use_short_repeated_primitives: Use short repeated format for primitives.
354 pointy_brackets: If True, use angle brackets instead of curly braces for
356 use_index_order: If True, print fields of a proto message using the order
357 defined in source code instead of the field number. By default, use the
359 float_format: If set, use this to specify float field formatting
360 (per the "Format Specification Mini-Language"); otherwise, shortest
361 float that has same value in wire will be printed. Also affect double
362 field if double_format is not set but float_format is set.
363 double_format: If set, use this to specify double field formatting
364 (per the "Format Specification Mini-Language"); if it is not set but
365 float_format is set, use float_format. Otherwise, str() is used.
366 use_field_number: If True, print field numbers instead of names.
367 descriptor_pool: A DescriptorPool used to resolve Any types.
368 message_formatter: A function(message, indent, as_one_line): unicode|None
369 to custom format selected sub-messages (usually based on message type).
370 Use to pretty print parts of the protobuf for easier diffing.
371 print_unknown_fields: If True, unknown fields will be printed.
372 force_colon: If set, a colon will be added after the field name even if
373 the field is a proto message.
383 if double_format
is not None:
394 """Serializes if message is a google.protobuf.Any field."""
395 if '/' not in message.type_url:
400 packed_message.MergeFromString(message.value)
402 self.
out.
write(
'%s[%s]%s ' % (self.
indent *
' ', message.type_url, colon))
411 if formatted
is None:
415 out.write(
' ' * self.
indent)
421 """Convert protobuf message to text format.
424 message: The protocol buffers message.
428 if (message.DESCRIPTOR.full_name == _ANY_FULL_TYPE_NAME
and
431 fields = message.ListFields()
434 key=
lambda x: x[0].number
if x[0].is_extension
else x[0].index)
435 for field, value
in fields:
437 for key
in sorted(value):
443 entry_submsg = value.GetEntryClass()(key=key, value=value[key])
445 elif field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
447 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE
448 and field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_STRING):
451 for element
in value:
460 """Print unknown fields."""
462 for field
in unknown_fields:
463 out.write(
' ' * self.
indent)
464 out.write(
str(field.field_number))
465 if field.wire_type == WIRETYPE_START_GROUP:
478 out.write(
' ' * self.
indent +
'}\n')
479 elif field.wire_type == WIRETYPE_LENGTH_DELIMITED:
484 (embedded_unknown_message, pos) = decoder._DecodeUnknownFieldSet(
485 memoryview(field.data), 0,
len(field.data))
489 if pos ==
len(field.data):
502 out.write(
' ' * self.
indent +
'}\n')
506 out.write(text_encoding.CEscape(field.data,
False))
511 out.write(
str(field.data))
515 """Print field name."""
517 out.write(
' ' * self.
indent)
519 out.write(
str(field.number))
521 if field.is_extension:
523 if (field.containing_type.GetOptions().message_set_wire_format
and
524 field.type == descriptor.FieldDescriptor.TYPE_MESSAGE
and
525 field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
526 out.write(field.message_type.full_name)
528 out.write(field.full_name)
530 elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
532 out.write(field.message_type.name)
534 out.write(field.name)
537 field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE):
544 """Print a single field name/value pair."""
551 """Prints short repeated primitives value."""
582 """Print a single field value (not including name).
584 For repeated fields, the value should be a single element.
587 field: The descriptor of the field to be printed.
588 value: The value of the field.
591 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
593 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
594 enum_value = field.enum_type.values_by_number.get(value,
None)
595 if enum_value
is not None:
596 out.write(enum_value.name)
598 out.write(
str(value))
599 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
601 if isinstance(value, str)
and not self.
as_utf8:
602 out_value = value.encode(
'utf-8')
605 if field.type == descriptor.FieldDescriptor.TYPE_BYTES:
610 out.write(text_encoding.CEscape(out_value, out_as_utf8))
612 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
617 elif field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_FLOAT:
621 if math.isnan(value):
622 out.write(
str(value))
624 out.write(
str(type_checkers.ToShortestFloat(value)))
625 elif (field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_DOUBLE
and
629 out.write(
str(value))
634 allow_unknown_extension=False,
635 allow_field_number=False,
636 descriptor_pool=None,
637 allow_unknown_field=False):
638 """Parses a text representation of a protocol message into a message.
640 NOTE: for historical reasons this function does not clear the input
641 message. This is different from what the binary msg.ParseFrom(...) does.
642 If text contains a field already set in message, the value is appended if the
643 field is repeated. Otherwise, an error is raised.
648 a.repeated_field.append('test')
651 # Repeated fields are combined
652 text_format.Parse(repr(a), b)
653 text_format.Parse(repr(a), b) # repeated_field contains ["test", "test"]
655 # Non-repeated fields cannot be overwritten
658 text_format.Parse(repr(a), b) # ParseError
661 b.ParseFromString(a.SerializeToString()) # repeated_field is now "test"
663 Caller is responsible for clearing the message as needed.
666 text (str): Message text representation.
667 message (Message): A protocol buffer message to merge into.
668 allow_unknown_extension: if True, skip over missing extensions and keep
670 allow_field_number: if True, both field number and field name are allowed.
671 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
672 allow_unknown_field: if True, skip over unknown field and keep
673 parsing. Avoid using this option if possible. It may hide some
674 errors (e.g. spelling error on field name)
677 Message: The same message passed as argument.
680 ParseError: On text parsing problems.
682 return ParseLines(text.split(b
'\n' if isinstance(text, bytes)
else u'\n'),
684 allow_unknown_extension,
686 descriptor_pool=descriptor_pool,
687 allow_unknown_field=allow_unknown_field)
692 allow_unknown_extension=False,
693 allow_field_number=False,
694 descriptor_pool=None,
695 allow_unknown_field=False):
696 """Parses a text representation of a protocol message into a message.
698 Like Parse(), but allows repeated values for a non-repeated field, and uses
699 the last one. This means any non-repeated, top-level fields specified in text
700 replace those in the message.
703 text (str): Message text representation.
704 message (Message): A protocol buffer message to merge into.
705 allow_unknown_extension: if True, skip over missing extensions and keep
707 allow_field_number: if True, both field number and field name are allowed.
708 descriptor_pool (DescriptorPool): Descriptor pool used to resolve Any types.
709 allow_unknown_field: if True, skip over unknown field and keep
710 parsing. Avoid using this option if possible. It may hide some
711 errors (e.g. spelling error on field name)
714 Message: The same message passed as argument.
717 ParseError: On text parsing problems.
720 text.split(b
'\n' if isinstance(text, bytes)
else u'\n'),
722 allow_unknown_extension,
724 descriptor_pool=descriptor_pool,
725 allow_unknown_field=allow_unknown_field)
730 allow_unknown_extension=False,
731 allow_field_number=False,
732 descriptor_pool=None,
733 allow_unknown_field=False):
734 """Parses a text representation of a protocol message into a message.
736 See Parse() for caveats.
739 lines: An iterable of lines of a message's text representation.
740 message: A protocol buffer message to merge into.
741 allow_unknown_extension: if True, skip over missing extensions and keep
743 allow_field_number: if True, both field number and field name are allowed.
744 descriptor_pool: A DescriptorPool used to resolve Any types.
745 allow_unknown_field: if True, skip over unknown field and keep
746 parsing. Avoid using this option if possible. It may hide some
747 errors (e.g. spelling error on field name)
750 The same message passed as argument.
753 ParseError: On text parsing problems.
755 parser =
_Parser(allow_unknown_extension,
757 descriptor_pool=descriptor_pool,
758 allow_unknown_field=allow_unknown_field)
759 return parser.ParseLines(lines, message)
764 allow_unknown_extension=False,
765 allow_field_number=False,
766 descriptor_pool=None,
767 allow_unknown_field=False):
768 """Parses a text representation of a protocol message into a message.
770 See Merge() for more details.
773 lines: An iterable of lines of a message's text representation.
774 message: A protocol buffer message to merge into.
775 allow_unknown_extension: if True, skip over missing extensions and keep
777 allow_field_number: if True, both field number and field name are allowed.
778 descriptor_pool: A DescriptorPool used to resolve Any types.
779 allow_unknown_field: if True, skip over unknown field and keep
780 parsing. Avoid using this option if possible. It may hide some
781 errors (e.g. spelling error on field name)
784 The same message passed as argument.
787 ParseError: On text parsing problems.
789 parser =
_Parser(allow_unknown_extension,
791 descriptor_pool=descriptor_pool,
792 allow_unknown_field=allow_unknown_field)
793 return parser.MergeLines(lines, message)
797 """Text format parser for protocol message."""
800 allow_unknown_extension=False,
801 allow_field_number=False,
802 descriptor_pool=None,
803 allow_unknown_field=False):
810 """Parses a text representation of a protocol message into a message."""
816 """Merges a text representation of a protocol message into a message."""
822 """Converts a text representation of a protocol message into a message.
825 lines: Lines of a message's text representation.
826 message: A protocol buffer message to merge into.
829 ParseError: On text parsing problems.
833 line
if isinstance(line, str)
else line.decode(
'utf-8')
836 while not tokenizer.AtEnd():
840 """Merges a single protocol message field into a message.
843 tokenizer: A tokenizer to parse the field name and values.
844 message: A protocol message to record the data.
847 ParseError: In case of text parsing problems.
849 message_descriptor = message.DESCRIPTOR
850 if (message_descriptor.full_name == _ANY_FULL_TYPE_NAME
and
851 tokenizer.TryConsume(
'[')):
853 tokenizer.Consume(
']')
854 tokenizer.TryConsume(
':')
855 if tokenizer.TryConsume(
'<'):
856 expanded_any_end_token =
'>'
858 tokenizer.Consume(
'{')
859 expanded_any_end_token =
'}'
862 if not expanded_any_sub_message:
863 raise ParseError(
'Type %s not found in descriptor pool' %
865 while not tokenizer.TryConsume(expanded_any_end_token):
866 if tokenizer.AtEnd():
867 raise tokenizer.ParseErrorPreviousToken(
'Expected "%s".' %
868 (expanded_any_end_token,))
869 self.
_MergeField(tokenizer, expanded_any_sub_message)
870 deterministic =
False
872 message.Pack(expanded_any_sub_message,
873 type_url_prefix=type_url_prefix,
874 deterministic=deterministic)
877 if tokenizer.TryConsume(
'['):
878 name = [tokenizer.ConsumeIdentifier()]
879 while tokenizer.TryConsume(
'.'):
880 name.append(tokenizer.ConsumeIdentifier())
881 name =
'.'.join(name)
883 if not message_descriptor.is_extendable:
884 raise tokenizer.ParseErrorPreviousToken(
885 'Message type "%s" does not have extensions.' %
886 message_descriptor.full_name)
888 field = message.Extensions._FindExtensionByName(name)
896 raise tokenizer.ParseErrorPreviousToken(
897 'Extension "%s" not registered. '
898 'Did you import the _pb2 module which defines it? '
899 'If you are trying to place the extension in the MessageSet '
900 'field of another message that is in an Any or MessageSet field, '
901 'that message\'s _pb2 module must be imported as well' % name)
902 elif message_descriptor != field.containing_type:
903 raise tokenizer.ParseErrorPreviousToken(
904 'Extension "%s" does not extend message type "%s".' %
905 (name, message_descriptor.full_name))
907 tokenizer.Consume(
']')
910 name = tokenizer.ConsumeIdentifierOrNumber()
913 field = message_descriptor.fields_by_number.get(number,
None)
914 if not field
and message_descriptor.is_extendable:
915 field = message.Extensions._FindExtensionByNumber(number)
917 field = message_descriptor.fields_by_name.get(name,
None)
923 field = message_descriptor.fields_by_name.get(name.lower(),
None)
924 if field
and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
927 if (field
and field.type == descriptor.FieldDescriptor.TYPE_GROUP
and
928 field.message_type.name != name):
932 raise tokenizer.ParseErrorPreviousToken(
933 'Message type "%s" has no field named "%s".' %
934 (message_descriptor.full_name, name))
941 which_oneof = message.WhichOneof(field.containing_oneof.name)
942 if which_oneof
is not None and which_oneof != field.name:
943 raise tokenizer.ParseErrorPreviousToken(
944 'Field "%s" is specified along with field "%s", another member '
945 'of oneof "%s" for message type "%s".' %
946 (field.name, which_oneof, field.containing_oneof.name,
947 message_descriptor.full_name))
949 if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
950 tokenizer.TryConsume(
':')
953 tokenizer.Consume(
':')
956 if (field.label == descriptor.FieldDescriptor.LABEL_REPEATED
and
957 tokenizer.TryConsume(
'[')):
959 if not tokenizer.TryConsume(
']'):
961 merger(tokenizer, message, field)
962 if tokenizer.TryConsume(
']'):
964 tokenizer.Consume(
',')
967 merger(tokenizer, message, field)
975 if not tokenizer.TryConsume(
','):
976 tokenizer.TryConsume(
';')
980 """Consumes a google.protobuf.Any type URL and returns the type name."""
982 prefix = [tokenizer.ConsumeIdentifier()]
983 tokenizer.Consume(
'.')
984 prefix.append(tokenizer.ConsumeIdentifier())
985 tokenizer.Consume(
'.')
986 prefix.append(tokenizer.ConsumeIdentifier())
987 tokenizer.Consume(
'/')
989 name = [tokenizer.ConsumeIdentifier()]
990 while tokenizer.TryConsume(
'.'):
991 name.append(tokenizer.ConsumeIdentifier())
992 return '.'.join(prefix),
'.'.join(name)
995 """Merges a single message field into a message.
998 tokenizer: A tokenizer to parse the field value.
999 message: The message of which field is a member.
1000 field: The descriptor of the field to be merged.
1003 ParseError: In case of text parsing problems.
1007 if tokenizer.TryConsume(
'<'):
1010 tokenizer.Consume(
'{')
1013 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
1014 if field.is_extension:
1015 sub_message = message.Extensions[field].
add()
1017 sub_message = getattr(message, field.name).
GetEntryClass()()
1019 sub_message = getattr(message, field.name).
add()
1021 if field.is_extension:
1023 message.HasExtension(field)):
1024 raise tokenizer.ParseErrorPreviousToken(
1025 'Message type "%s" should not have multiple "%s" extensions.' %
1026 (message.DESCRIPTOR.full_name, field.full_name))
1027 sub_message = message.Extensions[field]
1032 message.HasField(field.name)):
1033 raise tokenizer.ParseErrorPreviousToken(
1034 'Message type "%s" should not have multiple "%s" fields.' %
1035 (message.DESCRIPTOR.full_name, field.name))
1036 sub_message = getattr(message, field.name)
1037 sub_message.SetInParent()
1039 while not tokenizer.TryConsume(end_token):
1040 if tokenizer.AtEnd():
1041 raise tokenizer.ParseErrorPreviousToken(
'Expected "%s".' % (end_token,))
1045 value_cpptype = field.message_type.fields_by_name[
'value'].cpp_type
1046 if value_cpptype == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
1047 value = getattr(message, field.name)[sub_message.key]
1048 value.CopyFrom(sub_message.value)
1050 getattr(message, field.name)[sub_message.key] = sub_message.value
1054 message_descriptor = message.DESCRIPTOR
1055 return (hasattr(message_descriptor,
'syntax')
and
1056 message_descriptor.syntax ==
'proto3')
1059 """Merges a single scalar field into a message.
1062 tokenizer: A tokenizer to parse the field value.
1063 message: A protocol message to record the data.
1064 field: The descriptor of the field to be merged.
1067 ParseError: In case of text parsing problems.
1068 RuntimeError: On runtime errors.
1073 if field.type
in (descriptor.FieldDescriptor.TYPE_INT32,
1074 descriptor.FieldDescriptor.TYPE_SINT32,
1075 descriptor.FieldDescriptor.TYPE_SFIXED32):
1077 elif field.type
in (descriptor.FieldDescriptor.TYPE_INT64,
1078 descriptor.FieldDescriptor.TYPE_SINT64,
1079 descriptor.FieldDescriptor.TYPE_SFIXED64):
1081 elif field.type
in (descriptor.FieldDescriptor.TYPE_UINT32,
1082 descriptor.FieldDescriptor.TYPE_FIXED32):
1084 elif field.type
in (descriptor.FieldDescriptor.TYPE_UINT64,
1085 descriptor.FieldDescriptor.TYPE_FIXED64):
1087 elif field.type
in (descriptor.FieldDescriptor.TYPE_FLOAT,
1088 descriptor.FieldDescriptor.TYPE_DOUBLE):
1089 value = tokenizer.ConsumeFloat()
1090 elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
1091 value = tokenizer.ConsumeBool()
1092 elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
1093 value = tokenizer.ConsumeString()
1094 elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
1095 value = tokenizer.ConsumeByteString()
1096 elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
1097 value = tokenizer.ConsumeEnum(field)
1099 raise RuntimeError(
'Unknown field type %d' % field.type)
1101 if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
1102 if field.is_extension:
1103 message.Extensions[field].append(value)
1105 getattr(message, field.name).append(value)
1107 if field.is_extension:
1110 message.HasExtension(field)):
1111 raise tokenizer.ParseErrorPreviousToken(
1112 'Message type "%s" should not have multiple "%s" extensions.' %
1113 (message.DESCRIPTOR.full_name, field.full_name))
1115 message.Extensions[field] = value
1117 duplicate_error =
False
1122 duplicate_error = bool(getattr(message, field.name))
1124 duplicate_error = message.HasField(field.name)
1127 raise tokenizer.ParseErrorPreviousToken(
1128 'Message type "%s" should not have multiple "%s" fields.' %
1129 (message.DESCRIPTOR.full_name, field.name))
1131 setattr(message, field.name, value)
1135 """Skips over contents (value or message) of a field.
1138 tokenizer: A tokenizer to parse the field name and values.
1146 if tokenizer.TryConsume(
':')
and not tokenizer.LookingAt(
1147 '{')
and not tokenizer.LookingAt(
'<'):
1154 """Skips over a complete field (name and value/message).
1157 tokenizer: A tokenizer to parse the field name and values.
1159 if tokenizer.TryConsume(
'['):
1161 tokenizer.ConsumeIdentifier()
1162 while tokenizer.TryConsume(
'.'):
1163 tokenizer.ConsumeIdentifier()
1164 tokenizer.Consume(
']')
1166 tokenizer.ConsumeIdentifierOrNumber()
1172 if not tokenizer.TryConsume(
','):
1173 tokenizer.TryConsume(
';')
1177 """Skips over a field message.
1180 tokenizer: A tokenizer to parse the field name and values.
1183 if tokenizer.TryConsume(
'<'):
1186 tokenizer.Consume(
'{')
1189 while not tokenizer.LookingAt(
'>')
and not tokenizer.LookingAt(
'}'):
1192 tokenizer.Consume(delimiter)
1196 """Skips over a field value.
1199 tokenizer: A tokenizer to parse the field name and values.
1202 ParseError: In case an invalid field value is found.
1206 if tokenizer.TryConsumeByteString():
1207 while tokenizer.TryConsumeByteString():
1211 if (
not tokenizer.TryConsumeIdentifier()
and
1213 not tokenizer.TryConsumeFloat()):
1214 raise ParseError(
'Invalid field value: ' + tokenizer.token)
1218 """Protocol buffer text representation tokenizer.
1220 This class handles the lower level string parsing by splitting it into
1223 It was directly ported from the Java protocol buffer API.
1226 _WHITESPACE = re.compile(
r'\s+')
1227 _COMMENT = re.compile(
r'(\s*#.*$)', re.MULTILINE)
1228 _WHITESPACE_OR_COMMENT = re.compile(
r'(\s|(#.*$))+', re.MULTILINE)
1229 _TOKEN = re.compile(
'|'.join([
1230 r'[a-zA-Z_][0-9a-zA-Z_+-]*',
1231 r'([0-9+-]|(\.[0-9]))[0-9a-zA-Z_.+-]*',
1234 r'{qt}[^{qt}\n\\]*((\\.)+[^{qt}\n\\]*)*({qt}|\\?$)'.
format(qt=mark)
1238 _IDENTIFIER = re.compile(
r'[^\d\W]\w*')
1239 _IDENTIFIER_OR_NUMBER = re.compile(
r'\w+')
1259 return self.
token == token
1262 """Checks the end of the text was reached.
1265 True iff the end was reached.
1267 return not self.
token
1273 except StopIteration:
1287 length =
len(match.group(0))
1291 """Tries to consume a given piece of text.
1294 token: Text to consume.
1297 True iff the text was consumed.
1299 if self.
token == token:
1305 """Consumes a piece of text.
1308 token: Text to consume.
1311 ParseError: If the text couldn't be consumed.
1314 raise self.
ParseError(
'Expected "%s".' % token)
1324 """Consumes a comment, returns a 2-tuple (trailing bool, comment str)."""
1335 and not just_started)
1337 return trailing, comment
1347 """Consumes protocol message field identifier.
1353 ParseError: If an identifier couldn't be consumed.
1357 raise self.
ParseError(
'Expected identifier.')
1369 """Consumes protocol message field identifier or number.
1375 ParseError: If an identifier or number couldn't be consumed.
1379 raise self.
ParseError(
'Expected identifier or number, got %s.' % result)
1391 """Consumes an integer number.
1397 ParseError: If an integer couldn't be consumed.
1401 except ValueError
as e:
1414 """Consumes a floating point number.
1420 ParseError: If a floating point number couldn't be consumed.
1424 except ValueError
as e:
1430 """Consumes a boolean value.
1436 ParseError: If a boolean value couldn't be consumed.
1440 except ValueError
as e:
1453 """Consumes a string value.
1459 ParseError: If a string value couldn't be consumed.
1463 return str(the_bytes,
'utf-8')
1464 except UnicodeDecodeError
as e:
1468 """Consumes a byte array value.
1471 The array parsed (as a string).
1474 ParseError: If a byte array value couldn't be consumed.
1477 while self.
token and self.
token[0]
in _QUOTES:
1479 return b
''.join(the_list)
1482 """Consume one token of a string literal.
1484 String literals (whether bytes or text) can come in multiple adjacent
1485 tokens which are automatically concatenated, like in C or Python. This
1486 method only consumes one token.
1491 ParseError: When the wrong format data is found.
1494 if len(text) < 1
or text[0]
not in _QUOTES:
1495 raise self.
ParseError(
'Expected string but found: %r' % (text,))
1497 if len(text) < 2
or text[-1] != text[0]:
1498 raise self.
ParseError(
'String missing ending quote: %r' % (text,))
1501 result = text_encoding.CUnescape(text[1:-1])
1502 except ValueError
as e:
1510 except ValueError
as e:
1516 """Creates and *returns* a ParseError for the previously read token.
1519 message: A message to set for the exception.
1522 A ParseError instance.
1528 """Creates and *returns* a ParseError for the current token."""
1536 """Reads the next meaningful token."""
1551 token = match.group(0)
1558 _Tokenizer = Tokenizer
1562 """Consumes a signed 32bit integer number from tokenizer.
1565 tokenizer: A tokenizer used to parse the number.
1571 ParseError: If a signed 32bit integer couldn't be consumed.
1577 """Consumes an unsigned 32bit integer number from tokenizer.
1580 tokenizer: A tokenizer used to parse the number.
1586 ParseError: If an unsigned 32bit integer couldn't be consumed.
1600 """Consumes a signed 64bit integer number from tokenizer.
1603 tokenizer: A tokenizer used to parse the number.
1609 ParseError: If a signed 64bit integer couldn't be consumed.
1623 """Consumes an unsigned 64bit integer number from tokenizer.
1626 tokenizer: A tokenizer used to parse the number.
1632 ParseError: If an unsigned 64bit integer couldn't be consumed.
1638 """Consumes an integer number from tokenizer.
1641 tokenizer: A tokenizer used to parse the number.
1642 is_signed: True if a signed integer must be parsed.
1643 is_long: True if a long integer must be parsed.
1649 ParseError: If an integer with given characteristics couldn't be consumed.
1652 result =
ParseInteger(tokenizer.token, is_signed=is_signed, is_long=is_long)
1653 except ValueError
as e:
1654 raise tokenizer.ParseError(
str(e))
1655 tokenizer.NextToken()
1659 def ParseInteger(text, is_signed=False, is_long=False):
1660 """Parses an integer.
1663 text: The text to parse.
1664 is_signed: True if a signed integer must be parsed.
1665 is_long: True if a long integer must be parsed.
1671 ValueError: Thrown Iff the text is not a valid integer.
1677 checker = _INTEGER_CHECKERS[2 *
int(is_long) +
int(is_signed)]
1678 checker.CheckValue(result)
1683 """Parses an integer without checking size/signedness.
1686 text: The text to parse.
1692 ValueError: Thrown Iff the text is not a valid integer.
1696 c_octal_match = re.match(
r'(-?)0(\d+)$', text)
1700 text = c_octal_match.group(1) +
'0o' + c_octal_match.group(2)
1704 raise ValueError(
'Couldn\'t parse integer: %s' % orig_text)
1708 """Parse a floating point number.
1711 text: Text to parse.
1717 ValueError: If a floating point number couldn't be parsed.
1724 if _FLOAT_INFINITY.match(text):
1726 return float(
'-inf')
1729 elif _FLOAT_NAN.match(text):
1734 return float(text.rstrip(
'f'))
1736 raise ValueError(
'Couldn\'t parse float: %s' % text)
1740 """Parse a boolean value.
1743 text: Text to parse.
1746 Boolean values parsed
1749 ValueError: If text is not a valid boolean.
1751 if text
in (
'true',
't',
'1',
'True'):
1753 elif text
in (
'false',
'f',
'0',
'False'):
1756 raise ValueError(
'Expected "true" or "false".')
1760 """Parse an enum value.
1762 The value can be specified by a number (the enum value), or by
1763 a string literal (the enum name).
1766 field: Enum field descriptor.
1767 value: String value.
1773 ValueError: If the enum value could not be parsed.
1775 enum_descriptor = field.enum_type
1777 number =
int(value, 0)
1780 enum_value = enum_descriptor.values_by_name.get(value,
None)
1781 if enum_value
is None:
1782 raise ValueError(
'Enum type "%s" has no value named %s.' %
1783 (enum_descriptor.full_name, value))
1786 if hasattr(field.file,
'syntax'):
1788 if field.file.syntax ==
'proto3':
1791 enum_value = enum_descriptor.values_by_number.get(number,
None)
1792 if enum_value
is None:
1793 raise ValueError(
'Enum type "%s" has no value with number %d.' %
1794 (enum_descriptor.full_name, number))
1795 return enum_value.number