"""Code for encoding protocol message primitives.

Contains the logic for encoding every logical protocol field type
into one of the 5 physical wire types.

This code is designed to push the Python interpreter's performance to the
limits.

The basic idea is that at startup time, for every field (i.e. every
FieldDescriptor) we construct two functions:  a "sizer" and an "encoder".  The
sizer takes a value of this field's type and computes its byte size.  The
encoder takes a writer function and a value.  It encodes the value into byte
strings and invokes the writer function to write those strings.  Typically the
writer function is the write() method of a BytesIO.

We try to do as much work as possible when constructing the writer and the
sizer rather than when calling them.  In particular:
* We copy any needed global functions to local variables, so that we do not need
  to do costly global table lookups at runtime.
* Similarly, we try to do any attribute lookups at startup time if possible.
* Every field's tag is encoded to bytes at startup, since it can't change at
  runtime.
* Whatever component of the field size we can compute at startup, we do.
* We *avoid* sharing code if doing so would make the code slower and not sharing
  does not burden us too much.  For example, encoders for repeated fields do
  not just call the encoders for singular fields in a loop because this would
  add an extra function call overhead for every loop iteration; instead, we
  manually inline the single-value encoder into the loop.
* If a Python function lacks a return statement, Python actually generates
  instructions to pop the result of the last statement off the stack, push
  None onto the stack, and then return that.  If we really don't care what
  value is returned, then we can save two instructions by returning the
  result of the last statement.  It looks funny but it helps.
* We assume that type and bounds checking has happened at a higher level.
"""
67 __author__ =
'kenton@google.com (Kenton Varda)'
83 """Compute the size of a varint value."""
84 if value <= 0x7f:
return 1
85 if value <= 0x3fff:
return 2
86 if value <= 0x1fffff:
return 3
87 if value <= 0xfffffff:
return 4
88 if value <= 0x7ffffffff:
return 5
89 if value <= 0x3ffffffffff:
return 6
90 if value <= 0x1ffffffffffff:
return 7
91 if value <= 0xffffffffffffff:
return 8
92 if value <= 0x7fffffffffffffff:
return 9
97 """Compute the size of a signed varint value."""
98 if value < 0:
return 10
99 if value <= 0x7f:
return 1
100 if value <= 0x3fff:
return 2
101 if value <= 0x1fffff:
return 3
102 if value <= 0xfffffff:
return 4
103 if value <= 0x7ffffffff:
return 5
104 if value <= 0x3ffffffffff:
return 6
105 if value <= 0x1ffffffffffff:
return 7
106 if value <= 0xffffffffffffff:
return 8
107 if value <= 0x7fffffffffffffff:
return 9
def _TagSize(field_number):
  """Returns the number of bytes required to serialize a tag with this field
  number.

  Args:
    field_number: The field number to pack into the tag.

  Returns:
    The varint size of the packed tag, as computed by _VarintSize.
  """
  # Wire type 0 is passed as a placeholder when packing the tag.
  return _VarintSize(wire_format.PackTag(field_number, 0))
def _SimpleSizer(compute_value_size):
  """A sizer which uses the function compute_value_size to compute the size of
  each value.  Typically compute_value_size is _VarintSize.

  Args:
    compute_value_size: Callable mapping one value to its encoded byte size.

  Returns:
    A function (field_number, is_repeated, is_packed) that returns a sizer
    taking the field's value and returning its total encoded size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        # Payload bytes, plus the varint holding the payload length, plus a
        # single tag for the whole packed run.
        result = 0
        for element in value:
          result += compute_value_size(element)
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      def RepeatedFieldSize(value):
        # Each element carries its own tag.
        result = tag_size * len(value)
        for element in value:
          result += compute_value_size(element)
        return result
      return RepeatedFieldSize
    else:
      def FieldSize(value):
        return tag_size + compute_value_size(value)
      return FieldSize

  return SpecificSizer
def _ModifiedSizer(compute_value_size, modify_value):
  """Like SimpleSizer, but modify_value is invoked on each value before it is
  passed to compute_value_size.  modify_value is typically ZigZagEncode.

  Args:
    compute_value_size: Callable mapping one (modified) value to its size.
    modify_value: Callable applied to every value before it is sized.

  Returns:
    A function (field_number, is_repeated, is_packed) that returns a sizer
    for the field's value.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        # Payload, plus its length prefix, plus one tag for the packed run.
        result = 0
        for element in value:
          result += compute_value_size(modify_value(element))
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      def RepeatedFieldSize(value):
        # One tag per element.
        result = tag_size * len(value)
        for element in value:
          result += compute_value_size(modify_value(element))
        return result
      return RepeatedFieldSize
    else:
      def FieldSize(value):
        return tag_size + compute_value_size(modify_value(value))
      return FieldSize

  return SpecificSizer
def _FixedSizer(value_size):
  """Like _SimpleSizer except for a fixed-size field.  The input is the size
  of one value.

  Args:
    value_size: Encoded size in bytes of a single value.

  Returns:
    A function (field_number, is_repeated, is_packed) that returns a sizer
    for the field's value.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        result = len(value) * value_size
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      # Per-element cost is constant, so compute it once up front.
      element_size = value_size + tag_size
      def RepeatedFieldSize(value):
        return len(value) * element_size
      return RepeatedFieldSize
    else:
      # The size does not depend on the value at all.
      field_size = value_size + tag_size
      def FieldSize(value):
        return field_size
      return FieldSize

  return SpecificSizer
222 _SignedVarintSize, wire_format.ZigZagEncode)
def StringSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a string field.

  Args:
    field_number: The field's number, used to size its tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function taking the field's value (a string, or an iterable of strings
    when repeated) and returning its encoded size in bytes.
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  local_len = len
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        # The length prefix counts UTF-8 bytes, not characters.
        l = local_len(element.encode('utf-8'))
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = local_len(value.encode('utf-8'))
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def BytesSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a bytes field.

  Args:
    field_number: The field's number, used to size its tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function taking the field's value (a bytes object, or an iterable of
    them when repeated) and returning its encoded size in bytes.
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  local_len = len
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        l = local_len(element)
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = local_len(value)
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def GroupSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a group field.

  Args:
    field_number: The field's number, used to size its tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function taking the field's value (an object exposing ByteSize(), or
    an iterable of them when repeated) and returning its encoded size.
  """

  # Doubled because a group is bracketed by a start tag and an end tag.
  tag_size = _TagSize(field_number) * 2
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        result += element.ByteSize()
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      return tag_size + value.ByteSize()
    return FieldSize
def MessageSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a message field.

  Args:
    field_number: The field's number, used to size its tag.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this sizer has no packed form.

  Returns:
    A function taking the field's value (an object exposing ByteSize(), or
    an iterable of them when repeated) and returning its encoded size.
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        # Each message is written as a length prefix followed by its body.
        l = element.ByteSize()
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = value.ByteSize()
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def MessageSetItemSizer(field_number):
  """Returns a sizer for extensions of MessageSet.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  Args:
    field_number: The extension's field number, written as the type_id.

  Returns:
    A function taking a value exposing ByteSize() and returning the encoded
    size of the whole Item group.
  """
  # Everything except the payload and its length prefix is known up front:
  # group start + end tags (field 1), the type_id tag (field 2) with its
  # varint value, and the message tag (field 3).
  static_size = (_TagSize(1) * 2 + _TagSize(2) + _VarintSize(field_number) +
                 _TagSize(3))
  local_VarintSize = _VarintSize

  def FieldSize(value):
    l = value.ByteSize()
    return static_size + local_VarintSize(l) + l

  return FieldSize
def MapSizer(field_descriptor, is_message_map):
  """Returns a sizer for a map field.

  Args:
    field_descriptor: Descriptor of the map field; its message_type describes
      the entry message and its number is the field number.
    is_message_map: Whether the map's values are themselves messages.

  Returns:
    A function taking the map and returning the total encoded size of all of
    its entries.
  """

  # _concrete_class is looked up per call rather than here.
  # NOTE(review): presumably because it may not be set yet when the sizer is
  # built -- confirm.
  message_type = field_descriptor.message_type
  message_sizer = MessageSizer(field_descriptor.number, False, False)

  def FieldSize(map_value):
    total = 0
    for key in map_value:
      value = map_value[key]
      # A throwaway entry message is built for every pair so that the
      # regular message sizer can be reused.
      entry_msg = message_type._concrete_class(key=key, value=value)
      total += message_sizer(entry_msg)
      if is_message_map:
        # NOTE(review): called for its side effect on the value message's
        # cached size -- confirm against the message implementation.
        value.ByteSize()
    return total

  return FieldSize
def _VarintEncoder():
  """Return an encoder for a basic varint value (does not include tag).

  Returns:
    A function (write, value, unused_deterministic=None) that emits the
    varint encoding of ``value`` one byte at a time through ``write`` and
    returns the result of the final write.
  """

  def EncodeVarint(write, value, unused_deterministic=None):
    bits = value & 0x7f
    value >>= 7
    while value:
      # More groups follow: set the continuation bit.
      write(six.int2byte(0x80|bits))
      bits = value & 0x7f
      value >>= 7
    return write(six.int2byte(bits))

  return EncodeVarint
def _SignedVarintEncoder():
  """Return an encoder for a basic signed varint value (does not include
  tag).

  Returns:
    A function (write, value, unused_deterministic=None) that emits the
    varint encoding of ``value`` through ``write``; negative values are
    encoded as their 64-bit two's-complement representation.
  """

  def EncodeSignedVarint(write, value, unused_deterministic=None):
    if value < 0:
      # Map into the unsigned 64-bit range so the shift loop terminates.
      value += (1 << 64)
    bits = value & 0x7f
    value >>= 7
    while value:
      write(six.int2byte(0x80|bits))
      bits = value & 0x7f
      value >>= 7
    return write(six.int2byte(bits))

  return EncodeSignedVarint
def _VarintBytes(value):
  """Encode the given integer as a varint and return the bytes.  This is only
  called at startup time so it doesn't need to be fast.

  Args:
    value: The integer to encode.

  Returns:
    The varint encoding of ``value`` as a single bytes object.
  """

  # Collect the individual bytes the encoder writes, then glue them together.
  pieces = []
  _EncodeVarint(pieces.append, value, True)
  return b"".join(pieces)
def TagBytes(field_number, wire_type):
  """Encode the given tag and return the bytes.  Only called at startup.

  Args:
    field_number: The field number to pack into the tag.
    wire_type: The wire type to pack into the tag.

  Returns:
    The varint-encoded tag as bytes.
  """

  return six.binary_type(
      _VarintBytes(wire_format.PackTag(field_number, wire_type)))
def _SimpleEncoder(wire_type, encode_value, compute_value_size):
  """Return a constructor for an encoder for fields of a particular type.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      encode_value:  A function which encodes an individual value, e.g.
        _EncodeVarint().
      compute_value_size:  A function which computes the size of an individual
        value, e.g. _VarintSize().

  Returns:
    A function (field_number, is_repeated, is_packed) that returns an encoder
    called as encoder(write, value, deterministic).
  """

  def SpecificEncoder(field_number, is_repeated, is_packed):
    if is_packed:
      # Packed runs always use the length-delimited wire type for their tag.
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        # The payload length has to be written before the payload, hence the
        # separate sizing pass.
        size = 0
        for element in value:
          size += compute_value_size(element)
        local_EncodeVarint(write, size, deterministic)
        for element in value:
          encode_value(write, element, deterministic)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, deterministic):
        for element in value:
          write(tag_bytes)
          encode_value(write, element, deterministic)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, deterministic):
        write(tag_bytes)
        return encode_value(write, value, deterministic)
      return EncodeField

  return SpecificEncoder
def _ModifiedEncoder(wire_type, encode_value, compute_value_size, modify_value):
  """Like SimpleEncoder but additionally invokes modify_value on every value
  before passing it to encode_value.  Usually modify_value is ZigZagEncode.

  Args:
    wire_type: The field's wire type, for encoding tags.
    encode_value: A function which encodes an individual (modified) value.
    compute_value_size: A function which sizes an individual (modified) value.
    modify_value: A function applied to every value before sizing/encoding.

  Returns:
    A function (field_number, is_repeated, is_packed) that returns an encoder
    called as encoder(write, value, deterministic).
  """

  def SpecificEncoder(field_number, is_repeated, is_packed):
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        # Sizing pass first: the length prefix precedes the payload.
        size = 0
        for element in value:
          size += compute_value_size(modify_value(element))
        local_EncodeVarint(write, size, deterministic)
        for element in value:
          encode_value(write, modify_value(element), deterministic)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, deterministic):
        for element in value:
          write(tag_bytes)
          encode_value(write, modify_value(element), deterministic)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, deterministic):
        write(tag_bytes)
        return encode_value(write, modify_value(value), deterministic)
      return EncodeField

  return SpecificEncoder
def _StructPackEncoder(wire_type, format):
  """Return a constructor for an encoder for a fixed-width field.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      format:  The format string to pass to struct.pack().

  Returns:
    A function (field_number, is_repeated, is_packed) that returns an encoder
    called as encoder(write, value, deterministic).
  """

  value_size = struct.calcsize(format)

  def SpecificEncoder(field_number, is_repeated, is_packed):
    local_struct_pack = struct.pack
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        # Fixed width, so the payload length is a simple product.
        local_EncodeVarint(write, len(value) * value_size, deterministic)
        for element in value:
          write(local_struct_pack(format, element))
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, unused_deterministic=None):
        for element in value:
          write(tag_bytes)
          write(local_struct_pack(format, element))
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, unused_deterministic=None):
        write(tag_bytes)
        return write(local_struct_pack(format, value))
      return EncodeField

  return SpecificEncoder
def _FloatingPointEncoder(wire_type, format):
  """Return a constructor for an encoder for float fields.

  This is like StructPackEncoder, but catches errors that may be due to
  passing non-finite floating-point values to struct.pack, and makes a
  second attempt to encode those values.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      format:  The format string to pass to struct.pack().

  Returns:
    A function (field_number, is_repeated, is_packed) that returns an encoder
    called as encoder(write, value, deterministic).

  Raises:
    ValueError: If ``format`` describes a value that is not 4 or 8 bytes long.
  """

  value_size = struct.calcsize(format)
  if value_size == 4:
    def EncodeNonFiniteOrRaise(write, value):
      # The byte strings below are the little-endian IEEE-754 encodings.
      if value == _POS_INF:
        write(b'\x00\x00\x80\x7F')
      elif value == _NEG_INF:
        write(b'\x00\x00\x80\xFF')
      elif value != value:           # NaN
        write(b'\x00\x00\xC0\x7F')
      else:
        # Not a non-finite value: re-raise the error currently being handled
        # by the caller's except block.
        raise
  elif value_size == 8:
    def EncodeNonFiniteOrRaise(write, value):
      if value == _POS_INF:
        write(b'\x00\x00\x00\x00\x00\x00\xF0\x7F')
      elif value == _NEG_INF:
        write(b'\x00\x00\x00\x00\x00\x00\xF0\xFF')
      elif value != value:                         # NaN
        write(b'\x00\x00\x00\x00\x00\x00\xF8\x7F')
      else:
        raise
  else:
    raise ValueError('Can\'t encode floating-point values that are '
                     '%d bytes long (only 4 or 8)' % value_size)

  def SpecificEncoder(field_number, is_repeated, is_packed):
    local_struct_pack = struct.pack
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        local_EncodeVarint(write, len(value) * value_size, deterministic)
        for element in value:
          # Try the fast path first and only fall back when struct.pack
          # rejects the value.
          # NOTE(review): the exception type is not visible in the listing;
          # SystemError is assumed -- confirm.
          try:
            write(local_struct_pack(format, element))
          except SystemError:
            EncodeNonFiniteOrRaise(write, element)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, unused_deterministic=None):
        for element in value:
          write(tag_bytes)
          try:
            write(local_struct_pack(format, element))
          except SystemError:
            EncodeNonFiniteOrRaise(write, element)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, unused_deterministic=None):
        write(tag_bytes)
        try:
          write(local_struct_pack(format, value))
        except SystemError:
          EncodeNonFiniteOrRaise(write, value)
      return EncodeField

  return SpecificEncoder
625 wire_format.WIRETYPE_VARINT, _EncodeSignedVarint, _SignedVarintSize)
628 wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize)
631 wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize,
632 wire_format.ZigZagEncode)
def BoolEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a boolean field.

  Args:
    field_number: The field's number, for encoding tags.
    is_repeated: Whether the field is repeated.
    is_packed: Whether the field uses the packed encoding.

  Returns:
    An encoder called as encoder(write, value, deterministic) that writes one
    byte per boolean.
  """

  false_byte = b'\x00'
  true_byte = b'\x01'
  if is_packed:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
    local_EncodeVarint = _EncodeVarint
    def EncodePackedField(write, value, deterministic):
      write(tag_bytes)
      # One byte per element, so the payload length is the element count.
      local_EncodeVarint(write, len(value), deterministic)
      for element in value:
        if element:
          write(true_byte)
        else:
          write(false_byte)
    return EncodePackedField
  elif is_repeated:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
    def EncodeRepeatedField(write, value, unused_deterministic=None):
      for element in value:
        write(tag_bytes)
        if element:
          write(true_byte)
        else:
          write(false_byte)
    return EncodeRepeatedField
  else:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
    def EncodeField(write, value, unused_deterministic=None):
      write(tag_bytes)
      if value:
        return write(true_byte)
      return write(false_byte)
    return EncodeField
def StringEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a string field.

  Args:
    field_number: The field's number, for encoding tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    An encoder called as encoder(write, value, deterministic) that writes the
    tag, the UTF-8 byte length and the UTF-8 bytes of each string.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  local_len = len
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        # Encode first: the length prefix counts bytes, not characters.
        encoded = element.encode('utf-8')
        write(tag)
        local_EncodeVarint(write, local_len(encoded), deterministic)
        write(encoded)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      encoded = value.encode('utf-8')
      write(tag)
      local_EncodeVarint(write, local_len(encoded), deterministic)
      return write(encoded)
    return EncodeField
def BytesEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a bytes field.

  Args:
    field_number: The field's number, for encoding tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    An encoder called as encoder(write, value, deterministic) that writes the
    tag, the length and the raw bytes of each value.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  local_len = len
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        write(tag)
        local_EncodeVarint(write, local_len(element), deterministic)
        write(element)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      write(tag)
      local_EncodeVarint(write, local_len(value), deterministic)
      return write(value)
    return EncodeField
def GroupEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a group field.

  Args:
    field_number: The field's number, for encoding the start and end tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    An encoder called as encoder(write, value, deterministic) that brackets
    each serialized group with its start and end tags.
  """

  start_tag = TagBytes(field_number, wire_format.WIRETYPE_START_GROUP)
  end_tag = TagBytes(field_number, wire_format.WIRETYPE_END_GROUP)
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        write(start_tag)
        element._InternalSerialize(write, deterministic)
        write(end_tag)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      write(start_tag)
      value._InternalSerialize(write, deterministic)
      return write(end_tag)
    return EncodeField
def MessageEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a message field.

  Args:
    field_number: The field's number, for encoding tags.
    is_repeated: Whether the field is repeated.
    is_packed: Must be false; this encoder has no packed form.

  Returns:
    An encoder called as encoder(write, value, deterministic) that writes the
    tag, the message's ByteSize() and then its serialized body.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        write(tag)
        local_EncodeVarint(write, element.ByteSize(), deterministic)
        element._InternalSerialize(write, deterministic)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      write(tag)
      local_EncodeVarint(write, value.ByteSize(), deterministic)
      return value._InternalSerialize(write, deterministic)
    return EncodeField
def MessageSetItemEncoder(field_number):
  """Encoder for extensions of MessageSet.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }

  Args:
    field_number: The extension's field number, written as the type_id.

  Returns:
    An encoder called as encoder(write, value, deterministic) that writes one
    complete Item group wrapping the serialized value.
  """
  # Everything ahead of the payload length is constant per extension, so it
  # is assembled once: group start tag, type_id tag + value, message tag.
  start_bytes = b"".join([
      TagBytes(1, wire_format.WIRETYPE_START_GROUP),
      TagBytes(2, wire_format.WIRETYPE_VARINT),
      _VarintBytes(field_number),
      TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)])
  end_bytes = TagBytes(1, wire_format.WIRETYPE_END_GROUP)
  local_EncodeVarint = _EncodeVarint

  def EncodeField(write, value, deterministic):
    write(start_bytes)
    local_EncodeVarint(write, value.ByteSize(), deterministic)
    value._InternalSerialize(write, deterministic)
    return write(end_bytes)

  return EncodeField
  """Encoder for a map field.
810 Maps always have a wire format like this:
813 value_type value = 2;
815 repeated MapEntry map = N;
819 message_type = field_descriptor.message_type
820 encode_message =
MessageEncoder(field_descriptor.number,
False,
False)
822 def EncodeField(write, value, deterministic):
823 value_keys = sorted(value.keys())
if deterministic
else value
824 for key
in value_keys:
825 entry_msg = message_type._concrete_class(key=key, value=value[key])
826 encode_message(write, entry_msg, deterministic)