"""Code for encoding protocol message primitives.

Contains the logic for encoding every logical protocol field type
into one of the 5 physical wire types.

This code is designed to push the Python interpreter's performance to the
limits.

The basic idea is that at startup time, for every field (i.e. every
FieldDescriptor) we construct two functions:  a "sizer" and an "encoder".  The
sizer takes a value of this field's type and computes its byte size.  The
encoder takes a writer function and a value.  It encodes the value into byte
strings and invokes the writer function to write those strings.  Typically the
writer function is the write() method of a BytesIO.

We try to do as much work as possible when constructing the writer and the
sizer rather than when calling them.  In particular:
* We copy any needed global functions to local variables, so that we do not need
  to do costly global table lookups at runtime.
* Similarly, we try to do any attribute lookups at startup time if possible.
* Every field's tag is encoded to bytes at startup, since it can't change at
  runtime.
* Whatever component of the field size we can compute at startup, we do.
* We *avoid* sharing code if doing so would make the code slower and not sharing
  does not burden us too much.  For example, encoders for repeated fields do
  not just call the encoders for singular fields in a loop because this would
  add an extra function call overhead for every loop iteration; instead, we
  manually inline the single-value encoder into the loop.
* If a Python function lacks a return statement, Python actually generates
  instructions to pop the result of the last statement off the stack, push
  None onto the stack, and then return that.  If we really don't care what
  value is returned, then we can save two instructions by returning the
  result of the last statement.  It looks funny but it helps.
* We assume that type and bounds checking has happened at a higher level.
"""
__author__ = 'kenton@google.com (Kenton Varda)'
def _VarintSize(value):
  """Compute the size of a varint value.

  Each varint byte carries 7 payload bits, so the thresholds below are the
  largest values representable in 1..9 bytes.

  Args:
    value: A non-negative integer (bounds are assumed to have been checked by
      the caller).

  Returns:
    The number of bytes (1..10) the varint encoding of `value` occupies.
  """
  if value <= 0x7f: return 1
  if value <= 0x3fff: return 2
  if value <= 0x1fffff: return 3
  if value <= 0xfffffff: return 4
  if value <= 0x7ffffffff: return 5
  if value <= 0x3ffffffffff: return 6
  if value <= 0x1ffffffffffff: return 7
  if value <= 0xffffffffffffff: return 8
  if value <= 0x7fffffffffffffff: return 9
  # Anything above 63 bits needs the tenth byte.
  return 10
def _SignedVarintSize(value):
  """Compute the size of a signed varint value.

  Args:
    value: An integer; may be negative.

  Returns:
    The number of bytes (1..10) the varint encoding occupies.  Every negative
    value takes 10 bytes.
  """
  # Negative values always take the maximum width, so short-circuit them.
  if value < 0: return 10
  if value <= 0x7f: return 1
  if value <= 0x3fff: return 2
  if value <= 0x1fffff: return 3
  if value <= 0xfffffff: return 4
  if value <= 0x7ffffffff: return 5
  if value <= 0x3ffffffffff: return 6
  if value <= 0x1ffffffffffff: return 7
  if value <= 0xffffffffffffff: return 8
  if value <= 0x7fffffffffffffff: return 9
  return 10
def _TagSize(field_number):
  """Returns the number of bytes required to serialize a tag with this field
  number."""
  # Just pass in type 0, since the type won't affect the tag+type size.
  return _VarintSize(wire_format.PackTag(field_number, 0))
def _SimpleSizer(compute_value_size):
  """A sizer which uses the function compute_value_size to compute the size of
  each value.  Typically compute_value_size is _VarintSize.

  Args:
    compute_value_size: Callable taking one value and returning its encoded
      size in bytes (tag excluded).

  Returns:
    A constructor `SpecificSizer(field_number, is_repeated, is_packed)` that
    returns a function mapping a field value to its total byte size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        # Packed: one tag, one length varint, then the concatenated payload.
        result = 0
        for element in value:
          result += compute_value_size(element)
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      def RepeatedFieldSize(value):
        # Unpacked repeated: every element carries its own tag.
        result = tag_size * len(value)
        for element in value:
          result += compute_value_size(element)
        return result
      return RepeatedFieldSize
    else:
      def FieldSize(value):
        return tag_size + compute_value_size(value)
      return FieldSize

  return SpecificSizer
def _ModifiedSizer(compute_value_size, modify_value):
  """Like SimpleSizer, but modify_value is invoked on each value before it is
  passed to compute_value_size.  modify_value is typically ZigZagEncode.

  Args:
    compute_value_size: Callable returning the encoded size of one
      (already modified) value.
    modify_value: Callable applied to each value before sizing.

  Returns:
    A constructor `SpecificSizer(field_number, is_repeated, is_packed)` that
    returns a function mapping a field value to its total byte size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        # Packed: one tag, one length varint, then the concatenated payload.
        result = 0
        for element in value:
          result += compute_value_size(modify_value(element))
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      def RepeatedFieldSize(value):
        # Unpacked repeated: every element carries its own tag.
        result = tag_size * len(value)
        for element in value:
          result += compute_value_size(modify_value(element))
        return result
      return RepeatedFieldSize
    else:
      def FieldSize(value):
        return tag_size + compute_value_size(modify_value(value))
      return FieldSize

  return SpecificSizer
def _FixedSizer(value_size):
  """Like _SimpleSizer except for a fixed-size field.  The input is the size
  of one value.

  Args:
    value_size: Encoded size in bytes of a single value of this field type.

  Returns:
    A constructor `SpecificSizer(field_number, is_repeated, is_packed)` that
    returns a function mapping a field value to its total byte size.
  """

  def SpecificSizer(field_number, is_repeated, is_packed):
    tag_size = _TagSize(field_number)
    if is_packed:
      local_VarintSize = _VarintSize
      def PackedFieldSize(value):
        result = len(value) * value_size
        return result + local_VarintSize(result) + tag_size
      return PackedFieldSize
    elif is_repeated:
      # Precompute the per-element cost once, at construction time.
      element_size = value_size + tag_size
      def RepeatedFieldSize(value):
        return len(value) * element_size
      return RepeatedFieldSize
    else:
      # A singular fixed-width field always has the same size.
      field_size = value_size + tag_size
      def FieldSize(value):
        return field_size
      return FieldSize

  return SpecificSizer
# Sizer constructors for the varint-encoded and fixed-width field types.
Int32Sizer = Int64Sizer = EnumSizer = _SimpleSizer(_SignedVarintSize)

UInt32Sizer = UInt64Sizer = _SimpleSizer(_VarintSize)

# Signed "sint" types are ZigZag-transformed before being sized.
SInt32Sizer = SInt64Sizer = _ModifiedSizer(
    _SignedVarintSize, wire_format.ZigZagEncode)

Fixed32Sizer = SFixed32Sizer = FloatSizer  = _FixedSizer(4)
Fixed64Sizer = SFixed64Sizer = DoubleSizer = _FixedSizer(8)
def StringSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a string field.

  Args:
    field_number: The field's number, used to size its tag.
    is_repeated: Whether the returned sizer receives a sequence of strings.
    is_packed: Must be false; string fields cannot be packed.

  Returns:
    A function mapping the field value to its encoded size in bytes
    (tag + length varint + UTF-8 payload, per element).
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  local_len = len
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        # Size is measured on the UTF-8 bytes, not on the character count.
        l = local_len(element.encode('utf-8'))
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = local_len(value.encode('utf-8'))
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def BytesSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a bytes field.

  Args:
    field_number: The field's number, used to size its tag.
    is_repeated: Whether the returned sizer receives a sequence of values.
    is_packed: Must be false; bytes fields cannot be packed.

  Returns:
    A function mapping the field value to its encoded size in bytes
    (tag + length varint + payload, per element).
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  local_len = len
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        l = local_len(element)
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = local_len(value)
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def GroupSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a group field.

  Args:
    field_number: The field's number, used to size its tags.
    is_repeated: Whether the returned sizer receives a sequence of groups.
    is_packed: Must be false; group fields cannot be packed.

  Returns:
    A function mapping the field value to its encoded size in bytes.
  """

  # Doubled tag size: a group is delimited by a start tag and an end tag
  # (see GroupEncoder).
  tag_size = _TagSize(field_number) * 2
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        result += element.ByteSize()
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      return tag_size + value.ByteSize()
    return FieldSize
def MessageSizer(field_number, is_repeated, is_packed):
  """Returns a sizer for a message field.

  Args:
    field_number: The field's number, used to size its tag.
    is_repeated: Whether the returned sizer receives a sequence of messages.
    is_packed: Must be false; message fields cannot be packed.

  Returns:
    A function mapping the field value to its encoded size in bytes
    (tag + length varint + message body, per element).
  """

  tag_size = _TagSize(field_number)
  local_VarintSize = _VarintSize
  assert not is_packed
  if is_repeated:
    def RepeatedFieldSize(value):
      result = tag_size * len(value)
      for element in value:
        l = element.ByteSize()
        result += local_VarintSize(l) + l
      return result
    return RepeatedFieldSize
  else:
    def FieldSize(value):
      l = value.ByteSize()
      return tag_size + local_VarintSize(l) + l
    return FieldSize
def MessageSetItemSizer(field_number):
  """Returns a sizer for extensions of MessageSet.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }
  """
  # Everything except the message payload and its length prefix is constant
  # for a given extension: group start/end tags, the type_id tag and value,
  # and the message tag.
  static_size = (_TagSize(1) * 2 + _TagSize(2) + _VarintSize(field_number) +
                 _TagSize(3))
  local_VarintSize = _VarintSize

  def FieldSize(value):
    l = value.ByteSize()
    return static_size + local_VarintSize(l) + l

  return FieldSize
def MapSizer(field_descriptor, is_message_map):
  """Returns a sizer for a map field.

  Args:
    field_descriptor: Descriptor of the map field; its `message_type` and
      `number` attributes are read.
    is_message_map: Whether the map's values are messages.

  Returns:
    A function mapping the map value to its total encoded size in bytes.
  """

  # Can't look at field_descriptor.message_type._concrete_class because it may
  # not have been initialized yet.
  message_type = field_descriptor.message_type
  message_sizer = MessageSizer(field_descriptor.number, False, False)

  def FieldSize(map_value):
    total = 0
    for key in map_value:
      value = map_value[key]
      # It's wasteful to create the messages and throw them away one second
      # later since we'll do the same for the actual encode.  But there's not an
      # obvious way to avoid this within the current design without tons of code
      # duplication. For message map, value.ByteSize() should be called to
      # update the status.
      entry_msg = message_type._concrete_class(key=key, value=value)
      total += message_sizer(entry_msg)
      if is_message_map:
        value.ByteSize()
    return total

  return FieldSize
def _VarintEncoder():
  """Return an encoder for a basic varint value (does not include tag).

  Returns:
    A function `EncodeVarint(write, value, unused_deterministic=None)` that
    calls `write` once per output byte and returns the result of the final
    `write` call.
  """

  local_int2byte = struct.Struct('>B').pack

  def EncodeVarint(write, value, unused_deterministic=None):
    # Emit 7 bits at a time, least-significant group first; the high bit of
    # each byte flags that more bytes follow.
    bits = value & 0x7f
    value >>= 7
    while value:
      write(local_int2byte(0x80|bits))
      bits = value & 0x7f
      value >>= 7
    return write(local_int2byte(bits))

  return EncodeVarint
def _SignedVarintEncoder():
  """Return an encoder for a basic signed varint value (does not include
  tag).

  Returns:
    A function `EncodeSignedVarint(write, value, unused_deterministic=None)`
    that calls `write` once per output byte and returns the result of the
    final `write` call.
  """

  local_int2byte = struct.Struct('>B').pack

  def EncodeSignedVarint(write, value, unused_deterministic=None):
    # Map negative values onto their 64-bit two's-complement representation.
    if value < 0:
      value += (1 << 64)
    bits = value & 0x7f
    value >>= 7
    while value:
      write(local_int2byte(0x80|bits))
      bits = value & 0x7f
      value >>= 7
    return write(local_int2byte(bits))

  return EncodeSignedVarint
def _VarintBytes(value):
  """Encode the given integer as a varint and return the bytes.  This is only
  called at startup time so it doesn't need to be fast."""

  # Collect the chunks the encoder writes, then join them into one bytes
  # object.
  pieces = []
  _EncodeVarint(pieces.append, value, True)
  return b"".join(pieces)
def TagBytes(field_number, wire_type):
  """Encode the given tag and return the bytes.  Only called at startup.

  Args:
    field_number: The field's number.
    wire_type: The wire type to combine with the field number.

  Returns:
    The varint-encoded tag as a bytes object.
  """

  return bytes(_VarintBytes(wire_format.PackTag(field_number, wire_type)))
def _SimpleEncoder(wire_type, encode_value, compute_value_size):
  """Return a constructor for an encoder for fields of a particular type.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      encode_value:  A function which encodes an individual value, e.g.
        _EncodeVarint().
      compute_value_size:  A function which computes the size of an individual
        value, e.g. _VarintSize().

  Returns:
      A constructor `SpecificEncoder(field_number, is_repeated, is_packed)`
      returning a function `(write, value, deterministic)` that writes the
      encoded field.
  """

  def SpecificEncoder(field_number, is_repeated, is_packed):
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        # The payload length must be written before the payload itself, so
        # the values are walked twice: once to size, once to encode.
        size = 0
        for element in value:
          size += compute_value_size(element)
        local_EncodeVarint(write, size, deterministic)
        for element in value:
          encode_value(write, element, deterministic)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, deterministic):
        for element in value:
          write(tag_bytes)
          encode_value(write, element, deterministic)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, deterministic):
        write(tag_bytes)
        return encode_value(write, value, deterministic)
      return EncodeField

  return SpecificEncoder
def _ModifiedEncoder(wire_type, encode_value, compute_value_size, modify_value):
  """Like SimpleEncoder but additionally invokes modify_value on every value
  before passing it to encode_value.  Usually modify_value is ZigZagEncode.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      encode_value:  A function which encodes an individual value.
      compute_value_size:  A function which computes the size of an individual
        (already modified) value.
      modify_value:  A function applied to each value before sizing/encoding.

  Returns:
      A constructor `SpecificEncoder(field_number, is_repeated, is_packed)`
      returning a function `(write, value, deterministic)` that writes the
      encoded field.
  """

  def SpecificEncoder(field_number, is_repeated, is_packed):
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        # Size pass first: the length prefix precedes the payload.
        size = 0
        for element in value:
          size += compute_value_size(modify_value(element))
        local_EncodeVarint(write, size, deterministic)
        for element in value:
          encode_value(write, modify_value(element), deterministic)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, deterministic):
        for element in value:
          write(tag_bytes)
          encode_value(write, modify_value(element), deterministic)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, deterministic):
        write(tag_bytes)
        return encode_value(write, modify_value(value), deterministic)
      return EncodeField

  return SpecificEncoder
def _StructPackEncoder(wire_type, format):
  """Return a constructor for an encoder for a fixed-width field.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      format:  The format string to pass to struct.pack().

  Returns:
      A constructor `SpecificEncoder(field_number, is_repeated, is_packed)`
      returning a function that writes the encoded field.
  """

  value_size = struct.calcsize(format)

  def SpecificEncoder(field_number, is_repeated, is_packed):
    local_struct_pack = struct.pack
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        # Fixed-width elements: payload length is simply count * width.
        local_EncodeVarint(write, len(value) * value_size, deterministic)
        for element in value:
          write(local_struct_pack(format, element))
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, unused_deterministic=None):
        for element in value:
          write(tag_bytes)
          write(local_struct_pack(format, element))
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, unused_deterministic=None):
        write(tag_bytes)
        return write(local_struct_pack(format, value))
      return EncodeField

  return SpecificEncoder
def _FloatingPointEncoder(wire_type, format):
  """Return a constructor for an encoder for float fields.

  This is like StructPackEncoder, but catches errors that may be due to
  passing non-finite floating-point values to struct.pack, and makes a
  second attempt to encode those values.

  Args:
      wire_type:  The field's wire type, for encoding tags.
      format:  The format string to pass to struct.pack().

  Returns:
      A constructor `SpecificEncoder(field_number, is_repeated, is_packed)`
      returning a function that writes the encoded field.

  Raises:
      ValueError: If `format` describes a value that is not 4 or 8 bytes long.
  """

  value_size = struct.calcsize(format)
  if value_size == 4:
    def EncodeNonFiniteOrRaise(write, value):
      # Remember that the serialized form uses little-endian byte order.
      if value == _POS_INF:
        write(b'\x00\x00\x80\x7F')
      elif value == _NEG_INF:
        write(b'\x00\x00\x80\xFF')
      elif value != value:           # NaN
        write(b'\x00\x00\xC0\x7F')
      else:
        # Finite value: re-raise the error the caller is currently handling.
        raise
  elif value_size == 8:
    def EncodeNonFiniteOrRaise(write, value):
      if value == _POS_INF:
        write(b'\x00\x00\x00\x00\x00\x00\xF0\x7F')
      elif value == _NEG_INF:
        write(b'\x00\x00\x00\x00\x00\x00\xF0\xFF')
      elif value != value:                         # NaN
        write(b'\x00\x00\x00\x00\x00\x00\xF8\x7F')
      else:
        # Finite value: re-raise the error the caller is currently handling.
        raise
  else:
    raise ValueError('Can\'t encode floating-point values that are '
                     '%d bytes long (only 4 or 8)' % value_size)

  def SpecificEncoder(field_number, is_repeated, is_packed):
    local_struct_pack = struct.pack
    if is_packed:
      tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
      local_EncodeVarint = _EncodeVarint
      def EncodePackedField(write, value, deterministic):
        write(tag_bytes)
        local_EncodeVarint(write, len(value) * value_size, deterministic)
        for element in value:
          # This try/except block is going to be faster than any code that
          # we could write to check whether element is finite.
          try:
            write(local_struct_pack(format, element))
          except SystemError:
            EncodeNonFiniteOrRaise(write, element)
      return EncodePackedField
    elif is_repeated:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeRepeatedField(write, value, unused_deterministic=None):
        for element in value:
          write(tag_bytes)
          try:
            write(local_struct_pack(format, element))
          except SystemError:
            EncodeNonFiniteOrRaise(write, element)
      return EncodeRepeatedField
    else:
      tag_bytes = TagBytes(field_number, wire_type)
      def EncodeField(write, value, unused_deterministic=None):
        write(tag_bytes)
        try:
          write(local_struct_pack(format, value))
        except SystemError:
          EncodeNonFiniteOrRaise(write, value)
      return EncodeField

  return SpecificEncoder
# Encoder constructors for the varint-encoded integer field types.
Int32Encoder = Int64Encoder = EnumEncoder = _SimpleEncoder(
    wire_format.WIRETYPE_VARINT, _EncodeSignedVarint, _SignedVarintSize)

UInt32Encoder = UInt64Encoder = _SimpleEncoder(
    wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize)

# Signed "sint" types are ZigZag-transformed before being encoded.
SInt32Encoder = SInt64Encoder = _ModifiedEncoder(
    wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize,
    wire_format.ZigZagEncode)
def BoolEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a boolean field.

  Args:
    field_number: The field's number, used to build its tag.
    is_repeated: Whether the returned encoder receives a sequence of values.
    is_packed: Whether a repeated field uses the packed encoding.

  Returns:
    A function that writes the encoded field via the supplied `write`.
  """

  false_byte = b'\x00'
  true_byte = b'\x01'
  if is_packed:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
    local_EncodeVarint = _EncodeVarint
    def EncodePackedField(write, value, deterministic):
      write(tag_bytes)
      # Each bool is exactly one byte, so the payload length is len(value).
      local_EncodeVarint(write, len(value), deterministic)
      for element in value:
        if element:
          write(true_byte)
        else:
          write(false_byte)
    return EncodePackedField
  elif is_repeated:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
    def EncodeRepeatedField(write, value, unused_deterministic=None):
      for element in value:
        write(tag_bytes)
        if element:
          write(true_byte)
        else:
          write(false_byte)
    return EncodeRepeatedField
  else:
    tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
    def EncodeField(write, value, unused_deterministic=None):
      write(tag_bytes)
      if value:
        return write(true_byte)
      return write(false_byte)
    return EncodeField
def StringEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a string field.

  Args:
    field_number: The field's number, used to build its tag.
    is_repeated: Whether the returned encoder receives a sequence of strings.
    is_packed: Must be false; string fields cannot be packed.

  Returns:
    A function `(write, value, deterministic)` that writes the tag, the
    UTF-8 byte length, and the UTF-8 bytes for each string.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  local_len = len
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        # Encode once; the same bytes provide both the length and the payload.
        encoded = element.encode('utf-8')
        write(tag)
        local_EncodeVarint(write, local_len(encoded), deterministic)
        write(encoded)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      encoded = value.encode('utf-8')
      write(tag)
      local_EncodeVarint(write, local_len(encoded), deterministic)
      return write(encoded)
    return EncodeField
def BytesEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a bytes field.

  Args:
    field_number: The field's number, used to build its tag.
    is_repeated: Whether the returned encoder receives a sequence of values.
    is_packed: Must be false; bytes fields cannot be packed.

  Returns:
    A function `(write, value, deterministic)` that writes the tag, the
    length, and the raw bytes for each value.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  local_len = len
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        write(tag)
        local_EncodeVarint(write, local_len(element), deterministic)
        write(element)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      write(tag)
      local_EncodeVarint(write, local_len(value), deterministic)
      return write(value)
    return EncodeField
def GroupEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a group field.

  Args:
    field_number: The field's number, used to build the start/end tags.
    is_repeated: Whether the returned encoder receives a sequence of groups.
    is_packed: Must be false; group fields cannot be packed.

  Returns:
    A function `(write, value, deterministic)` that writes each group
    bracketed by its start and end tags.
  """

  start_tag = TagBytes(field_number, wire_format.WIRETYPE_START_GROUP)
  end_tag = TagBytes(field_number, wire_format.WIRETYPE_END_GROUP)
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        write(start_tag)
        element._InternalSerialize(write, deterministic)
        write(end_tag)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      write(start_tag)
      value._InternalSerialize(write, deterministic)
      return write(end_tag)
    return EncodeField
def MessageEncoder(field_number, is_repeated, is_packed):
  """Returns an encoder for a message field.

  Args:
    field_number: The field's number, used to build its tag.
    is_repeated: Whether the returned encoder receives a sequence of messages.
    is_packed: Must be false; message fields cannot be packed.

  Returns:
    A function `(write, value, deterministic)` that writes the tag, the
    message's ByteSize() as a varint, and the serialized message.
  """

  tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
  local_EncodeVarint = _EncodeVarint
  assert not is_packed
  if is_repeated:
    def EncodeRepeatedField(write, value, deterministic):
      for element in value:
        write(tag)
        local_EncodeVarint(write, element.ByteSize(), deterministic)
        element._InternalSerialize(write, deterministic)
    return EncodeRepeatedField
  else:
    def EncodeField(write, value, deterministic):
      write(tag)
      local_EncodeVarint(write, value.ByteSize(), deterministic)
      return value._InternalSerialize(write, deterministic)
    return EncodeField
def MessageSetItemEncoder(field_number):
  """Encoder for extensions of MessageSet.

  The message set message looks like this:
    message MessageSet {
      repeated group Item = 1 {
        required int32 type_id = 2;
        required string message = 3;
      }
    }
  """
  # Everything that precedes the message length is constant for a given
  # extension, so it is assembled once here: group start tag, type_id tag and
  # value, and the message tag.
  start_bytes = b"".join([
      TagBytes(1, wire_format.WIRETYPE_START_GROUP),
      TagBytes(2, wire_format.WIRETYPE_VARINT),
      _VarintBytes(field_number),
      TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)])
  end_bytes = TagBytes(1, wire_format.WIRETYPE_END_GROUP)
  local_EncodeVarint = _EncodeVarint

  def EncodeField(write, value, deterministic):
    write(start_bytes)
    local_EncodeVarint(write, value.ByteSize(), deterministic)
    value._InternalSerialize(write, deterministic)
    return write(end_bytes)

  return EncodeField
809 """Encoder for extensions of MessageSet.
811 Maps always have a wire format like this:
814 value_type value = 2;
816 repeated MapEntry map = N;
820 message_type = field_descriptor.message_type
821 encode_message =
MessageEncoder(field_descriptor.number,
False,
False)
823 def EncodeField(write, value, deterministic):
824 value_keys = sorted(value.keys())
if deterministic
else value
825 for key
in value_keys:
826 entry_msg = message_type._concrete_class(key=key, value=value[key])