31 """Encoding related utilities."""
# Ordinals that are written as a symbolic backslash escape, mapped to the
# exact escape text emitted for them.
_cescape_chr_to_symbol_map = {
    9: r'\t',
    10: r'\n',
    13: r'\r',
    34: r'\"',
    39: r"\'",
    92: r'\\',
}

# Lookup table indexed by ordinal 0..255: every entry starts out as the
# character itself.
_cescape_unicode_to_str = list(map(chr, range(256)))

# Lookup table indexed by byte value 0..255: values 32..126 map to the
# character itself, every other value to a backslash followed by its
# three-digit, zero-padded octal form.
_cescape_byte_to_str = [
    chr(code) if 32 <= code < 127 else r'\%03o' % code
    for code in range(256)
]

# Overlay the symbolic escapes on both tables. The loop names `byte` and
# `string` are intentionally unchanged and stay bound at module level after
# the loop, exactly as before, in case code outside this block refers to them.
for byte, string in _cescape_chr_to_symbol_map.items():
    _cescape_unicode_to_str[byte] = string
    _cescape_byte_to_str[byte] = string
58 """Escape a bytes string for use in an text protocol buffer.
61 text: A byte string to be escaped.
62 as_utf8: Specifies if result may contain non-ASCII characters.
63 In Python 3 this allows unescaped non-ASCII Unicode characters.
64 In Python 2 the return value will be valid UTF-8 rather than only ASCII.
73 text_is_unicode = isinstance(text, str)
74 if as_utf8
and text_is_unicode:
76 return text.translate(_cescape_chr_to_symbol_map)
77 ord_ = ord
if text_is_unicode
else lambda x: x
79 return ''.join(_cescape_unicode_to_str[ord_(c)]
for c
in text)
80 return ''.join(_cescape_byte_to_str[ord_(c)]
for c
in text)
# Matches a run of one or more backslashes (group 1), then a literal 'x',
# then a single hex digit (group 2) that is not followed by another hex
# digit.
_CUNESCAPE_HEX = re.compile(
    r'(\\+)x([0-9a-fA-F])(?![0-9a-fA-F])'
)
88 """Unescape a text string with C-style escape sequences to UTF-8 bytes.
91 text: The data to parse in a str.
99 if len(m.group(1)) & 1:
100 return m.group(1) +
'x0' + m.group(2)
105 result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
107 return (result.encode(
'utf-8')
110 .
encode(
'raw_unicode_escape'))