31 """Encoding related utilities."""
# Characters that have a dedicated C-style symbolic escape, keyed by their
# integer code point.
_cescape_chr_to_symbol_map = {
    9: r'\t',   # horizontal tab
    10: r'\n',  # line feed
    13: r'\r',  # carriage return
    34: r'\"',  # double quote
    39: r"\'",  # single quote
    92: r'\\',  # backslash
}

# Lookup table indexed by code point 0-255: each entry is the character
# itself, except for the code points that have a symbolic escape above.
_cescape_unicode_to_str = list(map(chr, range(256)))
for byte, string in _cescape_chr_to_symbol_map.items():
    _cescape_unicode_to_str[byte] = string

# Lookup table indexed by byte value 0-255: values 32-126 map to the
# character itself, everything else to a three-digit octal escape.  The
# symbolic escapes are applied afterwards and take precedence.
_cescape_byte_to_str = [
    chr(i) if 32 <= i < 127 else r'\%03o' % i
    for i in range(256)
]
for byte, string in _cescape_chr_to_symbol_map.items():
    _cescape_byte_to_str[byte] = string
60 """Escape a bytes string for use in a text protocol buffer.
63 text: A byte string to be escaped.
64 as_utf8: Specifies if result may contain non-ASCII characters.
65 In Python 3 this allows unescaped non-ASCII Unicode characters.
66 In Python 2 the return value will be valid UTF-8 rather than only ASCII.
76 text_is_unicode = isinstance(text, str)
77 if as_utf8
and text_is_unicode:
79 return text.translate(_cescape_chr_to_symbol_map)
80 ord_ = ord
if text_is_unicode
else lambda x: x
84 return ''.join(_cescape_unicode_to_str[ord_(c)]
for c
in text)
85 return ''.join(_cescape_byte_to_str[ord_(c)]
for c
in text)
# Matches a run of backslashes followed by 'x' and exactly one hex digit,
# i.e. a hex escape whose digit is not followed by a second hex digit.
_CUNESCAPE_HEX = re.compile(
    r'(\\+)'            # group 1: one or more backslashes
    r'x([0-9a-fA-F])'   # literal 'x', then group 2: a single hex digit
    r'(?![0-9a-fA-F])'  # ...which must not be followed by another hex digit
)
93 """Unescape a text string with C-style escape sequences to UTF-8 bytes.
96 text: The data to parse in a str.
104 if len(m.group(1)) & 1:
105 return m.group(1) +
'x0' + m.group(2)
110 result = _CUNESCAPE_HEX.sub(ReplaceHex, text)
113 return result.decode(
'string_escape')
114 return (result.encode(
'utf-8')
115 .decode(
'unicode_escape')
117 .encode(
'raw_unicode_escape'))