bloaty/third_party/re2/re2/make_unicode_groups.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # Copyright 2008 The RE2 Authors. All Rights Reserved.
3 # Use of this source code is governed by a BSD-style
4 # license that can be found in the LICENSE file.
5 
6 """Generate C++ tables for Unicode Script and Category groups."""
7 
8 from __future__ import absolute_import
9 from __future__ import division
10 from __future__ import print_function
11 
12 import sys
13 import unicode
14 
# Text printed at the top of the generated output: a "do not edit" banner,
# the include of re2/unicode_groups.h, and the opening of namespace re2.
_header = """
// GENERATED BY make_unicode_groups.py; DO NOT EDIT.
// make_unicode_groups.py >unicode_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

"""

# Text printed at the end of the generated output: closes namespace re2.
_trailer = """

} // namespace re2

"""

# Running totals of 16-bit and 32-bit ranges over all groups. PrintGroup adds
# to them and main() reports them in a comment in the generated output.
n16 = 0
n32 = 0
33 
def MakeRanges(codes):
    """Turn a list like [1,2,3,7,8,9] into a range list [[1,3], [7,9]].

    Args:
      codes: iterable of integers. A value is merged into the current range
        only when it is exactly one greater than the previous value, so
        duplicates or out-of-order values start a new range.

    Returns:
      A list of [lo, hi] pairs (both ends inclusive), in input order.
      An empty input yields an empty list.
    """
    ranges = []
    for c in codes:
        # The upper bound of the last range is always the previous value, so
        # no separate "last" sentinel is needed. Checking `ranges` first also
        # avoids indexing an empty list, which the old -100 sentinel did when
        # the first value happened to be -99.
        if ranges and c == ranges[-1][1] + 1:
            ranges[-1][1] = c
        else:
            ranges.append([c, c])
    return ranges
45 
def PrintRanges(type, name, ranges):
    """Print a C++ static const array definition holding the given ranges.

    Args:
      type: C++ element type name used in the declaration.
      name: identifier of the emitted array.
      ranges: iterable of (lo, hi) integer pairs; one "{ lo, hi }," row is
        printed per pair, each preceded by a tab.
    """
    opening = "static const %s %s[] = {" % (type, name)
    rows = ["\t{ %d, %d }," % (lo, hi) for lo, hi in ranges]
    # One print of the joined lines emits the same text as printing each line.
    print("\n".join([opening] + rows + ["};"]))
52 
53 # def PrintCodes(type, name, codes):
54 # """Print the codes as an array of type named name."""
55 # print("static %s %s[] = {" % (type, name))
56 # for c in codes:
57 # print("\t%d," % (c,))
58 # print("};")
59 
def PrintGroup(name, codes):
    """Print the range tables for one group and return its UGroup literal.

    Code points below 65536 are emitted as a URange16 array named
    <name>_range16 and the rest as a URange32 array named <name>_range32.
    An array is only printed when it has at least one range; otherwise the
    literal carries "0, 0" in its place. The module counters n16 and n32 are
    increased by the number of ranges of each kind.

    Args:
      name: group name; used in the literal and as the array name prefix.
      codes: code points in the group. It is iterated twice, so it must be
        a re-iterable collection rather than a one-shot iterator.

    Returns:
      The UGroup initializer for this group, as a string.
    """
    global n16, n32

    # See unicode_groups.h for a description of the data structure.

    # Partition by whether the code point fits in 16 bits.
    low = MakeRanges([c for c in codes if c < 65536])
    high = MakeRanges([c for c in codes if c >= 65536])

    # NOTE: an earlier variant (disabled in the original source) also pulled
    # singleton ranges out of the 16-bit list into a separate code16 array.

    n16 += len(low)
    n32 += len(high)

    pieces = ["{ \"%s\", +1" % (name,)]

    if low:
        PrintRanges("URange16", name+"_range16", low)
        pieces.append(", %s_range16, %d" % (name, len(low)))
    else:
        pieces.append(", 0, 0")

    if high:
        PrintRanges("URange32", name+"_range32", high)
        pieces.append(", %s_range32, %d" % (name, len(high)))
    else:
        pieces.append(", 0, 0")

    pieces.append(" }")
    return "".join(pieces)
97 
def main():
    """Print the generated C++ tables for all categories and scripts.

    Output goes to standard output: the header, one set of range arrays per
    group (categories first, then scripts, each in sorted name order), a
    comment with the range totals, the sorted unicode_groups array, its
    element count, and the trailer.
    """
    categories = unicode.Categories()
    scripts = unicode.Scripts()
    print(_header)

    ugroups = []
    # PrintGroup prints as a side effect, so keep explicit loops and keep
    # the categories-before-scripts order.
    for table in (categories, scripts):
        for name in sorted(table):
            ugroups.append(PrintGroup(name, table[name]))

    print("// %d 16-bit ranges, %d 32-bit ranges" % (n16, n32))
    print("const UGroup unicode_groups[] = {")
    for literal in sorted(ugroups):
        print("\t%s," % (literal,))
    print("};")
    print("const int num_unicode_groups = %d;" % (len(ugroups),))
    print(_trailer)
115 
116 if __name__ == '__main__':
117  main()
make_unicode_groups.PrintRanges
def PrintRanges(type, name, ranges)
Definition: bloaty/third_party/re2/re2/make_unicode_groups.py:46
unicode.Categories
def Categories(unicode_dir=_UNICODE_DIR)
Definition: bloaty/third_party/re2/re2/unicode.py:280
make_unicode_groups.PrintGroup
def PrintGroup(name, codes)
Definition: bloaty/third_party/re2/re2/make_unicode_groups.py:60
unicode.Scripts
def Scripts(unicode_dir=_UNICODE_DIR)
Definition: bloaty/third_party/re2/re2/unicode.py:259
make_unicode_groups.main
def main()
Definition: bloaty/third_party/re2/re2/make_unicode_groups.py:98
make_unicode_groups.MakeRanges
def MakeRanges(codes)
Definition: bloaty/third_party/re2/re2/make_unicode_groups.py:34
main
Definition: main.py:1
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46


grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:17