bloaty/third_party/re2/re2/make_unicode_casefold.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # coding=utf-8
3 #
4 # Copyright 2008 The RE2 Authors. All Rights Reserved.
5 # Use of this source code is governed by a BSD-style
6 # license that can be found in the LICENSE file.
7 
8 # See unicode_casefold.h for description of case folding tables.
9 
10 """Generate C++ table for Unicode case folding."""
11 
12 from __future__ import absolute_import
13 from __future__ import division
14 from __future__ import print_function
15 
16 import sys
17 import unicode
18 
19 _header = """
20 // GENERATED BY make_unicode_casefold.py; DO NOT EDIT.
21 // make_unicode_casefold.py >unicode_casefold.cc
22 
23 #include "re2/unicode_casefold.h"
24 
25 namespace re2 {
26 
27 """
28 
29 _trailer = """
30 
31 } // namespace re2
32 
33 """
34 
35 def _Delta(a, b):
36  """Compute the delta for b - a. Even/odd and odd/even
37  are handled specially, as described above."""
38  if a+1 == b:
39  if a%2 == 0:
40  return 'EvenOdd'
41  else:
42  return 'OddEven'
43  if a == b+1:
44  if a%2 == 0:
45  return 'OddEven'
46  else:
47  return 'EvenOdd'
48  return b - a
49 
50 def _AddDelta(a, delta):
51  """Return a + delta, handling EvenOdd and OddEven specially."""
52  if type(delta) == int:
53  return a+delta
54  if delta == 'EvenOdd':
55  if a%2 == 0:
56  return a+1
57  else:
58  return a-1
59  if delta == 'OddEven':
60  if a%2 == 1:
61  return a+1
62  else:
63  return a-1
64  print("Bad Delta:", delta, file=sys.stderr)
65  raise unicode.Error("Bad Delta")
66 
67 def _MakeRanges(pairs):
68  """Turn a list like [(65,97), (66, 98), ..., (90,122)]
69  into [(65, 90, +32)]."""
70  ranges = []
71  last = -100
72 
73  def evenodd(last, a, b, r):
74  if a != last+1 or b != _AddDelta(a, r[2]):
75  return False
76  r[1] = a
77  return True
78 
79  def evenoddpair(last, a, b, r):
80  if a != last+2:
81  return False
82  delta = r[2]
83  d = delta
84  if type(delta) is not str:
85  return False
86  if delta.endswith('Skip'):
87  d = delta[:-4]
88  else:
89  delta = d + 'Skip'
90  if b != _AddDelta(a, d):
91  return False
92  r[1] = a
93  r[2] = delta
94  return True
95 
96  for a, b in pairs:
97  if ranges and evenodd(last, a, b, ranges[-1]):
98  pass
99  elif ranges and evenoddpair(last, a, b, ranges[-1]):
100  pass
101  else:
102  ranges.append([a, a, _Delta(a, b)])
103  last = a
104  return ranges
105 
106 # The maximum size of a case-folding group.
107 # Case folding is implemented in parse.cc by a recursive process
108 # with a recursion depth equal to the size of the largest
109 # case-folding group, so it is important that this bound be small.
110 # The current tables have no group bigger than 4.
111 # If there are ever groups bigger than 10 or so, it will be
112 # time to rework the code in parse.cc.
113 MaxCasefoldGroup = 4
114 
115 def main():
116  lowergroups, casegroups = unicode.CaseGroups()
117  foldpairs = []
118  seen = {}
119  for c in casegroups:
120  if len(c) > MaxCasefoldGroup:
121  raise unicode.Error("casefold group too long: %s" % (c,))
122  for i in range(len(c)):
123  if c[i-1] in seen:
124  raise unicode.Error("bad casegroups %d -> %d" % (c[i-1], c[i]))
125  seen[c[i-1]] = True
126  foldpairs.append([c[i-1], c[i]])
127 
128  lowerpairs = []
129  for lower, group in lowergroups.items():
130  for g in group:
131  if g != lower:
132  lowerpairs.append([g, lower])
133 
134  def printpairs(name, foldpairs):
135  foldpairs.sort()
136  foldranges = _MakeRanges(foldpairs)
137  print("// %d groups, %d pairs, %d ranges" % (len(casegroups), len(foldpairs), len(foldranges)))
138  print("const CaseFold unicode_%s[] = {" % (name,))
139  for lo, hi, delta in foldranges:
140  print("\t{ %d, %d, %s }," % (lo, hi, delta))
141  print("};")
142  print("const int num_unicode_%s = %d;" % (name, len(foldranges)))
143  print("")
144 
145  print(_header)
146  printpairs("casefold", foldpairs)
147  printpairs("tolower", lowerpairs)
148  print(_trailer)
149 
150 if __name__ == '__main__':
151  main()
make_unicode_casefold.main
def main()
Definition: bloaty/third_party/re2/re2/make_unicode_casefold.py:115
capstone.range
range
Definition: third_party/bloaty/third_party/capstone/bindings/python/capstone/__init__.py:6
unicode.Error
Definition: bloaty/third_party/re2/re2/unicode.py:22
make_unicode_casefold._AddDelta
def _AddDelta(a, delta)
Definition: bloaty/third_party/re2/re2/make_unicode_casefold.py:50
make_unicode_casefold._Delta
def _Delta(a, b)
Definition: bloaty/third_party/re2/re2/make_unicode_casefold.py:35
make_unicode_casefold._MakeRanges
def _MakeRanges(pairs)
Definition: bloaty/third_party/re2/re2/make_unicode_casefold.py:67
unicode.CaseGroups
def CaseGroups(unicode_dir=_UNICODE_DIR)
Definition: bloaty/third_party/re2/re2/unicode.py:225
main
Definition: main.py:1
asyncio_get_stats.type
type
Definition: asyncio_get_stats.py:37
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46


grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:17