bloaty/third_party/re2/util/rune.cc
Go to the documentation of this file.
1 /*
2  * The authors of this software are Rob Pike and Ken Thompson.
3  * Copyright (c) 2002 by Lucent Technologies.
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose without fee is hereby granted, provided that this entire notice
6  * is included in all copies of any software which is or includes a copy
7  * or modification of this software and in all copies of the supporting
8  * documentation for such software.
9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10  * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
11  * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13  */
14 
15 #include <stdarg.h>
16 #include <string.h>
17 
18 #include "util/utf.h"
19 
20 namespace re2 {
21 
22 enum
23 {
24  Bit1 = 7,
25  Bitx = 6,
26  Bit2 = 5,
27  Bit3 = 4,
28  Bit4 = 3,
29  Bit5 = 2,
30 
31  T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
32  Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
33  T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
34  T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
35  T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
36  T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
37 
38  Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
39  Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
40  Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
41  Rune4 = (1<<(Bit4+3*Bitx))-1,
42  /* 0001 1111 1111 1111 1111 1111 */
43 
44  Maskx = (1<<Bitx)-1, /* 0011 1111 */
45  Testx = Maskx ^ 0xFF, /* 1100 0000 */
46 
48 };
49 
50 int
51 chartorune(Rune *rune, const char *str)
52 {
53  int c, c1, c2, c3;
54  long l;
55 
56  /*
57  * one character sequence
58  * 00000-0007F => T1
59  */
60  c = *(unsigned char*)str;
61  if(c < Tx) {
62  *rune = c;
63  return 1;
64  }
65 
66  /*
67  * two character sequence
68  * 0080-07FF => T2 Tx
69  */
70  c1 = *(unsigned char*)(str+1) ^ Tx;
71  if(c1 & Testx)
72  goto bad;
73  if(c < T3) {
74  if(c < T2)
75  goto bad;
76  l = ((c << Bitx) | c1) & Rune2;
77  if(l <= Rune1)
78  goto bad;
79  *rune = l;
80  return 2;
81  }
82 
83  /*
84  * three character sequence
85  * 0800-FFFF => T3 Tx Tx
86  */
87  c2 = *(unsigned char*)(str+2) ^ Tx;
88  if(c2 & Testx)
89  goto bad;
90  if(c < T4) {
91  l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
92  if(l <= Rune2)
93  goto bad;
94  *rune = l;
95  return 3;
96  }
97 
98  /*
99  * four character sequence (21-bit value)
100  * 10000-1FFFFF => T4 Tx Tx Tx
101  */
102  c3 = *(unsigned char*)(str+3) ^ Tx;
103  if (c3 & Testx)
104  goto bad;
105  if (c < T5) {
106  l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
107  if (l <= Rune3)
108  goto bad;
109  *rune = l;
110  return 4;
111  }
112 
113  /*
114  * Support for 5-byte or longer UTF-8 would go here, but
115  * since we don't have that, we'll just fall through to bad.
116  */
117 
118  /*
119  * bad decoding
120  */
121 bad:
122  *rune = Bad;
123  return 1;
124 }
125 
126 int
127 runetochar(char *str, const Rune *rune)
128 {
129  /* Runes are signed, so convert to unsigned for range check. */
130  unsigned long c;
131 
132  /*
133  * one character sequence
134  * 00000-0007F => 00-7F
135  */
136  c = *rune;
137  if(c <= Rune1) {
138  str[0] = static_cast<char>(c);
139  return 1;
140  }
141 
142  /*
143  * two character sequence
144  * 0080-07FF => T2 Tx
145  */
146  if(c <= Rune2) {
147  str[0] = T2 | static_cast<char>(c >> 1*Bitx);
148  str[1] = Tx | (c & Maskx);
149  return 2;
150  }
151 
152  /*
153  * If the Rune is out of range, convert it to the error rune.
154  * Do this test here because the error rune encodes to three bytes.
155  * Doing it earlier would duplicate work, since an out of range
156  * Rune wouldn't have fit in one or two bytes.
157  */
158  if (c > Runemax)
159  c = Runeerror;
160 
161  /*
162  * three character sequence
163  * 0800-FFFF => T3 Tx Tx
164  */
165  if (c <= Rune3) {
166  str[0] = T3 | static_cast<char>(c >> 2*Bitx);
167  str[1] = Tx | ((c >> 1*Bitx) & Maskx);
168  str[2] = Tx | (c & Maskx);
169  return 3;
170  }
171 
172  /*
173  * four character sequence (21-bit value)
174  * 10000-1FFFFF => T4 Tx Tx Tx
175  */
176  str[0] = T4 | static_cast<char>(c >> 3*Bitx);
177  str[1] = Tx | ((c >> 2*Bitx) & Maskx);
178  str[2] = Tx | ((c >> 1*Bitx) & Maskx);
179  str[3] = Tx | (c & Maskx);
180  return 4;
181 }
182 
183 int
185 {
186  char str[10];
187 
188  return runetochar(str, &rune);
189 }
190 
191 int
192 fullrune(const char *str, int n)
193 {
194  if (n > 0) {
195  int c = *(unsigned char*)str;
196  if (c < Tx)
197  return 1;
198  if (n > 1) {
199  if (c < T3)
200  return 1;
201  if (n > 2) {
202  if (c < T4 || n > 3)
203  return 1;
204  }
205  }
206  }
207  return 0;
208 }
209 
210 
211 int
212 utflen(const char *s)
213 {
214  int c;
215  long n;
216  Rune rune;
217 
218  n = 0;
219  for(;;) {
220  c = *(unsigned char*)s;
221  if(c < Runeself) {
222  if(c == 0)
223  return n;
224  s++;
225  } else
226  s += chartorune(&rune, s);
227  n++;
228  }
229  return 0;
230 }
231 
232 char*
233 utfrune(const char *s, Rune c)
234 {
235  long c1;
236  Rune r;
237  int n;
238 
239  if(c < Runesync) /* not part of utf sequence */
240  return strchr((char*)s, c);
241 
242  for(;;) {
243  c1 = *(unsigned char*)s;
244  if(c1 < Runeself) { /* one byte rune */
245  if(c1 == 0)
246  return 0;
247  if(c1 == c)
248  return (char*)s;
249  s++;
250  continue;
251  }
252  n = chartorune(&r, s);
253  if(r == c)
254  return (char*)s;
255  s += n;
256  }
257  return 0;
258 }
259 
260 } // namespace re2
xds_interop_client.str
str
Definition: xds_interop_client.py:487
check_banned_filenames.bad
bad
Definition: check_banned_filenames.py:26
re2::fullrune
int fullrune(const char *str, int n)
Definition: bloaty/third_party/re2/util/rune.cc:192
re2::utflen
int utflen(const char *s)
Definition: bloaty/third_party/re2/util/rune.cc:212
re2::Tx
@ Tx
Definition: bloaty/third_party/re2/util/rune.cc:32
re2::utfrune
char * utfrune(const char *s, Rune c)
Definition: bloaty/third_party/re2/util/rune.cc:233
string.h
re2::Bit4
@ Bit4
Definition: bloaty/third_party/re2/util/rune.cc:28
re2::Runesync
@ Runesync
Definition: bloaty/third_party/re2/util/utf.h:30
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::Rune3
@ Rune3
Definition: bloaty/third_party/re2/util/rune.cc:40
re2::Bad
@ Bad
Definition: bloaty/third_party/re2/util/rune.cc:47
re2::T1
@ T1
Definition: bloaty/third_party/re2/util/rune.cc:31
re2::Bitx
@ Bitx
Definition: bloaty/third_party/re2/util/rune.cc:25
re2::Runeself
@ Runeself
Definition: bloaty/third_party/re2/util/utf.h:31
re2::runetochar
int runetochar(char *str, const Rune *rune)
Definition: bloaty/third_party/re2/util/rune.cc:127
re2::Bit1
@ Bit1
Definition: bloaty/third_party/re2/util/rune.cc:24
re2::Rune2
@ Rune2
Definition: bloaty/third_party/re2/util/rune.cc:39
re2::T3
@ T3
Definition: bloaty/third_party/re2/util/rune.cc:34
re2::Bit3
@ Bit3
Definition: bloaty/third_party/re2/util/rune.cc:27
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
re2::Rune4
@ Rune4
Definition: bloaty/third_party/re2/util/rune.cc:41
absl::hash_internal::c1
static const uint32_t c1
Definition: abseil-cpp/absl/hash/internal/city.cc:58
re2::Bit2
@ Bit2
Definition: bloaty/third_party/re2/util/rune.cc:26
re2::Rune
signed int Rune
Definition: bloaty/third_party/re2/util/utf.h:25
re2::T2
@ T2
Definition: bloaty/third_party/re2/util/rune.cc:33
fix_build_deps.r
r
Definition: fix_build_deps.py:491
re2::Rune1
@ Rune1
Definition: bloaty/third_party/re2/util/rune.cc:38
re2::Maskx
@ Maskx
Definition: bloaty/third_party/re2/util/rune.cc:44
re2::chartorune
int chartorune(Rune *rune, const char *str)
Definition: bloaty/third_party/re2/util/rune.cc:51
re2::Runeerror
@ Runeerror
Definition: bloaty/third_party/re2/util/utf.h:32
re2::Testx
@ Testx
Definition: bloaty/third_party/re2/util/rune.cc:45
re2::Bit5
@ Bit5
Definition: bloaty/third_party/re2/util/rune.cc:29
re2::runelen
int runelen(Rune rune)
Definition: bloaty/third_party/re2/util/rune.cc:184
re2::T4
@ T4
Definition: bloaty/third_party/re2/util/rune.cc:35
re2::T5
@ T5
Definition: bloaty/third_party/re2/util/rune.cc:36
re2::Runemax
@ Runemax
Definition: bloaty/third_party/re2/util/utf.h:33
absl::hash_internal::c2
static const uint32_t c2
Definition: abseil-cpp/absl/hash/internal/city.cc:59


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:08