grpc
third_party
bloaty
third_party
re2
re2
testing
bloaty/third_party/re2/re2/testing/simplify_test.cc
Go to the documentation of this file.
1
// Copyright 2006 The RE2 Authors. All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Test simplify.cc.
6
7
#include <
string.h
>
8
#include <string>
9
10
#include "util/test.h"
11
#include "util/logging.h"
12
#include "re2/regexp.h"
13
14
namespace
re2
{
15
16
struct
Test
{
17
const
char
*
regexp
;
18
const
char
*
simplified
;
19
};
20
21
static
Test
tests
[] = {
22
// Already-simple constructs
23
{
"a"
,
"a"
},
24
{
"ab"
,
"ab"
},
25
{
"a|b"
,
"[a-b]"
},
26
{
"ab|cd"
,
"ab|cd"
},
27
{
"(ab)*"
,
"(ab)*"
},
28
{
"(ab)+"
,
"(ab)+"
},
29
{
"(ab)?"
,
"(ab)?"
},
30
{
"."
,
"."
},
31
{
"^"
,
"^"
},
32
{
"$"
,
"$"
},
33
{
"[ac]"
,
"[ac]"
},
34
{
"[^ac]"
,
"[^ac]"
},
35
36
// Posix character classes
37
{
"[[:alnum:]]"
,
"[0-9A-Za-z]"
},
38
{
"[[:alpha:]]"
,
"[A-Za-z]"
},
39
{
"[[:blank:]]"
,
"[\\t ]"
},
40
{
"[[:cntrl:]]"
,
"[\\x00-\\x1f\\x7f]"
},
41
{
"[[:digit:]]"
,
"[0-9]"
},
42
{
"[[:graph:]]"
,
"[!-~]"
},
43
{
"[[:lower:]]"
,
"[a-z]"
},
44
{
"[[:print:]]"
,
"[ -~]"
},
45
{
"[[:punct:]]"
,
"[!-/:-@\\[-`{-~]"
},
46
{
"[[:space:]]"
,
"[\\t-\\r ]"
},
47
{
"[[:upper:]]"
,
"[A-Z]"
},
48
{
"[[:xdigit:]]"
,
"[0-9A-Fa-f]"
},
49
50
// Perl character classes
51
{
"\\d"
,
"[0-9]"
},
52
{
"\\s"
,
"[\\t-\\n\\f-\\r ]"
},
53
{
"\\w"
,
"[0-9A-Z_a-z]"
},
54
{
"\\D"
,
"[^0-9]"
},
55
{
"\\S"
,
"[^\\t-\\n\\f-\\r ]"
},
56
{
"\\W"
,
"[^0-9A-Z_a-z]"
},
57
{
"[\\d]"
,
"[0-9]"
},
58
{
"[\\s]"
,
"[\\t-\\n\\f-\\r ]"
},
59
{
"[\\w]"
,
"[0-9A-Z_a-z]"
},
60
{
"[\\D]"
,
"[^0-9]"
},
61
{
"[\\S]"
,
"[^\\t-\\n\\f-\\r ]"
},
62
{
"[\\W]"
,
"[^0-9A-Z_a-z]"
},
63
64
// Posix repetitions
65
{
"a{1}"
,
"a"
},
66
{
"a{2}"
,
"aa"
},
67
{
"a{5}"
,
"aaaaa"
},
68
{
"a{0,1}"
,
"a?"
},
69
// The next three are illegible because Simplify inserts (?:)
70
// parens instead of () parens to avoid creating extra
71
// captured subexpressions. The comments show a version fewer parens.
72
{
"(a){0,2}"
,
"(?:(a)(a)?)?"
},
// (aa?)?
73
{
"(a){0,4}"
,
"(?:(a)(?:(a)(?:(a)(a)?)?)?)?"
},
// (a(a(aa?)?)?)?
74
{
"(a){2,6}"
,
"(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?"
},
// aa(a(a(aa?)?)?)?
75
{
"a{0,2}"
,
"(?:aa?)?"
},
// (aa?)?
76
{
"a{0,4}"
,
"(?:a(?:a(?:aa?)?)?)?"
},
// (a(a(aa?)?)?)?
77
{
"a{2,6}"
,
"aa(?:a(?:a(?:aa?)?)?)?"
},
// aa(a(a(aa?)?)?)?
78
{
"a{0,}"
,
"a*"
},
79
{
"a{1,}"
,
"a+"
},
80
{
"a{2,}"
,
"aa+"
},
81
{
"a{5,}"
,
"aaaaa+"
},
82
83
// Test that operators simplify their arguments.
84
// (Simplify used to not simplify arguments to a {} repeat.)
85
{
"(?:a{1,}){1,}"
,
"a+"
},
86
{
"(a{1,}b{1,})"
,
"(a+b+)"
},
87
{
"a{1,}|b{1,}"
,
"a+|b+"
},
88
{
"(?:a{1,})*"
,
"(?:a+)*"
},
89
{
"(?:a{1,})+"
,
"a+"
},
90
{
"(?:a{1,})?"
,
"(?:a+)?"
},
91
{
"a{0}"
,
""
},
92
93
// Character class simplification
94
{
"[ab]"
,
"[a-b]"
},
95
{
"[a-za-za-z]"
,
"[a-z]"
},
96
{
"[A-Za-zA-Za-z]"
,
"[A-Za-z]"
},
97
{
"[ABCDEFGH]"
,
"[A-H]"
},
98
{
"[AB-CD-EF-GH]"
,
"[A-H]"
},
99
{
"[W-ZP-XE-R]"
,
"[E-Z]"
},
100
{
"[a-ee-gg-m]"
,
"[a-m]"
},
101
{
"[a-ea-ha-m]"
,
"[a-m]"
},
102
{
"[a-ma-ha-e]"
,
"[a-m]"
},
103
{
"[a-zA-Z0-9 -~]"
,
"[ -~]"
},
104
105
// Empty character classes
106
{
"[^[:cntrl:][:^cntrl:]]"
,
"[^\\x00-\\x{10ffff}]"
},
107
108
// Full character classes
109
{
"[[:cntrl:][:^cntrl:]]"
,
"."
},
110
111
// Unicode case folding.
112
{
"(?i)A"
,
"[Aa]"
},
113
{
"(?i)a"
,
"[Aa]"
},
114
{
"(?i)K"
,
"[Kk\\x{212a}]"
},
115
{
"(?i)k"
,
"[Kk\\x{212a}]"
},
116
{
"(?i)\\x{212a}"
,
"[Kk\\x{212a}]"
},
117
{
"(?i)[a-z]"
,
"[A-Za-z\\x{17f}\\x{212a}]"
},
118
{
"(?i)[\\x00-\\x{FFFD}]"
,
"[\\x00-\\x{fffd}]"
},
119
{
"(?i)[\\x00-\\x{10ffff}]"
,
"."
},
120
121
// Empty string as a regular expression.
122
// Empty string must be preserved inside parens in order
123
// to make submatches work right, so these are less
124
// interesting than they used to be. ToString inserts
125
// explicit (?:) in place of non-parenthesized empty strings,
126
// to make them easier to spot for other parsers.
127
{
"(a|b|)"
,
"([a-b]|(?:))"
},
128
{
"(|)"
,
"((?:)|(?:))"
},
129
{
"a()"
,
"a()"
},
130
{
"(()|())"
,
"(()|())"
},
131
{
"(a|)"
,
"(a|(?:))"
},
132
{
"ab()cd()"
,
"ab()cd()"
},
133
{
"()"
,
"()"
},
134
{
"()*"
,
"()*"
},
135
{
"()+"
,
"()+"
},
136
{
"()?"
,
"()?"
},
137
{
"(){0}"
,
""
},
138
{
"(){1}"
,
"()"
},
139
{
"(){1,}"
,
"()+"
},
140
{
"(){0,2}"
,
"(?:()()?)?"
},
141
142
// Test that coalescing occurs and that the resulting repeats are simplified.
143
// Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal:
144
{
"a*a*"
,
"a*"
},
145
{
"a*a+"
,
"a+"
},
146
{
"a*a?"
,
"a*"
},
147
{
"a*a{2}"
,
"aa+"
},
148
{
"a*a{2,}"
,
"aa+"
},
149
{
"a*a{2,3}"
,
"aa+"
},
150
{
"a+a*"
,
"a+"
},
151
{
"a+a+"
,
"aa+"
},
152
{
"a+a?"
,
"a+"
},
153
{
"a+a{2}"
,
"aaa+"
},
154
{
"a+a{2,}"
,
"aaa+"
},
155
{
"a+a{2,3}"
,
"aaa+"
},
156
{
"a?a*"
,
"a*"
},
157
{
"a?a+"
,
"a+"
},
158
{
"a?a?"
,
"(?:aa?)?"
},
159
{
"a?a{2}"
,
"aaa?"
},
160
{
"a?a{2,}"
,
"aa+"
},
161
{
"a?a{2,3}"
,
"aa(?:aa?)?"
},
162
{
"a{2}a*"
,
"aa+"
},
163
{
"a{2}a+"
,
"aaa+"
},
164
{
"a{2}a?"
,
"aaa?"
},
165
{
"a{2}a{2}"
,
"aaaa"
},
166
{
"a{2}a{2,}"
,
"aaaa+"
},
167
{
"a{2}a{2,3}"
,
"aaaaa?"
},
168
{
"a{2,}a*"
,
"aa+"
},
169
{
"a{2,}a+"
,
"aaa+"
},
170
{
"a{2,}a?"
,
"aa+"
},
171
{
"a{2,}a{2}"
,
"aaaa+"
},
172
{
"a{2,}a{2,}"
,
"aaaa+"
},
173
{
"a{2,}a{2,3}"
,
"aaaa+"
},
174
{
"a{2,3}a*"
,
"aa+"
},
175
{
"a{2,3}a+"
,
"aaa+"
},
176
{
"a{2,3}a?"
,
"aa(?:aa?)?"
},
177
{
"a{2,3}a{2}"
,
"aaaaa?"
},
178
{
"a{2,3}a{2,}"
,
"aaaa+"
},
179
{
"a{2,3}a{2,3}"
,
"aaaa(?:aa?)?"
},
180
// With a char class, any char and any byte:
181
{
"\\d*\\d*"
,
"[0-9]*"
},
182
{
".*.*"
,
".*"
},
183
{
"\\C*\\C*"
,
"\\C*"
},
184
// FoldCase works, but must be consistent:
185
{
"(?i)A*a*"
,
"[Aa]*"
},
186
{
"(?i)a+A+"
,
"[Aa][Aa]+"
},
187
{
"(?i)A*(?-i)a*"
,
"[Aa]*a*"
},
188
{
"(?i)a+(?-i)A+"
,
"[Aa]+A+"
},
189
// NonGreedy works, but must be consistent:
190
{
"a*?a*?"
,
"a*?"
},
191
{
"a+?a+?"
,
"aa+?"
},
192
{
"a*?a*"
,
"a*?a*"
},
193
{
"a+a+?"
,
"a+a+?"
},
194
// The second element is the literal, char class, any char or any byte:
195
{
"a*a"
,
"a+"
},
196
{
"\\d*\\d"
,
"[0-9]+"
},
197
{
".*."
,
".+"
},
198
{
"\\C*\\C"
,
"\\C+"
},
199
// FoldCase works, but must be consistent:
200
{
"(?i)A*a"
,
"[Aa]+"
},
201
{
"(?i)a+A"
,
"[Aa][Aa]+"
},
202
{
"(?i)A*(?-i)a"
,
"[Aa]*a"
},
203
{
"(?i)a+(?-i)A"
,
"[Aa]+A"
},
204
// The second element is a literal string that begins with the literal:
205
{
"a*aa"
,
"aa+"
},
206
{
"a*aab"
,
"aa+b"
},
207
// FoldCase works, but must be consistent:
208
{
"(?i)a*aa"
,
"[Aa][Aa]+"
},
209
{
"(?i)a*aab"
,
"[Aa][Aa]+[Bb]"
},
210
{
"(?i)a*(?-i)aa"
,
"[Aa]*aa"
},
211
{
"(?i)a*(?-i)aab"
,
"[Aa]*aab"
},
212
// Negative tests with mismatching ops:
213
{
"a*b*"
,
"a*b*"
},
214
{
"\\d*\\D*"
,
"[0-9]*[^0-9]*"
},
215
{
"a+b"
,
"a+b"
},
216
{
"\\d+\\D"
,
"[0-9]+[^0-9]"
},
217
{
"a?bb"
,
"a?bb"
},
218
// Negative tests with capturing groups:
219
{
"(a*)a*"
,
"(a*)a*"
},
220
{
"a+(a)"
,
"a+(a)"
},
221
{
"(a?)(aa)"
,
"(a?)(aa)"
},
222
// Just for fun:
223
{
"aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a"
,
"aaaaaaaaaaaaaaaa+"
},
224
225
// During coalescing, the child of the repeat changes, so we build a new
226
// repeat. The new repeat must have the min and max of the old repeat.
227
// Failure to copy them results in min=0 and max=0 -> empty match.
228
{
"(?:a*aab){2}"
,
"aa+baa+b"
},
229
230
// During coalescing, the child of the capture changes, so we build a new
231
// capture. The new capture must have the cap of the old capture.
232
// Failure to copy it results in cap=0 -> ToString() logs a fatal error.
233
{
"(a*aab)"
,
"(aa+b)"
},
234
235
// Test squashing of **, ++, ?? et cetera.
236
{
"(?:(?:a){0,}){0,}"
,
"a*"
},
237
{
"(?:(?:a){1,}){1,}"
,
"a+"
},
238
{
"(?:(?:a){0,1}){0,1}"
,
"a?"
},
239
{
"(?:(?:a){0,}){1,}"
,
"a*"
},
240
{
"(?:(?:a){0,}){0,1}"
,
"a*"
},
241
{
"(?:(?:a){1,}){0,}"
,
"a*"
},
242
{
"(?:(?:a){1,}){0,1}"
,
"a*"
},
243
{
"(?:(?:a){0,1}){0,}"
,
"a*"
},
244
{
"(?:(?:a){0,1}){1,}"
,
"a*"
},
245
};
246
247
TEST
(TestSimplify, SimpleRegexps) {
248
for
(
size_t
i
= 0;
i
<
arraysize
(
tests
);
i
++) {
249
RegexpStatus
status
;
250
VLOG
(1) <<
"Testing "
<<
tests
[
i
].regexp;
251
Regexp
* re =
Regexp::Parse
(
tests
[
i
].regexp,
252
Regexp::MatchNL
| (
Regexp::LikePerl
&
253
~
Regexp::OneLine
),
254
&
status
);
255
ASSERT_TRUE
(re != NULL) <<
" "
<<
tests
[
i
].regexp <<
" "
<<
status
.Text();
256
Regexp
* sre = re->
Simplify
();
257
ASSERT_TRUE
(sre != NULL);
258
259
// Check that already-simple regexps don't allocate new ones.
260
if
(strcmp(
tests
[
i
].regexp,
tests
[
i
].simplified) == 0) {
261
ASSERT_TRUE
(re == sre) <<
" "
<<
tests
[
i
].regexp
262
<<
" "
<< re->
ToString
() <<
" "
<< sre->
ToString
();
263
}
264
265
EXPECT_EQ
(
tests
[
i
].simplified, sre->
ToString
())
266
<<
" "
<<
tests
[
i
].regexp <<
" "
<< sre->
Dump
();
267
268
re->
Decref
();
269
sre->
Decref
();
270
}
271
}
272
273
}
// namespace re2
re2::Regexp::Decref
void Decref()
Definition:
bloaty/third_party/re2/re2/regexp.cc:115
VLOG
#define VLOG(x)
Definition:
third_party/bloaty/third_party/protobuf/third_party/benchmark/src/log.h:69
string.h
re2::Regexp
Definition:
bloaty/third_party/re2/re2/regexp.h:274
status
absl::Status status
Definition:
rls.cc:251
re2
Definition:
bloaty/third_party/re2/re2/bitmap256.h:17
re2::RegexpStatus
Definition:
bloaty/third_party/re2/re2/regexp.h:190
re2::Regexp::OneLine
@ OneLine
Definition:
bloaty/third_party/re2/re2/regexp.h:286
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition:
iomgr/time_averaged_stats_test.cc:27
re2::Regexp::ToString
std::string ToString()
Definition:
bloaty/third_party/re2/re2/tostring.cc:55
re2::Regexp::Parse
static Regexp * Parse(const StringPiece &s, ParseFlags flags, RegexpStatus *status)
Definition:
bloaty/third_party/re2/re2/parse.cc:2200
re2::Regexp::Dump
std::string Dump()
Definition:
bloaty/third_party/re2/re2/testing/dump.cc:156
arraysize
#define arraysize(array)
Definition:
benchmark/src/arraysize.h:28
re2::Test::simplified
const char * simplified
Definition:
bloaty/third_party/re2/re2/testing/simplify_test.cc:18
tests
Definition:
src/python/grpcio_tests/tests/__init__.py:1
re2::Regexp::Simplify
Regexp * Simplify()
Definition:
bloaty/third_party/re2/re2/simplify.cc:179
ASSERT_TRUE
#define ASSERT_TRUE(condition)
Definition:
bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1973
re2::Test
Definition:
bloaty/third_party/re2/re2/testing/compile_test.cc:22
re2::Test::regexp
const char * regexp
Definition:
bloaty/third_party/re2/re2/testing/compile_test.cc:23
re2::Regexp::MatchNL
@ MatchNL
Definition:
bloaty/third_party/re2/re2/regexp.h:285
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition:
bloaty/third_party/re2/re2/testing/charclass_test.cc:198
Test
Definition:
hpack_parser_test.cc:43
i
uint64_t i
Definition:
abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Regexp::LikePerl
@ LikePerl
Definition:
bloaty/third_party/re2/re2/regexp.h:312
grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:13