grpc
third_party
re2
re2
testing
re2/re2/testing/search_test.cc
Go to the documentation of this file.
1
// Copyright 2006-2007 The RE2 Authors. All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
#include "util/test.h"
6
#include "re2/prog.h"
7
#include "re2/regexp.h"
8
#include "re2/testing/tester.h"
9
#include "re2/testing/exhaustive_tester.h"
10
11
// LOGGING is meant to be supplied by the Makefile's `log' target.
// When the build does not define it, fall back to 0 so the extra
// per-case ExhaustiveTest call in the test body below stays disabled.
#if !defined(LOGGING)
#define LOGGING 0
#endif
15
16
namespace
re2
{
17
18
// One search test case: a regular expression paired with the text it is
// run against. Kept as a plain aggregate so that a table of cases can be
// written with brace initialization.
struct RegexpTest {
  const char* regexp;  // Pattern source, as a NUL-terminated C string.
  const char* text;    // Subject text the pattern is tested on.
};
22
23
// Table of (regexp, text) pairs exercised by the SearchTests test case.
// Each row is independent; rows are grouped by the feature they probe.
RegexpTest simple_tests[] = {
    // Basic literals, repetition, alternation and character classes.
    {"a", "a"},
    {"a", "zyzzyva"},
    {"a+", "aa"},
    {"(a+|b)+", "ab"},
    {"ab|cd", "xabcdx"},
    {"h.*od?", "hello\ngoodbye\n"},
    {"h.*o", "hello\ngoodbye\n"},
    {"h.*o", "goodbye\nhello\n"},
    {"h.*o", "hello world"},
    {"h.*o", "othello, world"},
    {"[^\\s\\S]", "aaaaaaa"},
    {"a", "aaaaaaa"},
    {"a*", "aaaaaaa"},
    {"a*", ""},
    {"ab|cd", "xabcdx"},
    {"a", "cab"},
    {"a*b", "cab"},
    {"((((((((((((((((((((x))))))))))))))))))))", "x"},
    {"[abcd]", "xxxabcdxxx"},
    {"[^x]", "xxxabcdxxx"},
    {"[abcd]+", "xxxabcdxxx"},
    {"[^x]+", "xxxabcdxxx"},
    {"(fo|foo)", "fo"},
    {"(foo|fo)", "foo"},

    // Patterns and texts that differ only in letter case.
    {"aa", "aA"},
    {"a", "Aa"},
    {"a", "A"},
    {"ABC", "abc"},
    {"abc", "XABCY"},
    {"ABC", "xabcy"},

    // Make sure ^ and $ work.
    // The pathological cases didn't work
    // in the original grep code.
    {"foo|bar|[A-Z]", "foo"},
    {"^(foo|bar|[A-Z])", "foo"},
    {"(foo|bar|[A-Z])$", "foo\n"},
    {"(foo|bar|[A-Z])$", "foo"},
    {"^(foo|bar|[A-Z])$", "foo\n"},
    {"^(foo|bar|[A-Z])$", "foo"},
    {"^(foo|bar|[A-Z])$", "bar"},
    {"^(foo|bar|[A-Z])$", "X"},
    {"^(foo|bar|[A-Z])$", "XY"},
    {"^(fo|foo)$", "fo"},
    {"^(fo|foo)$", "foo"},
    {"^^(fo|foo)$", "fo"},
    {"^^(fo|foo)$", "foo"},
    {"^$", ""},
    {"^$", "x"},
    {"^^$", ""},
    {"^$$", ""},
    {"^^$", "x"},
    {"^$$", "x"},
    {"^^$$", ""},
    {"^^$$", "x"},
    {"^^^^^^^^$$$$$$$$", ""},
    {"^", "x"},
    {"$", "x"},

    // Word boundaries (\b).
    {"\\bfoo\\b", "nofoo foo that"},
    {"a\\b", "faoa x"},
    {"\\bbar", "bar x"},
    {"\\bbar", "foo\nbar x"},
    {"bar\\b", "foobar"},
    {"bar\\b", "foobar\nxxx"},
    {"(foo|bar|[A-Z])\\b", "foo"},
    {"(foo|bar|[A-Z])\\b", "foo\n"},
    {"\\b", ""},
    {"\\b", "x"},
    {"\\b(foo|bar|[A-Z])", "foo"},
    {"\\b(foo|bar|[A-Z])\\b", "X"},
    {"\\b(foo|bar|[A-Z])\\b", "XY"},
    {"\\b(foo|bar|[A-Z])\\b", "bar"},
    {"\\b(foo|bar|[A-Z])\\b", "foo"},
    {"\\b(foo|bar|[A-Z])\\b", "foo\n"},
    {"\\b(foo|bar|[A-Z])\\b", "ffoo bbar N x"},
    {"\\b(fo|foo)\\b", "fo"},
    {"\\b(fo|foo)\\b", "foo"},
    {"\\b\\b", ""},
    {"\\b\\b", "x"},
    {"\\b$", ""},
    {"\\b$", "x"},
    {"\\b$", "y x"},
    {"\\b.$", "x"},
    {"^\\b(fo|foo)\\b", "fo"},
    {"^\\b(fo|foo)\\b", "foo"},
    {"^\\b", ""},
    {"^\\b", "x"},
    {"^\\b\\b", ""},
    {"^\\b\\b", "x"},
    {"^\\b$", ""},
    {"^\\b$", "x"},
    {"^\\b.$", "x"},
    {"^\\b.\\b$", "x"},
    {"^^^^^^^^\\b$$$$$$$", ""},
    {"^^^^^^^^\\b.$$$$$$", "x"},
    {"^^^^^^^^\\b$$$$$$$", "x"},

    // Non-word boundaries (\B).
    {"\\Bfoo\\B", "n foo xfoox that"},
    {"a\\B", "faoa x"},
    {"\\Bbar", "bar x"},
    {"\\Bbar", "foo\nbar x"},
    {"bar\\B", "foobar"},
    {"bar\\B", "foobar\nxxx"},
    {"(foo|bar|[A-Z])\\B", "foox"},
    {"(foo|bar|[A-Z])\\B", "foo\n"},
    {"\\B", ""},
    {"\\B", "x"},
    {"\\B(foo|bar|[A-Z])", "foo"},
    {"\\B(foo|bar|[A-Z])\\B", "xXy"},
    {"\\B(foo|bar|[A-Z])\\B", "XY"},
    {"\\B(foo|bar|[A-Z])\\B", "XYZ"},
    {"\\B(foo|bar|[A-Z])\\B", "abara"},
    {"\\B(foo|bar|[A-Z])\\B", "xfoo_"},
    {"\\B(foo|bar|[A-Z])\\B", "xfoo\n"},
    {"\\B(foo|bar|[A-Z])\\B", "foo bar vNx"},
    {"\\B(fo|foo)\\B", "xfoo"},
    {"\\B(foo|fo)\\B", "xfooo"},
    {"\\B\\B", ""},
    {"\\B\\B", "x"},
    {"\\B$", ""},
    {"\\B$", "x"},
    {"\\B$", "y x"},
    {"\\B.$", "x"},
    {"^\\B(fo|foo)\\B", "fo"},
    {"^\\B(fo|foo)\\B", "foo"},
    {"^\\B", ""},
    {"^\\B", "x"},
    {"^\\B\\B", ""},
    {"^\\B\\B", "x"},
    {"^\\B$", ""},
    {"^\\B$", "x"},
    {"^\\B.$", "x"},
    {"^\\B.\\B$", "x"},
    {"^^^^^^^^\\B$$$$$$$", ""},
    {"^^^^^^^^\\B.$$$$$$", "x"},
    {"^^^^^^^^\\B$$$$$$$", "x"},

    // PCRE uses only ASCII for \b computation.
    // All non-ASCII are *not* word characters.
    {"\\bx\\b", "x"},
    {"\\bx\\b", "x>"},
    {"\\bx\\b", "<x"},
    {"\\bx\\b", "<x>"},
    {"\\bx\\b", "ax"},
    {"\\bx\\b", "xb"},
    {"\\bx\\b", "axb"},
    {"\\bx\\b", "«x"},
    {"\\bx\\b", "x»"},
    {"\\bx\\b", "«x»"},
    {"\\bx\\b", "axb"},
    {"\\bx\\b", "áxβ"},
    {"\\Bx\\B", "axb"},
    {"\\Bx\\B", "áxβ"},

    // Weird boundary cases: interleaved ^ and $ on empty, one-line,
    // two-line and blank-line-containing texts.
    {"^$^$", ""},
    {"^$^", ""},
    {"$^$", ""},

    {"^$^$", "x"},
    {"^$^", "x"},
    {"$^$", "x"},

    {"^$^$", "x\ny"},
    {"^$^", "x\ny"},
    {"$^$", "x\ny"},

    {"^$^$", "x\n\ny"},
    {"^$^", "x\n\ny"},
    {"$^$", "x\n\ny"},

    {"^(foo\\$)$", "foo$bar"},
    {"(foo\\$)", "foo$bar"},
    {"^...$", "abc"},

    // UTF-8
    {"^\xe6\x9c\xac$", "\xe6\x9c\xac"},
    {"^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e"},
    {"^...$", ".\xe6\x9c\xac."},

    {"^\\C\\C\\C$", "\xe6\x9c\xac"},
    {"^\\C$", "\xe6\x9c\xac"},
    {"^\\C\\C\\C$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e"},

    // Latin1
    {"^...$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e"},
    {"^.........$", "\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e"},
    {"^...$", ".\xe6\x9c\xac."},
    {"^.....$", ".\xe6\x9c\xac."},

    // Perl v Posix
    {"\\B(fo|foo)\\B", "xfooo"},
    {"(fo|foo)", "foo"},

    // Octal escapes.
    {"\\141", "a"},
    {"\\060", "0"},
    {"\\0600", "00"},
    {"\\608", "08"},
    {"\\01", "\01"},
    // The text literal is split so that the '8' is not read as part of
    // the preceding escape sequence.
    {"\\018", "\01" "8"},

    // Hexadecimal escapes
    {"\\x{61}", "a"},
    {"\\x61", "a"},
    {"\\x{00000061}", "a"},

    // Unicode scripts.
    {"\\p{Greek}+", "aαβb"},
    {"\\P{Greek}+", "aαβb"},
    {"\\p{^Greek}+", "aαβb"},
    {"\\P{^Greek}+", "aαβb"},

    // Unicode properties. Nd is decimal number. N is any number.
    {"[^0-9]+", "abc123"},
    {"\\p{Nd}+", "abc123²³¼½¾₀₉"},
    {"\\p{^Nd}+", "abc123²³¼½¾₀₉"},
    {"\\P{Nd}+", "abc123²³¼½¾₀₉"},
    {"\\P{^Nd}+", "abc123²³¼½¾₀₉"},
    {"\\pN+", "abc123²³¼½¾₀₉"},
    {"\\p{N}+", "abc123²³¼½¾₀₉"},
    {"\\p{^N}+", "abc123²³¼½¾₀₉"},

    {"\\p{Any}+", "abc123"},

    // Character classes & case folding.
    {"(?i)[@-A]+", "@AaB"},  // matches @Aa but not B
    {"(?i)[A-Z]+", "aAzZ"},
    // The backslash sits between A-Z and a-z, so excluding it
    // splits the ranges in an interesting way.
    {"(?i)[^\\\\]+", "Aa\\"},

    // would like to use, but PCRE mishandles in full-match, non-greedy mode
    // { "(?i)[\\\\]+", "Aa" },

    {"(?i)[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY"},

    // The same character classes without the (?i) flag.
    {"[@-A]+", "@AaB"},
    {"[A-Z]+", "aAzZ"},
    {"[^\\\\]+", "Aa\\"},
    {"[acegikmoqsuwy]+", "acegikmoqsuwyACEGIKMOQSUWY"},

    // Anchoring. (^abc in aabcdef was a former bug)
    // The tester checks for a match in the text and
    // subpieces of the text with a byte removed on either side.
    {"^abc", "abcdef"},
    {"^abc", "aabcdef"},
    {"^[ay]*[bx]+c", "abcdef"},
    {"^[ay]*[bx]+c", "aabcdef"},
    {"def$", "abcdef"},
    {"def$", "abcdeff"},
    {"d[ex][fy]$", "abcdef"},
    {"d[ex][fy]$", "abcdeff"},
    {"[dz][ex][fy]$", "abcdef"},
    {"[dz][ex][fy]$", "abcdeff"},
    {"(?m)^abc", "abcdef"},
    {"(?m)^abc", "aabcdef"},
    {"(?m)^[ay]*[bx]+c", "abcdef"},
    {"(?m)^[ay]*[bx]+c", "aabcdef"},
    {"(?m)def$", "abcdef"},
    {"(?m)def$", "abcdeff"},
    {"(?m)d[ex][fy]$", "abcdef"},
    {"(?m)d[ex][fy]$", "abcdeff"},
    {"(?m)[dz][ex][fy]$", "abcdef"},
    {"(?m)[dz][ex][fy]$", "abcdeff"},
    {"^", "a"},
    {"^^", "a"},

    // Context.
    // The tester checks for a match in the text and
    // subpieces of the text with a byte removed on either side.
    {"a", "a"},
    {"ab*", "a"},
    {"a\\C*", "a"},
    {"a\\C+", "a"},
    {"a\\C?", "a"},
    {"a\\C*?", "a"},
    {"a\\C+?", "a"},
    {"a\\C??", "a"},

    // Former bugs.
    {"a\\C*|ba\\C", "baba"},
    {"\\w*I\\w*", "Inc."},
    {"(?:|a)*", "aaa"},
    {"(?:|a)+", "aaa"},
};
314
315
// Feeds every row of simple_tests to TestRegexpOnText, counts the rows for
// which it returns false, and expects that count to be zero at the end.
// When LOGGING is nonzero, each row is additionally passed through
// ExhaustiveTest so that the individual case gets logged.
TEST(Regexp, SearchTests) {
  int failures = 0;
  for (const RegexpTest& test_case : simple_tests) {
    const bool passed = TestRegexpOnText(test_case.regexp, test_case.text);
    if (!passed) {
      failures++;
    }

    if (!LOGGING) {
      continue;
    }

    // Build a dummy ExhaustiveTest call that will trigger just
    // this one test, so that we log the test case: a single atom (the
    // regexp), no operators, and a single-string text alphabet.
    std::vector<std::string> atom;
    std::vector<std::string> alpha;
    std::vector<std::string> ops;
    atom.push_back(test_case.regexp);
    alpha.push_back(test_case.text);
    ExhaustiveTest(1, 0, atom, ops, 1, alpha, "", "");
  }
  EXPECT_EQ(failures, 0);
}
333
334
}
// namespace re2
re2::TestRegexpOnText
bool TestRegexpOnText(const StringPiece &regexp, const StringPiece &text)
Definition:
bloaty/third_party/re2/re2/testing/tester.cc:663
re2::ExhaustiveTest
void ExhaustiveTest(int maxatoms, int maxops, const std::vector< std::string > &alphabet, const std::vector< std::string > &ops, int maxstrlen, const std::vector< std::string > &stralphabet, const std::string &wrapper, const std::string &topwrapper)
Definition:
bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:144
re2
Definition:
bloaty/third_party/re2/re2/bitmap256.h:17
re2::RegexpTest::text
const char * text
Definition:
bloaty/third_party/re2/re2/testing/search_test.cc:20
re2::RegexpTest::regexp
const char * regexp
Definition:
bloaty/third_party/re2/re2/testing/search_test.cc:19
failures
std::atomic< uint64_t > failures
Definition:
outlier_detection.cc:233
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition:
iomgr/time_averaged_stats_test.cc:27
arraysize
#define arraysize(array)
Definition:
benchmark/src/arraysize.h:28
LOGGING
#define LOGGING
Definition:
re2/re2/testing/search_test.cc:13
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition:
bloaty/third_party/re2/re2/testing/charclass_test.cc:198
ops
static grpc_op ops[6]
Definition:
test/core/fling/client.cc:39
i
uint64_t i
Definition:
abseil-cpp/absl/container/btree_benchmark.cc:230
re2::simple_tests
RegexpTest simple_tests[]
Definition:
bloaty/third_party/re2/re2/testing/search_test.cc:23
grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:09