re2/re2/testing/possible_match_test.cc
Go to the documentation of this file.
1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include <string.h>
6 #include <string>
7 #include <vector>
8 
9 #include "util/test.h"
10 #include "util/logging.h"
11 #include "util/strutil.h"
12 #include "re2/prog.h"
13 #include "re2/re2.h"
14 #include "re2/regexp.h"
15 #include "re2/testing/exhaustive_tester.h"
16 #include "re2/testing/regexp_generator.h"
17 #include "re2/testing/string_generator.h"
18 
19 namespace re2 {
20 
21 // Test that C++ strings are compared as uint8s, not int8s.
22 // PossibleMatchRange doesn't depend on this, but callers probably will.
23 TEST(CplusplusStrings, EightBit) {
24  std::string s = "\x70";
25  std::string t = "\xA0";
26  EXPECT_LT(s, t);
27 }
28 
29 struct PrefixTest {
30  const char* regexp;
31  int maxlen;
32  const char* min;
33  const char* max;
34 };
35 
36 static PrefixTest tests[] = {
37  { "", 10, "", "", },
38  { "Abcdef", 10, "Abcdef", "Abcdef" },
39  { "abc(def|ghi)", 10, "abcdef", "abcghi" },
40  { "a+hello", 10, "aa", "ahello" },
41  { "a*hello", 10, "a", "hello" },
42  { "def|abc", 10, "abc", "def" },
43  { "a(b)(c)[d]", 10, "abcd", "abcd" },
44  { "ab(cab|cat)", 10, "abcab", "abcat" },
45  { "ab(cab|ca)x", 10, "abcabx", "abcax" },
46  { "(ab|x)(c|de)", 10, "abc", "xde" },
47  { "(ab|x)?(c|z)?", 10, "", "z" },
48  { "[^\\s\\S]", 10, "", "" },
49  { "(abc)+", 5, "abc", "abcac" },
50  { "(abc)+", 2, "ab", "ac" },
51  { "(abc)+", 1, "a", "b" },
52  { "[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
53  { "a*", 10, "", "ab" },
54 
55  { "(?i)Abcdef", 10, "ABCDEF", "abcdef" },
56  { "(?i)abc(def|ghi)", 10, "ABCDEF", "abcghi" },
57  { "(?i)a+hello", 10, "AA", "ahello" },
58  { "(?i)a*hello", 10, "A", "hello" },
59  { "(?i)def|abc", 10, "ABC", "def" },
60  { "(?i)a(b)(c)[d]", 10, "ABCD", "abcd" },
61  { "(?i)ab(cab|cat)", 10, "ABCAB", "abcat" },
62  { "(?i)ab(cab|ca)x", 10, "ABCABX", "abcax" },
63  { "(?i)(ab|x)(c|de)", 10, "ABC", "xde" },
64  { "(?i)(ab|x)?(c|z)?", 10, "", "z" },
65  { "(?i)[^\\s\\S]", 10, "", "" },
66  { "(?i)(abc)+", 5, "ABC", "abcac" },
67  { "(?i)(abc)+", 2, "AB", "ac" },
68  { "(?i)(abc)+", 1, "A", "b" },
69  { "(?i)[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
70  { "(?i)a*", 10, "", "ab" },
71  { "(?i)A*", 10, "", "ab" },
72 
73  { "\\AAbcdef", 10, "Abcdef", "Abcdef" },
74  { "\\Aabc(def|ghi)", 10, "abcdef", "abcghi" },
75  { "\\Aa+hello", 10, "aa", "ahello" },
76  { "\\Aa*hello", 10, "a", "hello" },
77  { "\\Adef|abc", 10, "abc", "def" },
78  { "\\Aa(b)(c)[d]", 10, "abcd", "abcd" },
79  { "\\Aab(cab|cat)", 10, "abcab", "abcat" },
80  { "\\Aab(cab|ca)x", 10, "abcabx", "abcax" },
81  { "\\A(ab|x)(c|de)", 10, "abc", "xde" },
82  { "\\A(ab|x)?(c|z)?", 10, "", "z" },
83  { "\\A[^\\s\\S]", 10, "", "" },
84  { "\\A(abc)+", 5, "abc", "abcac" },
85  { "\\A(abc)+", 2, "ab", "ac" },
86  { "\\A(abc)+", 1, "a", "b" },
87  { "\\A[a\xC3\xA1]", 4, "a", "\xC3\xA1" },
88  { "\\Aa*", 10, "", "ab" },
89 
90  { "(?i)\\AAbcdef", 10, "ABCDEF", "abcdef" },
91  { "(?i)\\Aabc(def|ghi)", 10, "ABCDEF", "abcghi" },
92  { "(?i)\\Aa+hello", 10, "AA", "ahello" },
93  { "(?i)\\Aa*hello", 10, "A", "hello" },
94  { "(?i)\\Adef|abc", 10, "ABC", "def" },
95  { "(?i)\\Aa(b)(c)[d]", 10, "ABCD", "abcd" },
96  { "(?i)\\Aab(cab|cat)", 10, "ABCAB", "abcat" },
97  { "(?i)\\Aab(cab|ca)x", 10, "ABCABX", "abcax" },
98  { "(?i)\\A(ab|x)(c|de)", 10, "ABC", "xde" },
99  { "(?i)\\A(ab|x)?(c|z)?", 10, "", "z" },
100  { "(?i)\\A[^\\s\\S]", 10, "", "" },
101  { "(?i)\\A(abc)+", 5, "ABC", "abcac" },
102  { "(?i)\\A(abc)+", 2, "AB", "ac" },
103  { "(?i)\\A(abc)+", 1, "A", "b" },
104  { "(?i)\\A[a\xC3\xA1]", 4, "A", "\xC3\xA1" },
105  { "(?i)\\Aa*", 10, "", "ab" },
106  { "(?i)\\AA*", 10, "", "ab" },
107 };
108 
109 TEST(PossibleMatchRange, HandWritten) {
110  for (size_t i = 0; i < arraysize(tests); i++) {
111  for (size_t j = 0; j < 2; j++) {
112  const PrefixTest& t = tests[i];
114  if (j == 0) {
115  LOG(INFO) << "Checking regexp=" << CEscape(t.regexp);
116  Regexp* re = Regexp::Parse(t.regexp, Regexp::LikePerl, NULL);
117  ASSERT_TRUE(re != NULL);
118  Prog* prog = re->CompileToProg(0);
119  ASSERT_TRUE(prog != NULL);
120  ASSERT_TRUE(prog->PossibleMatchRange(&min, &max, t.maxlen))
121  << " " << t.regexp;
122  delete prog;
123  re->Decref();
124  } else {
125  ASSERT_TRUE(RE2(t.regexp).PossibleMatchRange(&min, &max, t.maxlen));
126  }
127  EXPECT_EQ(t.min, min) << t.regexp;
128  EXPECT_EQ(t.max, max) << t.regexp;
129  }
130  }
131 }
132 
133 // Test cases where PossibleMatchRange should return false.
134 TEST(PossibleMatchRange, Failures) {
136 
137  // Fails because no room to write max.
138  EXPECT_FALSE(RE2("abc").PossibleMatchRange(&min, &max, 0));
139 
140  // Fails because there is no max -- any non-empty string matches
141  // or begins a match. Have to use Latin-1 input, because there
142  // are no valid UTF-8 strings beginning with byte 0xFF.
143  EXPECT_FALSE(RE2("[\\s\\S]+", RE2::Latin1).
144  PossibleMatchRange(&min, &max, 10))
145  << "min=" << CEscape(min) << ", max=" << CEscape(max);
146  EXPECT_FALSE(RE2("[\\0-\xFF]+", RE2::Latin1).
147  PossibleMatchRange(&min, &max, 10))
148  << "min=" << CEscape(min) << ", max=" << CEscape(max);
149  EXPECT_FALSE(RE2(".+hello", RE2::Latin1).
150  PossibleMatchRange(&min, &max, 10))
151  << "min=" << CEscape(min) << ", max=" << CEscape(max);
152  EXPECT_FALSE(RE2(".*hello", RE2::Latin1).
153  PossibleMatchRange(&min, &max, 10))
154  << "min=" << CEscape(min) << ", max=" << CEscape(max);
155  EXPECT_FALSE(RE2(".*", RE2::Latin1).
156  PossibleMatchRange(&min, &max, 10))
157  << "min=" << CEscape(min) << ", max=" << CEscape(max);
158  EXPECT_FALSE(RE2("\\C*").
159  PossibleMatchRange(&min, &max, 10))
160  << "min=" << CEscape(min) << ", max=" << CEscape(max);
161 
162  // Fails because it's a malformed regexp.
163  EXPECT_FALSE(RE2("*hello").PossibleMatchRange(&min, &max, 10))
164  << "min=" << CEscape(min) << ", max=" << CEscape(max);
165 }
166 
167 // Exhaustive test: generate all regexps within parameters,
168 // then generate all strings of a given length over a given alphabet,
169 // then check that the prefix information agrees with whether
170 // the regexp matches each of the strings.
171 class PossibleMatchTester : public RegexpGenerator {
172  public:
173  PossibleMatchTester(int maxatoms,
174  int maxops,
175  const std::vector<std::string>& alphabet,
176  const std::vector<std::string>& ops,
177  int maxstrlen,
178  const std::vector<std::string>& stralphabet)
179  : RegexpGenerator(maxatoms, maxops, alphabet, ops),
180  strgen_(maxstrlen, stralphabet),
181  regexps_(0), tests_(0) { }
182 
183  int regexps() { return regexps_; }
184  int tests() { return tests_; }
185 
186  // Needed for RegexpGenerator interface.
187  void HandleRegexp(const std::string& regexp);
188 
189  private:
191 
192  int regexps_; // Number of HandleRegexp calls
193  int tests_; // Number of regexp tests.
194 
195  PossibleMatchTester(const PossibleMatchTester&) = delete;
197 };
198 
199 // Processes a single generated regexp.
200 // Checks that all accepted strings agree with the prefix range.
202  regexps_++;
203 
204  VLOG(3) << CEscape(regexp);
205 
206  RE2 re(regexp, RE2::Latin1);
207  ASSERT_EQ(re.error(), "");
208 
210  if(!re.PossibleMatchRange(&min, &max, 10)) {
211  // There's no good max for "\\C*". Can't use strcmp
212  // because sometimes it gets embedded in more
213  // complicated expressions.
214  if(strstr(regexp.c_str(), "\\C*"))
215  return;
216  LOG(QFATAL) << "PossibleMatchRange failed on: " << CEscape(regexp);
217  }
218 
219  strgen_.Reset();
220  while (strgen_.HasNext()) {
221  const StringPiece& s = strgen_.Next();
222  tests_++;
223  if (!RE2::FullMatch(s, re))
224  continue;
225  ASSERT_GE(s, min) << " regexp: " << regexp << " max: " << max;
226  ASSERT_LE(s, max) << " regexp: " << regexp << " min: " << min;
227  }
228 }
229 
230 TEST(PossibleMatchRange, Exhaustive) {
231  int natom = 3;
232  int noperator = 3;
233  int stringlen = 5;
234  if (RE2_DEBUG_MODE) {
235  natom = 2;
236  noperator = 3;
237  stringlen = 3;
238  }
239  PossibleMatchTester t(natom, noperator, Split(" ", "a b [0-9]"),
241  stringlen, Explode("ab4"));
242  t.Generate();
243  LOG(INFO) << t.regexps() << " regexps, "
244  << t.tests() << " tests";
245 }
246 
247 } // namespace re2
EXPECT_FALSE
#define EXPECT_FALSE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1970
re2::RE2::FullMatch
static bool FullMatch(const StringPiece &text, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:367
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
re2::PrefixTest::min
const char * min
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:32
VLOG
#define VLOG(x)
Definition: third_party/bloaty/third_party/protobuf/third_party/benchmark/src/log.h:69
re2::Split
std::vector< std::string > Split(const StringPiece &sep, const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:256
string.h
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
ASSERT_GE
#define ASSERT_GE(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2072
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
absl::FormatConversionChar::s
@ s
ASSERT_LE
#define ASSERT_LE(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2064
alphabet
static const char alphabet[]
Definition: bin_encoder.cc:30
re2::StringGenerator::HasNext
bool HasNext()
Definition: bloaty/third_party/re2/re2/testing/string_generator.h:28
re2::PossibleMatchTester::PossibleMatchTester
PossibleMatchTester(int maxatoms, int maxops, const std::vector< std::string > &alphabet, const std::vector< std::string > &ops, int maxstrlen, const std::vector< std::string > &stralphabet)
Definition: re2/re2/testing/possible_match_test.cc:173
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition: iomgr/time_averaged_stats_test.cc:27
re2::PossibleMatchTester::tests_
int tests_
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:193
re2::PossibleMatchTester::regexps
int regexps()
Definition: re2/re2/testing/possible_match_test.cc:183
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
python_utils.jobset.INFO
INFO
Definition: jobset.py:111
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::PossibleMatchTester::regexps_
int regexps_
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:192
re2::PossibleMatchTester::strgen_
StringGenerator strgen_
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:190
re2::PossibleMatchTester::operator=
PossibleMatchTester & operator=(const PossibleMatchTester &)=delete
re2::Regexp::Parse
static Regexp * Parse(const StringPiece &s, ParseFlags flags, RegexpStatus *status)
Definition: bloaty/third_party/re2/re2/parse.cc:2200
re2::StringGenerator::Reset
void Reset()
Definition: bloaty/third_party/re2/re2/testing/string_generator.cc:35
min
#define min(a, b)
Definition: qsort.h:83
re2::RE2::Latin1
@ Latin1
Definition: bloaty/third_party/re2/re2/re2.h:249
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
re2::PossibleMatchTester::tests
int tests()
Definition: re2/re2/testing/possible_match_test.cc:184
re2::PrefixTest::regexp
const char * regexp
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:30
re2::PrefixTest::maxlen
int maxlen
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:31
re2::RegexpGenerator::EgrepOps
static const std::vector< std::string > & EgrepOps()
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:41
re2::Explode
std::vector< std::string > Explode(const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:241
tests
Definition: src/python/grpcio_tests/tests/__init__.py:1
re2::RE2_DEBUG_MODE
const bool RE2_DEBUG_MODE
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:25
re2::CEscape
std::string CEscape(const StringPiece &src)
Definition: bloaty/third_party/re2/util/strutil.cc:68
absl::str_format_internal::LengthMod::t
@ t
re2::PrefixTest
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:29
re2::RegexpGenerator
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.h:30
EXPECT_LT
#define EXPECT_LT(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2032
ASSERT_TRUE
#define ASSERT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1973
re2::PossibleMatchTester::HandleRegexp
void HandleRegexp(const std::string &regexp)
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:201
re2::PrefixTest::max
const char * max
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:33
re2::StringGenerator::Next
const StringPiece & Next()
Definition: bloaty/third_party/re2/re2/testing/string_generator.cc:84
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition: bloaty/third_party/re2/re2/testing/charclass_test.cc:198
re2::PossibleMatchTester
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:171
ops
static grpc_op ops[6]
Definition: test/core/fling/client.cc:39
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Regexp::LikePerl
@ LikePerl
Definition: bloaty/third_party/re2/re2/regexp.h:312
ASSERT_EQ
#define ASSERT_EQ(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2056
re2::StringGenerator
Definition: bloaty/third_party/re2/re2/testing/string_generator.h:22


grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:44