re2/re2/testing/exhaustive_tester.cc
Go to the documentation of this file.
1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
8 // a maximum regular expression length, and a maximum number of letters
9 // that can appear in the regular expression. Given these parameters,
10 // it tries every possible regular expression and string, verifying that
11 // the NFA, DFA, and a trivial backtracking implementation agree about
12 // the location of the match.
13 
14 #include <stdio.h>
15 
16 #include "util/test.h"
17 #include "util/flags.h"
18 #include "util/logging.h"
19 #include "util/strutil.h"
20 #include "re2/testing/exhaustive_tester.h"
21 #include "re2/testing/tester.h"
22 
23 // For target `log' in the Makefile.
24 #ifndef LOGGING
25 #define LOGGING 0
26 #endif
27 
28 DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
29 
30 DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
31  "Stop testing a regular expression after finding this many "
32  "strings that break it.");
33 
34 namespace re2 {
35 
36 static char* escape(const StringPiece& sp) {
37  static char buf[512];
38  char* p = buf;
39  *p++ = '\"';
40  for (size_t i = 0; i < sp.size(); i++) {
41  if(p+5 >= buf+sizeof buf)
42  LOG(FATAL) << "ExhaustiveTester escape: too long";
43  if(sp[i] == '\\' || sp[i] == '\"') {
44  *p++ = '\\';
45  *p++ = sp[i];
46  } else if(sp[i] == '\n') {
47  *p++ = '\\';
48  *p++ = 'n';
49  } else {
50  *p++ = sp[i];
51  }
52  }
53  *p++ = '\"';
54  *p = '\0';
55  return buf;
56 }
57 
58 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
59  if (!re.Match(input, 0, input.size(), anchor, m, n)) {
60  printf("-");
61  return;
62  }
63  for (int i = 0; i < n; i++) {
64  if (i > 0)
65  printf(" ");
66  if (m[i].data() == NULL)
67  printf("-");
68  else
69  printf("%td-%td",
70  m[i].begin() - input.begin(),
71  m[i].end() - input.begin());
72  }
73 }
74 
75 // Processes a single generated regexp.
76 // Compiles it using Regexp interface and PCRE, and then
77 // checks that NFA, DFA, and PCRE all return the same results.
78 void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
79  regexps_++;
80  std::string regexp = const_regexp;
81  if (!topwrapper_.empty()) {
82  regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
83  }
84 
85  if (GetFlag(FLAGS_show_regexps)) {
86  printf("\r%s", regexp.c_str());
87  fflush(stdout);
88  }
89 
90  if (LOGGING) {
91  // Write out test cases and answers for use in testing
92  // other implementations, such as Go's regexp package.
93  if (randomstrings_)
94  LOG(ERROR) << "Cannot log with random strings.";
95  if (regexps_ == 1) { // first
96  printf("strings\n");
97  strgen_.Reset();
98  while (strgen_.HasNext())
99  printf("%s\n", escape(strgen_.Next()));
100  printf("regexps\n");
101  }
102  printf("%s\n", escape(regexp));
103 
104  RE2 re(regexp);
105  RE2::Options longest;
106  longest.set_longest_match(true);
107  RE2 relongest(regexp, longest);
108  int ngroup = re.NumberOfCapturingGroups()+1;
109  StringPiece* group = new StringPiece[ngroup];
110 
111  strgen_.Reset();
112  while (strgen_.HasNext()) {
113  StringPiece input = strgen_.Next();
114  PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
115  printf(";");
116  PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
117  printf(";");
118  PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
119  printf(";");
120  PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
121  printf("\n");
122  }
123  delete[] group;
124  return;
125  }
126 
127  Tester tester(regexp);
128  if (tester.error())
129  return;
130 
131  strgen_.Reset();
133  if (randomstrings_)
135  int bad_inputs = 0;
136  while (strgen_.HasNext()) {
137  tests_++;
138  if (!tester.TestInput(strgen_.Next())) {
139  failures_++;
140  if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
141  break;
142  }
143  }
144 }
145 
146 // Runs an exhaustive test on the given parameters.
147 void ExhaustiveTest(int maxatoms, int maxops,
148  const std::vector<std::string>& alphabet,
149  const std::vector<std::string>& ops,
150  int maxstrlen,
151  const std::vector<std::string>& stralphabet,
152  const std::string& wrapper,
153  const std::string& topwrapper) {
154  if (RE2_DEBUG_MODE) {
155  if (maxatoms > 1)
156  maxatoms--;
157  if (maxops > 1)
158  maxops--;
159  if (maxstrlen > 1)
160  maxstrlen--;
161  }
162  ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
163  maxstrlen, stralphabet, wrapper,
164  topwrapper);
165  t.Generate();
166  if (!LOGGING) {
167  printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
168  t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
169  }
170  EXPECT_EQ(0, t.failures());
171 }
172 
173 // Runs an exhaustive test using the given parameters and
174 // the basic egrep operators.
175 void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
176  int maxstrlen, const std::string& stralphabet,
177  const std::string& wrapper) {
178  const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
179 
180  for (size_t i = 0; i < arraysize(tops); i++) {
181  ExhaustiveTest(maxatoms, maxops,
182  Split("", alphabet),
184  maxstrlen,
185  Split("", stralphabet),
186  wrapper,
187  tops[i]);
188  }
189 }
190 
191 } // namespace re2
re2::ExhaustiveTester::tests_
int tests_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:77
re2::StringGenerator::GenerateNULL
void GenerateNULL()
Definition: bloaty/third_party/re2/re2/testing/string_generator.cc:109
re2::ExhaustiveTester::strgen_
StringGenerator strgen_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:73
demumble_test.stdout
stdout
Definition: demumble_test.py:38
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
re2::Split
std::vector< std::string > Split(const StringPiece &sep, const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:256
re2::StringPiece::size
size_type size() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:80
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
printf
_Use_decl_annotations_ int __cdecl printf(const char *_Format,...)
Definition: cs_driver.c:91
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::ExhaustiveTester::stringseed_
int32_t stringseed_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:81
re2::RE2::ANCHOR_BOTH
@ ANCHOR_BOTH
Definition: bloaty/third_party/re2/re2/re2.h:475
re2::ExhaustiveTest
void ExhaustiveTest(int maxatoms, int maxops, const std::vector< std::string > &alphabet, const std::vector< std::string > &ops, int maxstrlen, const std::vector< std::string > &stralphabet, const std::string &wrapper, const std::string &topwrapper)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:144
re2::RE2::UNANCHORED
@ UNANCHORED
Definition: bloaty/third_party/re2/re2/re2.h:473
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
alphabet
static const char alphabet[]
Definition: bin_encoder.cc:30
re2::PrintResult
static void PrintResult(const RE2 &re, const StringPiece &input, RE2::Anchor anchor, StringPiece *m, int n)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:57
re2::GetFlag
T GetFlag(const T &flag)
Definition: re2/util/flags.h:21
re2::StringGenerator::HasNext
bool HasNext()
Definition: bloaty/third_party/re2/re2/testing/string_generator.h:28
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition: iomgr/time_averaged_stats_test.cc:27
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::ExhaustiveTester::regexps_
int regexps_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:76
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::StringGenerator::Reset
void Reset()
Definition: bloaty/third_party/re2/re2/testing/string_generator.cc:35
re2::StringGenerator::Random
void Random(int32_t seed, int n)
Definition: bloaty/third_party/re2/re2/testing/string_generator.cc:101
re2::EgrepTest
void EgrepTest(int maxatoms, int maxops, const std::string &alphabet, int maxstrlen, const std::string &stralphabet, const std::string &wrapper)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:172
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
wrapper
grpc_channel_wrapper * wrapper
Definition: src/php/ext/grpc/channel.h:48
re2::RE2
Definition: bloaty/third_party/re2/re2/re2.h:211
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
google::protobuf::ERROR
static const LogLevel ERROR
Definition: bloaty/third_party/protobuf/src/google/protobuf/testing/googletest.h:70
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
re2::RE2::Match
bool Match(const StringPiece &text, size_t startpos, size_t endpos, Anchor re_anchor, StringPiece *submatch, int nsubmatch) const
Definition: bloaty/third_party/re2/re2/re2.cc:572
FATAL
#define FATAL(msg)
Definition: task.h:88
re2::RegexpGenerator::EgrepOps
static const std::vector< std::string > & EgrepOps()
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:41
upload.group
group
Definition: bloaty/third_party/googletest/googlemock/scripts/upload.py:397
re2::RE2_DEBUG_MODE
const bool RE2_DEBUG_MODE
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:25
re2::ExhaustiveTester::HandleRegexp
void HandleRegexp(const std::string &regexp)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:76
DEFINE_FLAG
DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing")
absl::str_format_internal::LengthMod::t
@ t
re2::ExhaustiveTester::stringcount_
int stringcount_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:82
input
std::string input
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/tokenizer_unittest.cc:197
re2::escape
static char * escape(const StringPiece &sp)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:35
LOGGING
#define LOGGING
Definition: re2/re2/testing/exhaustive_tester.cc:25
re2::RE2::Anchor
Anchor
Definition: bloaty/third_party/re2/re2/re2.h:472
re2::StringGenerator::Next
const StringPiece & Next()
Definition: bloaty/third_party/re2/re2/testing/string_generator.cc:84
regress.m
m
Definition: regress/regress.py:25
re2::ExhaustiveTester::randomstrings_
bool randomstrings_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:80
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
ops
static grpc_op ops[6]
Definition: test/core/fling/client.cc:39
re2::ExhaustiveTester::topwrapper_
std::string topwrapper_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:75
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::ExhaustiveTester::failures_
int failures_
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.h:78


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:20