re2/re2/testing/required_prefix_test.cc
Go to the documentation of this file.
1 // Copyright 2009 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include <string>
6 
7 #include "util/test.h"
8 #include "util/logging.h"
9 #include "re2/prog.h"
10 #include "re2/regexp.h"
11 
12 namespace re2 {
13 
// One table-driven case for the required-prefix tests in this file.
//
// Cases are written with positional aggregate initialization, so the field
// order must not change. Fields that a case leaves out are value-initialized
// (null pointer / false); the tests only read `prefix`, `foldcase` and
// `suffix` when `return_value` is true.
struct PrefixTest {
  const char* regexp;  // Pattern text handed to Regexp::Parse().
  bool return_value;   // Expected result of the prefix query.
  const char* prefix;  // Expected prefix string.
  bool foldcase;       // Expected value of the case-folding out-parameter.
  const char* suffix;  // Expected ToString() of the suffix regexp; compared
                       // only by the RequiredPrefix test.
};
21 
22 static PrefixTest tests[] = {
23  // Empty cases.
24  { "", false },
25  { "(?m)^", false },
26  { "(?-m)^", false },
27 
28  // If the regexp has no ^, there's no required prefix.
29  { "abc", false },
30 
31  // If the regexp immediately goes into
32  // something not a literal match, there's no required prefix.
33  { "^a*", false },
34  { "^(abc)", false },
35 
36  // Otherwise, it should work.
37  { "^abc$", true, "abc", false, "(?-m:$)" },
38  { "^abc", true, "abc", false, "" },
39  { "^(?i)abc", true, "abc", true, "" },
40  { "^abcd*", true, "abc", false, "d*" },
41  { "^[Aa][Bb]cd*", true, "ab", true, "cd*" },
42  { "^ab[Cc]d*", true, "ab", false, "[Cc]d*" },
43  { "^☺abc", true, "☺abc", false, "" },
44 };
45 
46 TEST(RequiredPrefix, SimpleTests) {
47  for (size_t i = 0; i < arraysize(tests); i++) {
48  const PrefixTest& t = tests[i];
49  for (size_t j = 0; j < 2; j++) {
51  if (j == 0)
53  Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
54  ASSERT_TRUE(re != NULL) << " " << t.regexp;
55 
56  std::string p;
57  bool f;
58  Regexp* s;
59  ASSERT_EQ(t.return_value, re->RequiredPrefix(&p, &f, &s))
60  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
61  << " " << re->Dump();
62  if (t.return_value) {
63  ASSERT_EQ(p, std::string(t.prefix))
64  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
65  ASSERT_EQ(f, t.foldcase)
66  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
67  ASSERT_EQ(s->ToString(), std::string(t.suffix))
68  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
69  s->Decref();
70  }
71  re->Decref();
72  }
73  }
74 }
75 
77  // Empty cases.
78  { "", false },
79  { "(?m)^", false },
80  { "(?-m)^", false },
81 
82  // If the regexp has a ^, there's no required prefix.
83  { "^abc", false },
84 
85  // If the regexp immediately goes into
86  // something not a literal match, there's no required prefix.
87  { "a*", false },
88 
89  // Unlike RequiredPrefix(), RequiredPrefixForAccel() can "see through"
90  // capturing groups, but doesn't try to glue prefix fragments together.
91  { "(a?)def", false },
92  { "(ab?)def", true, "a", false },
93  { "(abc?)def", true, "ab", false },
94  { "(()a)def", false },
95  { "((a)b)def", true, "a", false },
96  { "((ab)c)def", true, "ab", false },
97 
98  // Otherwise, it should work.
99  { "abc$", true, "abc", false },
100  { "abc", true, "abc", false },
101  { "(?i)abc", true, "abc", true },
102  { "abcd*", true, "abc", false },
103  { "[Aa][Bb]cd*", true, "ab", true },
104  { "ab[Cc]d*", true, "ab", false },
105  { "☺abc", true, "☺abc", false },
106 };
107 
108 TEST(RequiredPrefixForAccel, SimpleTests) {
109  for (size_t i = 0; i < arraysize(for_accel_tests); i++) {
110  const PrefixTest& t = for_accel_tests[i];
111  for (size_t j = 0; j < 2; j++) {
113  if (j == 0)
115  Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
116  ASSERT_TRUE(re != NULL) << " " << t.regexp;
117 
118  std::string p;
119  bool f;
120  ASSERT_EQ(t.return_value, re->RequiredPrefixForAccel(&p, &f))
121  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8")
122  << " " << re->Dump();
123  if (t.return_value) {
124  ASSERT_EQ(p, std::string(t.prefix))
125  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
126  ASSERT_EQ(f, t.foldcase)
127  << " " << t.regexp << " " << (j == 0 ? "latin1" : "utf8");
128  }
129  re->Decref();
130  }
131  }
132 }
133 
134 TEST(RequiredPrefixForAccel, CaseFoldingForKAndS) {
135  Regexp* re;
136  std::string p;
137  bool f;
138 
139  // With Latin-1 encoding, `(?i)` prefixes can include 'k' and 's'.
140  re = Regexp::Parse("(?i)KLM", Regexp::LikePerl|Regexp::Latin1, NULL);
141  ASSERT_TRUE(re != NULL);
142  ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
143  ASSERT_EQ(p, "klm");
144  ASSERT_EQ(f, true);
145  re->Decref();
146 
147  re = Regexp::Parse("(?i)STU", Regexp::LikePerl|Regexp::Latin1, NULL);
148  ASSERT_TRUE(re != NULL);
149  ASSERT_TRUE(re->RequiredPrefixForAccel(&p, &f));
150  ASSERT_EQ(p, "stu");
151  ASSERT_EQ(f, true);
152  re->Decref();
153 
154  // With UTF-8 encoding, `(?i)` prefixes can't include 'k' and 's'.
155  // This is because they match U+212A and U+017F, respectively, and
156  // so the parser ends up emitting character classes, not literals.
157  re = Regexp::Parse("(?i)KLM", Regexp::LikePerl, NULL);
158  ASSERT_TRUE(re != NULL);
160  re->Decref();
161 
162  re = Regexp::Parse("(?i)STU", Regexp::LikePerl, NULL);
163  ASSERT_TRUE(re != NULL);
165  re->Decref();
166 }
167 
// Patterns for the PrefixAccel test. Both begin with the literal "aababc"
// (the second one case-insensitively), which is the text the test searches
// for, followed by a non-literal `\d+` tail.
static const char* prefix_accel_tests[] = {
  "aababc\\d+",
  "(?i)AABABC\\d+",
};
172 
173 TEST(PrefixAccel, SimpleTests) {
174  for (size_t i = 0; i < arraysize(prefix_accel_tests); i++) {
175  const char* pattern = prefix_accel_tests[i];
177  ASSERT_TRUE(re != NULL);
178  Prog* prog = re->CompileToProg(0);
179  ASSERT_TRUE(prog != NULL);
180  ASSERT_TRUE(prog->can_prefix_accel());
181  for (int j = 0; j < 100; j++) {
182  std::string text(j, 'a');
183  const char* p = reinterpret_cast<const char*>(
184  prog->PrefixAccel(text.data(), text.size()));
185  EXPECT_TRUE(p == NULL);
186  text.append("aababc");
187  for (int k = 0; k < 100; k++) {
188  text.append(k, 'a');
189  p = reinterpret_cast<const char*>(
190  prog->PrefixAccel(text.data(), text.size()));
191  EXPECT_EQ(j, p - text.data());
192  }
193  }
194  delete prog;
195  re->Decref();
196  }
197 }
198 
199 } // namespace re2
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
re2::Regexp::Decref
void Decref()
Definition: bloaty/third_party/re2/re2/regexp.cc:115
check_tracer_sanity.pattern
pattern
Definition: check_tracer_sanity.py:25
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::Regexp::CompileToProg
Prog * CompileToProg(int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1220
setup.k
k
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
re2::PrefixTest::prefix
const char * prefix
Definition: bloaty/third_party/re2/re2/testing/required_prefix_test.cc:16
re2::Regexp::Latin1
@ Latin1
Definition: bloaty/third_party/re2/re2/regexp.h:289
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition: iomgr/time_averaged_stats_test.cc:27
re2::for_accel_tests
static PrefixTest for_accel_tests[]
Definition: re2/re2/testing/required_prefix_test.cc:76
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
re2::Regexp::RequiredPrefixForAccel
bool RequiredPrefixForAccel(std::string *prefix, bool *foldcase)
Definition: re2/re2/regexp.cc:720
re2::Regexp::Parse
static Regexp * Parse(const StringPiece &s, ParseFlags flags, RegexpStatus *status)
Definition: bloaty/third_party/re2/re2/parse.cc:2200
re2::Regexp::ParseFlags
ParseFlags
Definition: bloaty/third_party/re2/re2/regexp.h:278
re2::PrefixTest::suffix
const char * suffix
Definition: bloaty/third_party/re2/re2/testing/required_prefix_test.cc:18
re2::Regexp::Dump
std::string Dump()
Definition: bloaty/third_party/re2/re2/testing/dump.cc:156
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
re2::PrefixTest::regexp
const char * regexp
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:30
tests
Definition: src/python/grpcio_tests/tests/__init__.py:1
absl::flags_internal
Definition: abseil-cpp/absl/flags/commandlineflag.h:40
re2::Prog
Definition: bloaty/third_party/re2/re2/prog.h:56
re2::PrefixTest::return_value
bool return_value
Definition: bloaty/third_party/re2/re2/testing/required_prefix_test.cc:15
re2::PrefixTest
Definition: bloaty/third_party/re2/re2/testing/possible_match_test.cc:29
ASSERT_TRUE
#define ASSERT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1973
ASSERT_FALSE
#define ASSERT_FALSE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1976
re2::PrefixTest::foldcase
bool foldcase
Definition: bloaty/third_party/re2/re2/testing/required_prefix_test.cc:17
EXPECT_TRUE
#define EXPECT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1967
re2::prefix_accel_tests
static const char * prefix_accel_tests[]
Definition: re2/re2/testing/required_prefix_test.cc:168
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition: bloaty/third_party/re2/re2/testing/charclass_test.cc:198
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Regexp::LikePerl
@ LikePerl
Definition: bloaty/third_party/re2/re2/regexp.h:312
ASSERT_EQ
#define ASSERT_EQ(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2056


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:04