re2/re2/testing/exhaustive3_test.cc
Go to the documentation of this file.
1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 #include <stddef.h>
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 #include "util/test.h"
13 #include "util/utf.h"
14 #include "re2/testing/exhaustive_tester.h"
15 
16 namespace re2 {
17 
18 // Test simple character classes by themselves.
19 TEST(CharacterClasses, Exhaustive) {
20  std::vector<std::string> atoms = Split(" ",
21  "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
23  5, Explode("ab"), "", "");
24 }
25 
26 // Test simple character classes inside a___b (for example, a[a]b).
27 TEST(CharacterClasses, ExhaustiveAB) {
28  std::vector<std::string> atoms = Split(" ",
29  "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
31  5, Explode("ab"), "a%sb", "");
32 }
33 
34 // Returns UTF8 for Rune r
36  char buf[UTFmax+1];
37  buf[runetochar(buf, &r)] = 0;
38  return std::string(buf);
39 }
40 
41 // Returns a vector of "interesting" UTF8 characters.
42 // Unicode is now too big to just return all of them,
43 // so UTF8Characters return a set likely to be good test cases.
44 static const std::vector<std::string>& InterestingUTF8() {
45  static bool init;
46  static std::vector<std::string> v;
47 
48  if (init)
49  return v;
50 
51  init = true;
52  // All the Latin1 equivalents are interesting.
53  for (int i = 1; i < 256; i++)
54  v.push_back(UTF8(i));
55 
56  // After that, the codes near bit boundaries are
57  // interesting, because they span byte sequence lengths.
58  for (int j = 0; j < 8; j++)
59  v.push_back(UTF8(256 + j));
60  for (int i = 512; i < Runemax; i <<= 1)
61  for (int j = -8; j < 8; j++)
62  v.push_back(UTF8(i + j));
63 
64  // The codes near Runemax, including Runemax itself, are interesting.
65  for (int j = -8; j <= 0; j++)
66  v.push_back(UTF8(Runemax + j));
67 
68  return v;
69 }
70 
71 // Test interesting UTF-8 characters against character classes.
72 TEST(InterestingUTF8, SingleOps) {
73  std::vector<std::string> atoms = Split(" ",
74  ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
75  "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
76  "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
77  "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
78  std::vector<std::string> ops; // no ops
79  ExhaustiveTest(1, 0, atoms, ops,
80  1, InterestingUTF8(), "", "");
81 }
82 
83 // Test interesting UTF-8 characters against character classes,
84 // but wrap everything inside AB.
85 TEST(InterestingUTF8, AB) {
86  std::vector<std::string> atoms = Split(" ",
87  ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
88  "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
89  "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
90  "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
91  std::vector<std::string> ops; // no ops
92  std::vector<std::string> alpha = InterestingUTF8();
93  for (size_t i = 0; i < alpha.size(); i++)
94  alpha[i] = "a" + alpha[i] + "b";
95  ExhaustiveTest(1, 0, atoms, ops,
96  1, alpha, "a%sb", "");
97 }
98 
99 } // namespace re2
100 
init
const char * init
Definition: upb/upb/bindings/lua/main.c:49
re2::Split
std::vector< std::string > Split(const StringPiece &sep, const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:256
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::ExhaustiveTest
void ExhaustiveTest(int maxatoms, int maxops, const std::vector< std::string > &alphabet, const std::vector< std::string > &ops, int maxstrlen, const std::vector< std::string > &stralphabet, const std::string &wrapper, const std::string &topwrapper)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:144
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::UTF8
static std::string UTF8(Rune r)
Definition: bloaty/third_party/re2/re2/testing/exhaustive3_test.cc:35
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
re2::runetochar
int runetochar(char *str, const Rune *rune)
Definition: bloaty/third_party/re2/util/rune.cc:127
re2::InterestingUTF8
static const std::vector< std::string > & InterestingUTF8()
Definition: bloaty/third_party/re2/re2/testing/exhaustive3_test.cc:44
re2::RegexpGenerator::EgrepOps
static const std::vector< std::string > & EgrepOps()
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:41
re2::Explode
std::vector< std::string > Explode(const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:241
re2::Rune
signed int Rune
Definition: bloaty/third_party/re2/util/utf.h:25
fix_build_deps.r
r
Definition: fix_build_deps.py:491
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition: bloaty/third_party/re2/re2/testing/charclass_test.cc:198
ops
static grpc_op ops[6]
Definition: test/core/fling/client.cc:39
re2::Runemax
@ Runemax
Definition: bloaty/third_party/re2/util/utf.h:33
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::UTFmax
@ UTFmax
Definition: bloaty/third_party/re2/util/utf.h:29


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:20