bloaty/third_party/re2/re2/testing/exhaustive2_test.cc
Go to the documentation of this file.
1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 #include <stddef.h>
8 #include <memory>
9 #include <string>
10 #include <vector>
11 
12 #include "util/test.h"
13 #include "re2/re2.h"
14 #include "re2/testing/exhaustive_tester.h"
15 
16 namespace re2 {
17 
18 // Test empty string matches (aka "(?:)")
19 TEST(EmptyString, Exhaustive) {
20  ExhaustiveTest(2, 2, Split(" ", "(?:) a"),
22  5, Split("", "ab"), "", "");
23 }
24 
25 // Test escaped versions of regexp syntax.
26 TEST(Punctuation, Literals) {
27  std::vector<std::string> alphabet = Explode("()*+?{}[]\\^$.");
28  std::vector<std::string> escaped = alphabet;
29  for (size_t i = 0; i < escaped.size(); i++)
30  escaped[i] = "\\" + escaped[i];
32  2, alphabet, "", "");
33 }
34 
35 // Test ^ $ . \A \z in presence of line endings.
36 // Have to wrap the empty-width ones in (?:) so that
37 // they can be repeated -- PCRE rejects ^* but allows (?:^)*
38 TEST(LineEnds, Exhaustive) {
39  ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),
41  4, Explode("ab\n"), "", "");
42 }
43 
44 // Test what does and does not match \n.
45 // This would be a good test, except that PCRE seems to have a bug:
46 // in single-byte character set mode (the default),
47 // [^a] matches \n, but in UTF-8 mode it does not.
48 // So when we run the test, the tester complains that
49 // we don't agree with PCRE, but it's PCRE that is at fault.
50 // For what it's worth, Perl gets this right (matches
51 // regardless of whether UTF-8 input is selected):
52 //
53 // #!/usr/bin/perl
54 // use POSIX qw(locale_h);
55 // print "matches in latin1\n" if "\n" =~ /[^a]/;
56 // setlocale("en_US.utf8");
57 // print "matches in utf8\n" if "\n" =~ /[^a]/;
58 //
59 // The rule chosen for RE2 is that by default, like Perl,
60 // dot does not match \n but negated character classes [^a] do.
61 // (?s) will allow dot to match \n; there is no way in RE2
62 // to stop [^a] from matching \n, though the underlying library
63 // provides a mechanism, and RE2 could add new syntax if needed.
64 //
65 // TEST(Newlines, Exhaustive) {
66 // std::vector<std::string> empty_vector;
67 // ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
68 // RegexpGenerator::EgrepOps(),
69 // 4, Explode("a\n"), "");
70 // }
71 
72 } // namespace re2
73 
re2::Split
std::vector< std::string > Split(const StringPiece &sep, const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:256
re2::ExhaustiveTest
void ExhaustiveTest(int maxatoms, int maxops, const std::vector< std::string > &alphabet, const std::vector< std::string > &ops, int maxstrlen, const std::vector< std::string > &stralphabet, const std::string &wrapper, const std::string &topwrapper)
Definition: bloaty/third_party/re2/re2/testing/exhaustive_tester.cc:144
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
alphabet
static const char alphabet[]
Definition: bin_encoder.cc:30
re2::RegexpGenerator::EgrepOps
static const std::vector< std::string > & EgrepOps()
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:41
re2::Explode
std::vector< std::string > Explode(const StringPiece &s)
Definition: bloaty/third_party/re2/re2/testing/regexp_generator.cc:241
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition: bloaty/third_party/re2/re2/testing/charclass_test.cc:198
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:20