re2/re2/fuzzing/re2_fuzzer.cc
Go to the documentation of this file.
1 // Copyright 2016 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
6 #include <stddef.h>
7 #include <stdint.h>
8 #include <memory>
9 #include <queue>
10 #include <string>
11 #include <vector>
12 
13 #include "re2/prefilter.h"
14 #include "re2/re2.h"
15 #include "re2/regexp.h"
16 
17 using re2::StringPiece;
18 
19 // NOT static, NOT signed.
21 
22 void TestOneInput(StringPiece pattern, const RE2::Options& options,
23  StringPiece text) {
24  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
25  // Otherwise, we will waste time on inputs that have long runs of various
26  // character classes. The fuzzer has shown itself to be easily capable of
27  // generating such patterns that fall within the other limits, but result
28  // in timeouts nonetheless. The marginal cost is high - even more so when
29  // counted repetition is involved - whereas the marginal benefit is zero.
30  // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
31  int char_class = 0;
32  int backslash_p = 0; // very expensive, so handle specially
33  for (size_t i = 0; i < pattern.size(); i++) {
34  if (pattern[i] == '.')
35  char_class++;
36  if (pattern[i] != '\\')
37  continue;
38  i++;
39  if (i >= pattern.size())
40  break;
41  if (pattern[i] == 'p' || pattern[i] == 'P' ||
42  pattern[i] == 'd' || pattern[i] == 'D' ||
43  pattern[i] == 's' || pattern[i] == 'S' ||
44  pattern[i] == 'w' || pattern[i] == 'W')
45  char_class++;
46  if (pattern[i] == 'p' || pattern[i] == 'P')
47  backslash_p++;
48  }
49  if (char_class > 9)
50  return;
51  if (backslash_p > 1)
52  return;
53 
54  // The default is 1000. Even 100 turned out to be too generous
55  // for fuzzing, empirically speaking, so let's try 10 instead.
57 
58  RE2 re(pattern, options);
59  if (!re.ok())
60  return;
61 
62  // Don't waste time fuzzing programs with large substrings.
63  // They can cause bug reports due to fuzzer timeouts when they
64  // are repetitions (e.g. hundreds of NUL bytes) and matching is
65  // unanchored. And they aren't interesting for fuzzing purposes.
66  std::unique_ptr<re2::Prefilter> prefilter(re2::Prefilter::FromRE2(&re));
67  if (prefilter == nullptr)
68  return;
69  std::queue<re2::Prefilter*> nodes;
70  nodes.push(prefilter.get());
71  while (!nodes.empty()) {
72  re2::Prefilter* node = nodes.front();
73  nodes.pop();
74  if (node->op() == re2::Prefilter::ATOM) {
75  if (node->atom().size() > 9)
76  return;
77  } else if (node->op() == re2::Prefilter::AND ||
78  node->op() == re2::Prefilter::OR) {
79  for (re2::Prefilter* sub : *node->subs())
80  nodes.push(sub);
81  }
82  }
83 
84  // Don't waste time fuzzing high-size programs.
85  // They can cause bug reports due to fuzzer timeouts.
86  int size = re.ProgramSize();
87  if (size > 9999)
88  return;
89  int rsize = re.ReverseProgramSize();
90  if (rsize > 9999)
91  return;
92 
93  // Don't waste time fuzzing high-fanout programs.
94  // They can cause bug reports due to fuzzer timeouts.
95  std::vector<int> histogram;
96  int fanout = re.ProgramFanout(&histogram);
97  if (fanout > 9)
98  return;
99  int rfanout = re.ReverseProgramFanout(&histogram);
100  if (rfanout > 9)
101  return;
102 
103  if (re.NumberOfCapturingGroups() == 0) {
104  // Avoid early return due to too many arguments.
105  StringPiece sp = text;
106  RE2::FullMatch(sp, re);
107  RE2::PartialMatch(sp, re);
108  RE2::Consume(&sp, re);
109  sp = text; // Reset.
110  RE2::FindAndConsume(&sp, re);
111  } else {
112  // Okay, we have at least one capturing group...
113  // Try conversion for variously typed arguments.
114  StringPiece sp = text;
115  short s;
116  RE2::FullMatch(sp, re, &s);
117  long l;
118  RE2::PartialMatch(sp, re, &l);
119  float f;
120  RE2::Consume(&sp, re, &f);
121  sp = text; // Reset.
122  double d;
123  RE2::FindAndConsume(&sp, re, &d);
124  }
125 
127  RE2::Replace(&s, re, "");
128  s = std::string(text); // Reset.
129  RE2::GlobalReplace(&s, re, "");
130 
132  re.PossibleMatchRange(&min, &max, /*maxlen=*/9);
133 
134  // Exercise some other API functionality.
135  dummy += re.NamedCapturingGroups().size();
136  dummy += re.CapturingGroupNames().size();
137  dummy += RE2::QuoteMeta(pattern).size();
138 }
139 
140 // Entry point for libFuzzer.
141 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
142  // An input larger than 4 KiB probably isn't interesting. (This limit
143  // allows for fdp.ConsumeRandomLengthString()'s backslash behaviour.)
144  if (size == 0 || size > 4096)
145  return 0;
146 
148 
149  // The convention here is that fdp.ConsumeBool() returning false sets
150  // the default value whereas returning true sets the alternate value:
151  // most options default to false and so can be set directly; encoding
152  // defaults to UTF-8; case_sensitive defaults to true. We do NOT want
153  // to log errors. max_mem is 64 MiB because we can afford to use more
154  // RAM in exchange for (hopefully) faster fuzzing.
155  RE2::Options options;
156  options.set_encoding(fdp.ConsumeBool() ? RE2::Options::EncodingLatin1
157  : RE2::Options::EncodingUTF8);
158  options.set_posix_syntax(fdp.ConsumeBool());
159  options.set_longest_match(fdp.ConsumeBool());
160  options.set_log_errors(false);
161  options.set_max_mem(64 << 20);
162  options.set_literal(fdp.ConsumeBool());
163  options.set_never_nl(fdp.ConsumeBool());
164  options.set_dot_nl(fdp.ConsumeBool());
165  options.set_never_capture(fdp.ConsumeBool());
166  options.set_case_sensitive(!fdp.ConsumeBool());
167  options.set_perl_classes(fdp.ConsumeBool());
168  options.set_word_boundary(fdp.ConsumeBool());
169  options.set_one_line(fdp.ConsumeBool());
170 
173 
175  return 0;
176 }
FuzzedDataProvider
Definition: FuzzedDataProvider.h:29
check_tracer_sanity.pattern
pattern
Definition: check_tracer_sanity.py:25
re2::Prefilter::AND
@ AND
Definition: bloaty/third_party/re2/re2/prefilter.h:32
TestOneInput
void TestOneInput(StringPiece pattern, const RE2::Options &options, StringPiece text)
Definition: re2/re2/fuzzing/re2_fuzzer.cc:22
re2::Prefilter
Definition: bloaty/third_party/re2/re2/prefilter.h:25
options
double_dict options[]
Definition: capstone_test.c:55
absl::cord_internal::Consume
void Consume(CordRep *rep, ConsumeFn consume_fn)
Definition: cord_rep_consume.cc:45
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::Prefilter::atom
const std::string & atom() const
Definition: bloaty/third_party/re2/re2/prefilter.h:40
re2::Prefilter::OR
@ OR
Definition: bloaty/third_party/re2/re2/prefilter.h:33
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count
static void FUZZING_ONLY_set_maximum_repeat_count(int i)
Definition: re2/re2/parse.cc:50
FuzzedDataProvider.h
autogen_x86imm.f
f
Definition: autogen_x86imm.py:9
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
re2::Prefilter::ATOM
@ ATOM
Definition: bloaty/third_party/re2/re2/prefilter.h:31
re2::Prefilter::FromRE2
static Prefilter * FromRE2(const RE2 *re2)
Definition: bloaty/third_party/re2/re2/prefilter.cc:698
histogram
static grpc_histogram * histogram
Definition: test/core/fling/client.cc:34
grpc_ruby_generator::Replace
std::string Replace(std::string s, const std::string &from, const std::string &to)
Definition: ruby_generator_string-inl.h:52
FuzzedDataProvider::ConsumeRandomLengthString
std::string ConsumeRandomLengthString(size_t max_length)
Definition: FuzzedDataProvider.h:115
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
min
#define min(a, b)
Definition: qsort.h:83
d
static const fe d
Definition: curve25519_tables.h:19
stdint.h
re2::Prefilter::op
Op op()
Definition: bloaty/third_party/re2/re2/prefilter.h:39
re2::FindAndConsume
void FindAndConsume(int iters, int nbytes)
Definition: bloaty/third_party/re2/re2/testing/regexp_benchmark.cc:292
LLVMFuzzerTestOneInput
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
Definition: re2/re2/fuzzing/re2_fuzzer.cc:141
re2::Prefilter::subs
std::vector< Prefilter * > * subs()
Definition: bloaty/third_party/re2/re2/prefilter.h:45
run_grpclb_interop_tests.l
dictionary l
Definition: run_grpclb_interop_tests.py:410
size
voidpf void uLong size
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
FuzzedDataProvider::ConsumeBool
bool ConsumeBool()
Definition: FuzzedDataProvider.h:163
dummy
uint8_t dummy
Definition: re2/re2/fuzzing/re2_fuzzer.cc:20


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:00