bloaty/third_party/re2/re2/fuzzing/re2_fuzzer.cc
Go to the documentation of this file.
1 // Copyright 2016 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include <stddef.h>
6 #include <stdint.h>
7 #include <map>
8 #include <memory>
9 #include <queue>
10 #include <string>
11 
12 #include "re2/prefilter.h"
13 #include "re2/re2.h"
14 
15 using re2::StringPiece;
16 
17 // NOT static, NOT signed.
19 
20 void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
21  RE2 re(pattern, options);
22  if (!re.ok())
23  return;
24 
25  // Don't waste time fuzzing programs with large substrings.
26  // They can cause bug reports due to fuzzer timeouts when they
27  // are repetitions (e.g. hundreds of NUL bytes) and matching is
28  // unanchored. And they aren't interesting for fuzzing purposes.
29  std::unique_ptr<re2::Prefilter> prefilter(re2::Prefilter::FromRE2(&re));
30  if (prefilter == nullptr)
31  return;
32  std::queue<re2::Prefilter*> nodes;
33  nodes.push(prefilter.get());
34  while (!nodes.empty()) {
35  re2::Prefilter* node = nodes.front();
36  nodes.pop();
37  if (node->op() == re2::Prefilter::ATOM) {
38  if (node->atom().size() > 9)
39  return;
40  } else if (node->op() == re2::Prefilter::AND ||
41  node->op() == re2::Prefilter::OR) {
42  for (re2::Prefilter* sub : *node->subs())
43  nodes.push(sub);
44  }
45  }
46 
47  // Don't waste time fuzzing high-size programs.
48  // They can cause bug reports due to fuzzer timeouts.
49  int size = re.ProgramSize();
50  if (size > 9999)
51  return;
52  int rsize = re.ReverseProgramSize();
53  if (rsize > 9999)
54  return;
55 
56  // Don't waste time fuzzing high-fanout programs.
57  // They can cause bug reports due to fuzzer timeouts.
58  std::map<int, int> histogram;
59  int fanout = re.ProgramFanout(&histogram);
60  if (fanout > 9)
61  return;
62  int rfanout = re.ReverseProgramFanout(&histogram);
63  if (rfanout > 9)
64  return;
65 
66  if (re.NumberOfCapturingGroups() == 0) {
67  // Avoid early return due to too many arguments.
68  StringPiece sp = text;
69  RE2::FullMatch(sp, re);
70  RE2::PartialMatch(sp, re);
71  RE2::Consume(&sp, re);
72  sp = text; // Reset.
73  RE2::FindAndConsume(&sp, re);
74  } else {
75  // Okay, we have at least one capturing group...
76  // Try conversion for variously typed arguments.
77  StringPiece sp = text;
78  short s;
79  RE2::FullMatch(sp, re, &s);
80  long l;
81  RE2::PartialMatch(sp, re, &l);
82  float f;
83  RE2::Consume(&sp, re, &f);
84  sp = text; // Reset.
85  double d;
86  RE2::FindAndConsume(&sp, re, &d);
87  }
88 
90  RE2::Replace(&s, re, "");
91  s = std::string(text); // Reset.
92  RE2::GlobalReplace(&s, re, "");
93 
95  re.PossibleMatchRange(&min, &max, /*maxlen=*/9);
96 
97  // Exercise some other API functionality.
98  dummy += re.NamedCapturingGroups().size();
99  dummy += re.CapturingGroupNames().size();
100  dummy += RE2::QuoteMeta(pattern).size();
101 }
102 
103 // Entry point for libFuzzer.
104 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
105  if (size == 0 || size > 999)
106  return 0;
107 
108  // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
109  // Otherwise, we will waste time on inputs that have long runs of various
110  // character classes. The fuzzer has shown itself to be easily capable of
111  // generating such patterns that fall within the other limits, but result
112  // in timeouts nonetheless. The marginal cost is high - even more so when
113  // counted repetition is involved - whereas the marginal benefit is zero.
114  // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
115  int char_class = 0;
116  int backslash_p = 0; // very expensive, so handle specially
117  for (size_t i = 0; i < size; i++) {
118  if (data[i] == '.')
119  char_class++;
120  if (data[i] != '\\')
121  continue;
122  i++;
123  if (i >= size)
124  break;
125  if (data[i] == 'p' || data[i] == 'P' ||
126  data[i] == 'd' || data[i] == 'D' ||
127  data[i] == 's' || data[i] == 'S' ||
128  data[i] == 'w' || data[i] == 'W')
129  char_class++;
130  if (data[i] == 'p' || data[i] == 'P')
131  backslash_p++;
132  }
133  if (char_class > 9)
134  return 0;
135  if (backslash_p > 1)
136  return 0;
137 
138  // The one-at-a-time hash by Bob Jenkins.
139  uint32_t hash = 0;
140  for (size_t i = 0; i < size; i++) {
141  hash += data[i];
142  hash += (hash << 10);
143  hash ^= (hash >> 6);
144  }
145  hash += (hash << 3);
146  hash ^= (hash >> 11);
147  hash += (hash << 15);
148 
149  RE2::Options options;
150  options.set_log_errors(false);
151  options.set_max_mem(64 << 20);
152  options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
153  : RE2::Options::EncodingUTF8);
154  options.set_posix_syntax(hash & 2);
155  options.set_longest_match(hash & 4);
156  options.set_literal(hash & 8);
157  options.set_never_nl(hash & 16);
158  options.set_dot_nl(hash & 32);
159  options.set_never_capture(hash & 64);
160  options.set_case_sensitive(hash & 128);
161  options.set_perl_classes(hash & 256);
162  options.set_word_boundary(hash & 512);
163  options.set_one_line(hash & 1024);
164 
165  const char* ptr = reinterpret_cast<const char*>(data);
166  int len = static_cast<int>(size);
167 
171 
172  return 0;
173 }
ptr
char * ptr
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:45
Test
void Test(StringPiece pattern, const RE2::Options &options, StringPiece text)
Definition: bloaty/third_party/re2/re2/fuzzing/re2_fuzzer.cc:20
check_tracer_sanity.pattern
pattern
Definition: check_tracer_sanity.py:25
re2::Prefilter::AND
@ AND
Definition: bloaty/third_party/re2/re2/prefilter.h:32
re2::Prefilter
Definition: bloaty/third_party/re2/re2/prefilter.h:25
dummy
uint8_t dummy
Definition: bloaty/third_party/re2/re2/fuzzing/re2_fuzzer.cc:18
options
double_dict options[]
Definition: capstone_test.c:55
absl::cord_internal::Consume
void Consume(CordRep *rep, ConsumeFn consume_fn)
Definition: cord_rep_consume.cc:45
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::Prefilter::atom
const std::string & atom() const
Definition: bloaty/third_party/re2/re2/prefilter.h:40
re2::Prefilter::OR
@ OR
Definition: bloaty/third_party/re2/re2/prefilter.h:33
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
hash
uint64_t hash
Definition: ring_hash.cc:284
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
autogen_x86imm.f
f
Definition: autogen_x86imm.py:9
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
re2::Prefilter::ATOM
@ ATOM
Definition: bloaty/third_party/re2/re2/prefilter.h:31
re2::Prefilter::FromRE2
static Prefilter * FromRE2(const RE2 *re2)
Definition: bloaty/third_party/re2/re2/prefilter.cc:698
histogram
static grpc_histogram * histogram
Definition: test/core/fling/client.cc:34
grpc_ruby_generator::Replace
std::string Replace(std::string s, const std::string &from, const std::string &to)
Definition: ruby_generator_string-inl.h:52
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
min
#define min(a, b)
Definition: qsort.h:83
LLVMFuzzerTestOneInput
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
Definition: bloaty/third_party/re2/re2/fuzzing/re2_fuzzer.cc:104
d
static const fe d
Definition: curve25519_tables.h:19
stdint.h
re2::Prefilter::op
Op op()
Definition: bloaty/third_party/re2/re2/prefilter.h:39
re2::FindAndConsume
void FindAndConsume(int iters, int nbytes)
Definition: bloaty/third_party/re2/re2/testing/regexp_benchmark.cc:292
re2::Prefilter::subs
std::vector< Prefilter * > * subs()
Definition: bloaty/third_party/re2/re2/prefilter.h:45
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
run_grpclb_interop_tests.l
dictionary l
Definition: run_grpclb_interop_tests.py:410
size
voidpf void uLong size
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:00