re2/re2/walker-inl.h
Go to the documentation of this file.
1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_WALKER_INL_H_
6 #define RE2_WALKER_INL_H_
7 
8 // Helper class for traversing Regexps without recursion.
9 // Clients should declare their own subclasses that override
10 // the PreVisit and PostVisit methods, which are called before
11 // and after visiting the subexpressions.
12 
13 // Not quite the Visitor pattern, because (among other things)
14 // the Visitor pattern is recursive.
15 
16 #include <stack>
17 
18 #include "util/logging.h"
19 #include "re2/regexp.h"
20 
21 namespace re2 {
22 
23 template<typename T> struct WalkState;
24 
25 template<typename T> class Regexp::Walker {
26  public:
27  Walker();
28  virtual ~Walker();
29 
30  // Virtual method called before visiting re's children.
31  // PreVisit passes ownership of its return value to its caller.
32  // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
33  // and passed to the child PreVisits and PostVisits as parent_arg.
34  // At the top-most Regexp, parent_arg is arg passed to walk.
35  // If PreVisit sets *stop to true, the walk does not recurse
36  // into the children. Instead it behaves as though the return
37  // value from PreVisit is the return value from PostVisit.
38  // The default PreVisit returns parent_arg.
39  virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
40 
41  // Virtual method called after visiting re's children.
42  // The pre_arg is the T that PreVisit returned.
43  // The child_args is a vector of the T that the child PostVisits returned.
44  // PostVisit takes ownership of pre_arg.
45  // PostVisit takes ownership of the Ts
46  // in *child_args, but not the vector itself.
47  // PostVisit passes ownership of its return value
48  // to its caller.
49  // The default PostVisit simply returns pre_arg.
50  virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
51  T* child_args, int nchild_args);
52 
53  // Virtual method called to copy a T,
54  // when Walk notices that more than one child is the same re.
55  virtual T Copy(T arg);
56 
57  // Virtual method called to do a "quick visit" of the re,
58  // but not its children. Only called once the visit budget
59  // has been used up and we're trying to abort the walk
60  // as quickly as possible. Should return a value that
61  // makes sense for the parent PostVisits still to be run.
62  // This function is (hopefully) only called by
63  // WalkExponential, but must be implemented by all clients,
64  // just in case.
65  virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
66 
67  // Walks over a regular expression.
68  // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
69  // Returns the T returned by PostVisit on re.
70  T Walk(Regexp* re, T top_arg);
71 
72  // Like Walk, but doesn't use Copy. This can lead to
73  // exponential runtimes on cross-linked Regexps like the
74  // ones generated by Simplify. To help limit this,
75  // at most max_visits nodes will be visited and then
76  // the walk will be cut off early.
77  // If the walk *is* cut off early, ShortVisit(re)
78  // will be called on regexps that cannot be fully
79  // visited rather than calling PreVisit/PostVisit.
80  T WalkExponential(Regexp* re, T top_arg, int max_visits);
81 
82  // Clears the stack. Should never be necessary, since
83  // Walk always enters and exits with an empty stack.
84  // Logs DFATAL if stack is not already clear.
85  void Reset();
86 
87  // Returns whether walk was cut off.
88  bool stopped_early() { return stopped_early_; }
89 
90  private:
91  // Walk state for the entire traversal.
92  std::stack<WalkState<T>> stack_;
93  bool stopped_early_;
94  int max_visits_;
95 
96  T WalkInternal(Regexp* re, T top_arg, bool use_copy);
97 
98  Walker(const Walker&) = delete;
99  Walker& operator=(const Walker&) = delete;
100 };
101 
102 template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
103  T parent_arg,
104  bool* stop) {
105  return parent_arg;
106 }
107 
108 template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
109  T parent_arg,
110  T pre_arg,
111  T* child_args,
112  int nchild_args) {
113  return pre_arg;
114 }
115 
116 template<typename T> T Regexp::Walker<T>::Copy(T arg) {
117  return arg;
118 }
119 
120 // State about a single level in the traversal.
121 template<typename T> struct WalkState {
122  WalkState(Regexp* re, T parent)
123  : re(re),
124  n(-1),
125  parent_arg(parent),
126  child_args(NULL) { }
127 
128  Regexp* re; // The regexp
129  int n; // The index of the next child to process; -1 means need to PreVisit
130  T parent_arg; // Accumulated arguments.
131  T pre_arg;
132  T child_arg; // One-element buffer for child_args.
133  T* child_args;
134 };
135 
136 template<typename T> Regexp::Walker<T>::Walker() {
137  stopped_early_ = false;
138 }
139 
140 template<typename T> Regexp::Walker<T>::~Walker() {
141  Reset();
142 }
143 
144 // Clears the stack. Should never be necessary, since
145 // Walk always enters and exits with an empty stack.
146 // Logs DFATAL if stack is not already clear.
147 template<typename T> void Regexp::Walker<T>::Reset() {
148  if (!stack_.empty()) {
149  LOG(DFATAL) << "Stack not empty.";
150  while (!stack_.empty()) {
151  delete[] stack_.top().child_args;
152  stack_.pop();
153  }
154  }
155 }
156 
157 template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
158  bool use_copy) {
159  Reset();
160 
161  if (re == NULL) {
162  LOG(DFATAL) << "Walk NULL";
163  return top_arg;
164  }
165 
166  stack_.push(WalkState<T>(re, top_arg));
167 
168  WalkState<T>* s;
169  for (;;) {
170  T t;
171  s = &stack_.top();
172  Regexp* re = s->re;
173  switch (s->n) {
174  case -1: {
175  if (--max_visits_ < 0) {
176  stopped_early_ = true;
177  t = ShortVisit(re, s->parent_arg);
178  break;
179  }
180  bool stop = false;
181  s->pre_arg = PreVisit(re, s->parent_arg, &stop);
182  if (stop) {
183  t = s->pre_arg;
184  break;
185  }
186  s->n = 0;
187  s->child_args = NULL;
188  if (re->nsub_ == 1)
189  s->child_args = &s->child_arg;
190  else if (re->nsub_ > 1)
191  s->child_args = new T[re->nsub_];
193  }
194  default: {
195  if (re->nsub_ > 0) {
196  Regexp** sub = re->sub();
197  if (s->n < re->nsub_) {
198  if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
199  s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
200  s->n++;
201  } else {
202  stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
203  }
204  continue;
205  }
206  }
207 
208  t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
209  if (re->nsub_ > 1)
210  delete[] s->child_args;
211  break;
212  }
213  }
214 
215  // We've finished stack_.top().
216  // Update next guy down.
217  stack_.pop();
218  if (stack_.empty())
219  return t;
220  s = &stack_.top();
221  if (s->child_args != NULL)
222  s->child_args[s->n] = t;
223  else
224  s->child_arg = t;
225  s->n++;
226  }
227 }
228 
229 template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
230  // Without the exponential walking behavior,
231  // this budget should be more than enough for any
232  // regexp, and yet not enough to get us in trouble
233  // as far as CPU time.
234  max_visits_ = 1000000;
235  return WalkInternal(re, top_arg, true);
236 }
237 
238 template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
239  int max_visits) {
240  max_visits_ = max_visits;
241  return WalkInternal(re, top_arg, false);
242 }
243 
244 } // namespace re2
245 
246 #endif // RE2_WALKER_INL_H_
re2::Regexp::Walker::stopped_early_
bool stopped_early_
Definition: bloaty/third_party/re2/re2/walker-inl.h:93
re2::Regexp::Walker::Walker
Walker()
Definition: bloaty/third_party/re2/re2/walker-inl.h:136
re2::WalkState::WalkState
WalkState(Regexp *re, T parent)
Definition: re2/re2/walker-inl.h:122
re2::Regexp::Walker::Reset
void Reset()
Definition: bloaty/third_party/re2/re2/walker-inl.h:149
re2::WalkState::child_args
T * child_args
Definition: bloaty/third_party/re2/re2/walker-inl.h:133
re2::WalkState::n
int n
Definition: bloaty/third_party/re2/re2/walker-inl.h:129
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
re2::Regexp::Walker::WalkExponential
T WalkExponential(Regexp *re, T top_arg, int max_visits)
Definition: bloaty/third_party/re2/re2/walker-inl.h:240
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::Regexp::Walker::WalkInternal
T WalkInternal(Regexp *re, T top_arg, bool use_copy)
Definition: bloaty/third_party/re2/re2/walker-inl.h:159
absl::FormatConversionChar::s
@ s
re2::Regexp::Walker::operator=
Walker & operator=(const Walker &)=delete
T
#define T(upbtypeconst, upbtype, ctype, default_value)
re2::Regexp::Walker
Definition: bloaty/third_party/re2/re2/regexp.h:414
mox.Reset
def Reset(*args)
Definition: bloaty/third_party/protobuf/python/mox.py:257
FALLTHROUGH_INTENDED
#define FALLTHROUGH_INTENDED
Definition: bloaty/third_party/re2/util/util.h:26
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::WalkState::child_arg
T child_arg
Definition: bloaty/third_party/re2/re2/walker-inl.h:132
arg
Definition: cmdline.cc:40
stack_
std::vector< Json * > stack_
Definition: json_reader.cc:133
re2::Regexp::Walker::stack_
std::stack< WalkState< T > > stack_
Definition: re2/re2/walker-inl.h:92
re2::WalkState::parent_arg
T parent_arg
Definition: bloaty/third_party/re2/re2/walker-inl.h:130
re2::Regexp::Walker::Walk
T Walk(Regexp *re, T top_arg)
Definition: bloaty/third_party/re2/re2/walker-inl.h:231
re2::Regexp::Walker::stopped_early
bool stopped_early()
Definition: re2/re2/walker-inl.h:88
re2::Regexp::Walker::PreVisit
virtual T PreVisit(Regexp *re, T parent_arg, bool *stop)
Definition: bloaty/third_party/re2/re2/walker-inl.h:102
re2::Regexp::Walker::PostVisit
virtual T PostVisit(Regexp *re, T parent_arg, T pre_arg, T *child_args, int nchild_args)
Definition: bloaty/third_party/re2/re2/walker-inl.h:108
re2::Regexp::Walker::~Walker
virtual ~Walker()
Definition: bloaty/third_party/re2/re2/walker-inl.h:141
re2::Regexp::Walker::ShortVisit
virtual T ShortVisit(Regexp *re, T parent_arg)=0
re2::WalkState::pre_arg
T pre_arg
Definition: bloaty/third_party/re2/re2/walker-inl.h:131
absl::str_format_internal::LengthMod::t
@ t
Copy
@ Copy
Definition: upb/benchmarks/benchmark.cc:200
arg
struct arg arg
re2::Regexp::Walker::Copy
virtual T Copy(T arg)
Definition: bloaty/third_party/re2/re2/walker-inl.h:116
stop
static const char stop[]
Definition: benchmark-async-pummel.c:35
re2::Regexp::Walker::max_visits_
int max_visits_
Definition: bloaty/third_party/re2/re2/walker-inl.h:94
re2::WalkState::re
Regexp * re
Definition: bloaty/third_party/re2/re2/walker-inl.h:128
re2::Regexp::Regexp
Regexp(RegexpOp op, ParseFlags parse_flags)
Definition: bloaty/third_party/re2/re2/regexp.cc:29


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:52