bloaty/third_party/re2/re2/walker-inl.h
Go to the documentation of this file.
1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_WALKER_INL_H_
6 #define RE2_WALKER_INL_H_
7 
8 // Helper class for traversing Regexps without recursion.
9 // Clients should declare their own subclasses that override
10 // the PreVisit and PostVisit methods, which are called before
11 // and after visiting the subexpressions.
12 
13 // Not quite the Visitor pattern, because (among other things)
14 // the Visitor pattern is recursive.
15 
16 #include <stack>
17 
18 #include "util/logging.h"
19 #include "re2/regexp.h"
20 
21 namespace re2 {
22 
23 template<typename T> struct WalkState;
24 
25 template<typename T> class Regexp::Walker {
26  public:
27  Walker();
28  virtual ~Walker();
29 
30  // Virtual method called before visiting re's children.
31  // PreVisit passes ownership of its return value to its caller.
32  // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
33  // and passed to the child PreVisits and PostVisits as parent_arg.
34  // At the top-most Regexp, parent_arg is arg passed to walk.
35  // If PreVisit sets *stop to true, the walk does not recurse
36  // into the children. Instead it behaves as though the return
37  // value from PreVisit is the return value from PostVisit.
38  // The default PreVisit returns parent_arg.
39  virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
40 
41  // Virtual method called after visiting re's children.
42  // The pre_arg is the T that PreVisit returned.
43  // The child_args is a vector of the T that the child PostVisits returned.
44  // PostVisit takes ownership of pre_arg.
45  // PostVisit takes ownership of the Ts
46  // in *child_args, but not the vector itself.
47  // PostVisit passes ownership of its return value
48  // to its caller.
49  // The default PostVisit simply returns pre_arg.
50  virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
51  T* child_args, int nchild_args);
52 
53  // Virtual method called to copy a T,
54  // when Walk notices that more than one child is the same re.
55  virtual T Copy(T arg);
56 
57  // Virtual method called to do a "quick visit" of the re,
58  // but not its children. Only called once the visit budget
59  // has been used up and we're trying to abort the walk
60  // as quickly as possible. Should return a value that
61  // makes sense for the parent PostVisits still to be run.
62  // This function is (hopefully) only called by
63  // WalkExponential, but must be implemented by all clients,
64  // just in case.
65  virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
66 
67  // Walks over a regular expression.
68  // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
69  // Returns the T returned by PostVisit on re.
70  T Walk(Regexp* re, T top_arg);
71 
72  // Like Walk, but doesn't use Copy. This can lead to
73  // exponential runtimes on cross-linked Regexps like the
74  // ones generated by Simplify. To help limit this,
75  // at most max_visits nodes will be visited and then
76  // the walk will be cut off early.
77  // If the walk *is* cut off early, ShortVisit(re)
78  // will be called on regexps that cannot be fully
79  // visited rather than calling PreVisit/PostVisit.
80  T WalkExponential(Regexp* re, T top_arg, int max_visits);
81 
82  // Clears the stack. Should never be necessary, since
83  // Walk always enters and exits with an empty stack.
84  // Logs DFATAL if stack is not already clear.
85  void Reset();
86 
87  // Returns whether walk was cut off.
88  bool stopped_early() { return stopped_early_; }
89 
90  private:
91  // Walk state for the entire traversal.
92  std::stack<WalkState<T> >* stack_;
95 
96  T WalkInternal(Regexp* re, T top_arg, bool use_copy);
97 
98  Walker(const Walker&) = delete;
99  Walker& operator=(const Walker&) = delete;
100 };
101 
102 template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
103  T parent_arg,
104  bool* stop) {
105  return parent_arg;
106 }
107 
108 template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
109  T parent_arg,
110  T pre_arg,
111  T* child_args,
112  int nchild_args) {
113  return pre_arg;
114 }
115 
116 template<typename T> T Regexp::Walker<T>::Copy(T arg) {
117  return arg;
118 }
119 
120 // State about a single level in the traversal.
121 template<typename T> struct WalkState {
123  : re(re),
124  n(-1),
125  parent_arg(parent),
126  child_args(NULL) { }
127 
128  Regexp* re; // The regexp
129  int n; // The index of the next child to process; -1 means need to PreVisit
130  T parent_arg; // Accumulated arguments.
132  T child_arg; // One-element buffer for child_args.
134 };
135 
136 template<typename T> Regexp::Walker<T>::Walker() {
137  stack_ = new std::stack<WalkState<T> >;
138  stopped_early_ = false;
139 }
140 
141 template<typename T> Regexp::Walker<T>::~Walker() {
142  Reset();
143  delete stack_;
144 }
145 
146 // Clears the stack. Should never be necessary, since
147 // Walk always enters and exits with an empty stack.
148 // Logs DFATAL if stack is not already clear.
149 template<typename T> void Regexp::Walker<T>::Reset() {
150  if (stack_ && stack_->size() > 0) {
151  LOG(DFATAL) << "Stack not empty.";
152  while (stack_->size() > 0) {
153  delete stack_->top().child_args;
154  stack_->pop();
155  }
156  }
157 }
158 
159 template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
160  bool use_copy) {
161  Reset();
162 
163  if (re == NULL) {
164  LOG(DFATAL) << "Walk NULL";
165  return top_arg;
166  }
167 
168  stack_->push(WalkState<T>(re, top_arg));
169 
170  WalkState<T>* s;
171  for (;;) {
172  T t;
173  s = &stack_->top();
174  Regexp* re = s->re;
175  switch (s->n) {
176  case -1: {
177  if (--max_visits_ < 0) {
178  stopped_early_ = true;
179  t = ShortVisit(re, s->parent_arg);
180  break;
181  }
182  bool stop = false;
183  s->pre_arg = PreVisit(re, s->parent_arg, &stop);
184  if (stop) {
185  t = s->pre_arg;
186  break;
187  }
188  s->n = 0;
189  s->child_args = NULL;
190  if (re->nsub_ == 1)
191  s->child_args = &s->child_arg;
192  else if (re->nsub_ > 1)
193  s->child_args = new T[re->nsub_];
195  }
196  default: {
197  if (re->nsub_ > 0) {
198  Regexp** sub = re->sub();
199  if (s->n < re->nsub_) {
200  if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
201  s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
202  s->n++;
203  } else {
204  stack_->push(WalkState<T>(sub[s->n], s->pre_arg));
205  }
206  continue;
207  }
208  }
209 
210  t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
211  if (re->nsub_ > 1)
212  delete[] s->child_args;
213  break;
214  }
215  }
216 
217  // We've finished stack_->top().
218  // Update next guy down.
219  stack_->pop();
220  if (stack_->size() == 0)
221  return t;
222  s = &stack_->top();
223  if (s->child_args != NULL)
224  s->child_args[s->n] = t;
225  else
226  s->child_arg = t;
227  s->n++;
228  }
229 }
230 
231 template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
232  // Without the exponential walking behavior,
233  // this budget should be more than enough for any
234  // regexp, and yet not enough to get us in trouble
235  // as far as CPU time.
236  max_visits_ = 1000000;
237  return WalkInternal(re, top_arg, true);
238 }
239 
240 template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
241  int max_visits) {
242  max_visits_ = max_visits;
243  return WalkInternal(re, top_arg, false);
244 }
245 
246 } // namespace re2
247 
248 #endif // RE2_WALKER_INL_H_
re2::Regexp::Walker::stopped_early_
bool stopped_early_
Definition: bloaty/third_party/re2/re2/walker-inl.h:93
re2::Regexp::Walker::Walker
Walker()
Definition: bloaty/third_party/re2/re2/walker-inl.h:136
re2::Regexp::Walker::Reset
void Reset()
Definition: bloaty/third_party/re2/re2/walker-inl.h:149
re2::WalkState::child_args
T * child_args
Definition: bloaty/third_party/re2/re2/walker-inl.h:133
re2::WalkState::n
int n
Definition: bloaty/third_party/re2/re2/walker-inl.h:129
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
re2::Regexp::Walker::WalkExponential
T WalkExponential(Regexp *re, T top_arg, int max_visits)
Definition: bloaty/third_party/re2/re2/walker-inl.h:240
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::Regexp::Walker::WalkInternal
T WalkInternal(Regexp *re, T top_arg, bool use_copy)
Definition: bloaty/third_party/re2/re2/walker-inl.h:159
re2::Regexp::Walker::operator=
Walker & operator=(const Walker &)=delete
T
#define T(upbtypeconst, upbtype, ctype, default_value)
re2::Regexp::Walker
Definition: bloaty/third_party/re2/re2/regexp.h:414
mox.Reset
def Reset(*args)
Definition: bloaty/third_party/protobuf/python/mox.py:257
FALLTHROUGH_INTENDED
#define FALLTHROUGH_INTENDED
Definition: bloaty/third_party/re2/util/util.h:26
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::Regexp::sub
Regexp ** sub()
Definition: bloaty/third_party/re2/re2/regexp.h:327
re2::WalkState::child_arg
T child_arg
Definition: bloaty/third_party/re2/re2/walker-inl.h:132
re2::Regexp::nsub_
uint16_t nsub_
Definition: bloaty/third_party/re2/re2/regexp.h:549
arg
Definition: cmdline.cc:40
stack_
std::vector< Json * > stack_
Definition: json_reader.cc:133
re2::WalkState::parent_arg
T parent_arg
Definition: bloaty/third_party/re2/re2/walker-inl.h:130
re2::Regexp::Walker::Walk
T Walk(Regexp *re, T top_arg)
Definition: bloaty/third_party/re2/re2/walker-inl.h:231
re2::WalkState
Definition: bloaty/third_party/re2/re2/walker-inl.h:23
re2::Regexp::Walker::stopped_early
bool stopped_early()
Definition: bloaty/third_party/re2/re2/walker-inl.h:88
re2::Regexp::Walker::PreVisit
virtual T PreVisit(Regexp *re, T parent_arg, bool *stop)
Definition: bloaty/third_party/re2/re2/walker-inl.h:102
re2::Regexp::Walker::PostVisit
virtual T PostVisit(Regexp *re, T parent_arg, T pre_arg, T *child_args, int nchild_args)
Definition: bloaty/third_party/re2/re2/walker-inl.h:108
re2::Regexp::Walker::~Walker
virtual ~Walker()
Definition: bloaty/third_party/re2/re2/walker-inl.h:141
re2::Regexp::Walker::ShortVisit
virtual T ShortVisit(Regexp *re, T parent_arg)=0
re2::WalkState::pre_arg
T pre_arg
Definition: bloaty/third_party/re2/re2/walker-inl.h:131
Copy
@ Copy
Definition: upb/benchmarks/benchmark.cc:200
arg
struct arg arg
re2::Regexp::Walker::Copy
virtual T Copy(T arg)
Definition: bloaty/third_party/re2/re2/walker-inl.h:116
stop
static const char stop[]
Definition: benchmark-async-pummel.c:35
re2::Regexp::Walker::stack_
std::stack< WalkState< T > > * stack_
Definition: bloaty/third_party/re2/re2/walker-inl.h:92
re2::Regexp::Walker::max_visits_
int max_visits_
Definition: bloaty/third_party/re2/re2/walker-inl.h:94
re2::WalkState::re
Regexp * re
Definition: bloaty/third_party/re2/re2/walker-inl.h:128


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:52