re2/re2/dfa.cc
Go to the documentation of this file.
1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // A DFA (deterministic finite automaton)-based regular expression search.
6 //
7 // The DFA search has two main parts: the construction of the automaton,
8 // which is represented by a graph of State structures, and the execution
9 // of the automaton over a given input string.
10 //
11 // The basic idea is that the State graph is constructed so that the
12 // execution can simply start with a state s, and then for each byte c in
13 // the input string, execute "s = s->next[c]", checking at each point whether
14 // the current s represents a matching state.
15 //
16 // The simple explanation just given does convey the essence of this code,
17 // but it omits the details of how the State graph gets constructed as well
18 // as some performance-driven optimizations to the execution of the automaton.
19 // All these details are explained in the comments for the code following
20 // the definition of class DFA.
21 //
22 // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
23 
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <algorithm>
29 #include <atomic>
30 #include <deque>
31 #include <mutex>
32 #include <new>
33 #include <string>
34 #include <unordered_map>
35 #include <unordered_set>
36 #include <utility>
37 #include <vector>
38 
39 #include "util/logging.h"
40 #include "util/mix.h"
41 #include "util/mutex.h"
42 #include "util/strutil.h"
43 #include "re2/pod_array.h"
44 #include "re2/prog.h"
45 #include "re2/re2.h"
46 #include "re2/sparse_set.h"
47 #include "re2/stringpiece.h"
48 
49 // Silence "zero-sized array in struct/union" warning for DFA::State::next_.
50 #ifdef _MSC_VER
51 #pragma warning(disable: 4200)
52 #endif
53 
54 namespace re2 {
55 
56 // Controls whether the DFA should bail out early if the NFA would be faster.
57 static bool dfa_should_bail_when_slow = true;
58 
61 }
62 
63 // Changing this to true compiles in prints that trace execution of the DFA.
64 // Generates a lot of output -- only useful for debugging.
65 static const bool ExtraDebug = false;
66 
67 // A DFA implementation of a regular expression program.
68 // Since this is entirely a forward declaration mandated by C++,
69 // some of the comments here are better understood after reading
70 // the comments in the sections that follow the DFA definition.
71 class DFA {
72  public:
74  ~DFA();
75  bool ok() const { return !init_failed_; }
76  Prog::MatchKind kind() { return kind_; }
77 
78  // Searches for the regular expression in text, which is considered
79  // as a subsection of context for the purposes of interpreting flags
80  // like ^ and $ and \A and \z.
81  // Returns whether a match was found.
82  // If a match is found, sets *ep to the end point of the best match in text.
83  // If "anchored", the match must begin at the start of text.
84  // If "want_earliest_match", the match that ends first is used, not
85  // necessarily the best one.
86  // If "run_forward" is true, the DFA runs from text.begin() to text.end().
87  // If it is false, the DFA runs from text.end() to text.begin(),
88  // returning the leftmost end of the match instead of the rightmost one.
89  // If the DFA cannot complete the search (for example, if it is out of
90  // memory), it sets *failed and returns false.
91  bool Search(const StringPiece& text, const StringPiece& context,
92  bool anchored, bool want_earliest_match, bool run_forward,
93  bool* failed, const char** ep, SparseSet* matches);
94 
95  // Builds out all states for the entire DFA.
96  // If cb is not empty, it receives one callback per state built.
97  // Returns the number of states built.
98  // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
100 
101  // Computes min and max for matching strings. Won't return strings
102  // bigger than maxlen.
103  bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
104 
105  // These data structures are logically private, but C++ makes it too
106  // difficult to mark them as such.
107  class RWLocker;
108  class StateSaver;
109  class Workq;
110 
111  // A single DFA state. The DFA is represented as a graph of these
112  // States, linked by the next_ pointers. If in state s and reading
113  // byte c, the next state should be s->next_[c].
114  struct State {
115  inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
116 
117  int* inst_; // Instruction pointers in the state.
118  int ninst_; // # of inst_ pointers.
119  uint32_t flag_; // Empty string bitfield flags in effect on the way
120  // into this state, along with kFlagMatch if this
121  // is a matching state.
122 
123 // Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
124 // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
125 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
126  std::atomic<State*> next_[0]; // Outgoing arrows from State,
127 #else
128  std::atomic<State*> next_[]; // Outgoing arrows from State,
129 #endif
130 
131  // one per input byte class
132  };
133 
134  enum {
135  kByteEndText = 256, // imaginary byte at end of text
136 
137  kFlagEmptyMask = 0xFF, // State.flag_: bits holding kEmptyXXX flags
138  kFlagMatch = 0x0100, // State.flag_: this is a matching state
139  kFlagLastWord = 0x0200, // State.flag_: last byte was a word char
140  kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left
141  };
142 
143  struct StateHash {
144  size_t operator()(const State* a) const {
145  DCHECK(a != NULL);
146  HashMix mix(a->flag_);
147  for (int i = 0; i < a->ninst_; i++)
148  mix.Mix(a->inst_[i]);
149  mix.Mix(0);
150  return mix.get();
151  }
152  };
153 
154  struct StateEqual {
155  bool operator()(const State* a, const State* b) const {
156  DCHECK(a != NULL);
157  DCHECK(b != NULL);
158  if (a == b)
159  return true;
160  if (a->flag_ != b->flag_)
161  return false;
162  if (a->ninst_ != b->ninst_)
163  return false;
164  for (int i = 0; i < a->ninst_; i++)
165  if (a->inst_[i] != b->inst_[i])
166  return false;
167  return true;
168  }
169  };
170 
171  typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
172 
173  private:
174  // Make it easier to swap in a scalable reader-writer mutex.
175  using CacheMutex = Mutex;
176 
177  enum {
178  // Indices into start_ for unanchored searches.
179  // Add kStartAnchored for anchored searches.
180  kStartBeginText = 0, // text at beginning of context
181  kStartBeginLine = 2, // text at beginning of line
182  kStartAfterWordChar = 4, // text follows a word character
183  kStartAfterNonWordChar = 6, // text follows non-word character
184  kMaxStart = 8,
185 
186  kStartAnchored = 1,
187  };
188 
189  // Resets the DFA State cache, flushing all saved State* information.
190  // Releases and reacquires cache_mutex_ via cache_lock, so any
191  // State* existing before the call are not valid after the call.
192  // Use a StateSaver to preserve important states across the call.
193  // cache_mutex_.r <= L < mutex_
194  // After: cache_mutex_.w <= L < mutex_
195  void ResetCache(RWLocker* cache_lock);
196 
197  // Looks up and returns the State corresponding to a Workq.
198  // L >= mutex_
199  State* WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag);
200 
201  // Looks up and returns a State matching the inst, ninst, and flag.
202  // L >= mutex_
203  State* CachedState(int* inst, int ninst, uint32_t flag);
204 
205  // Clear the cache entirely.
206  // Must hold cache_mutex_.w or be in destructor.
207  void ClearCache();
208 
209  // Converts a State into a Workq: the opposite of WorkqToCachedState.
210  // L >= mutex_
211  void StateToWorkq(State* s, Workq* q);
212 
213  // Runs a State on a given byte, returning the next state.
214  State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_
215  State* RunStateOnByte(State*, int); // L >= mutex_
216 
217  // Runs a Workq on a given byte followed by a set of empty-string flags,
218  // producing a new Workq in nq. If a match instruction is encountered,
219  // sets *ismatch to true.
220  // L >= mutex_
221  void RunWorkqOnByte(Workq* q, Workq* nq,
222  int c, uint32_t flag, bool* ismatch);
223 
224  // Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
225  // L >= mutex_
227 
228  // Adds the instruction id to the Workq, following empty arrows
229  // according to flag.
230  // L >= mutex_
231  void AddToQueue(Workq* q, int id, uint32_t flag);
232 
233  // For debugging, returns a text representation of State.
234  static std::string DumpState(State* state);
235 
236  // For debugging, returns a text representation of a Workq.
237  static std::string DumpWorkq(Workq* q);
238 
239  // Search parameters
240  struct SearchParams {
243  : text(text),
244  context(context),
245  anchored(false),
249  start(NULL),
251  failed(false),
252  ep(NULL),
253  matches(NULL) {}
254 
257  bool anchored;
259  bool want_earliest_match;
260  bool run_forward;
261  State* start;
263  bool failed; // "out" parameter: whether search gave up
264  const char* ep; // "out" parameter: end pointer for match
266 
267  private:
268  SearchParams(const SearchParams&) = delete;
269  SearchParams& operator=(const SearchParams&) = delete;
270  };
271 
272  // Before each search, the parameters to Search are analyzed by
273  // AnalyzeSearch to determine the state in which to start.
274  struct StartInfo {
275  StartInfo() : start(NULL) {}
276  std::atomic<State*> start;
277  };
278 
279  // Fills in params->start and params->can_prefix_accel using
280  // the other search parameters. Returns true on success,
281  // false on failure.
282  // cache_mutex_.r <= L < mutex_
283  bool AnalyzeSearch(SearchParams* params);
284  bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
285  uint32_t flags);
286 
287  // The generic search loop, inlined to create specialized versions.
288  // cache_mutex_.r <= L < mutex_
289  // Might unlock and relock cache_mutex_ via params->cache_lock.
290  template <bool can_prefix_accel,
291  bool want_earliest_match,
292  bool run_forward>
293  inline bool InlinedSearchLoop(SearchParams* params);
294 
295  // The specialized versions of InlinedSearchLoop. The three letters
296  // at the ends of the name denote the true/false values used as the
297  // last three parameters of InlinedSearchLoop.
298  // cache_mutex_.r <= L < mutex_
299  // Might unlock and relock cache_mutex_ via params->cache_lock.
300  bool SearchFFF(SearchParams* params);
301  bool SearchFFT(SearchParams* params);
302  bool SearchFTF(SearchParams* params);
303  bool SearchFTT(SearchParams* params);
304  bool SearchTFF(SearchParams* params);
305  bool SearchTFT(SearchParams* params);
306  bool SearchTTF(SearchParams* params);
307  bool SearchTTT(SearchParams* params);
308 
309  // The main search loop: calls an appropriate specialized version of
310  // InlinedSearchLoop.
311  // cache_mutex_.r <= L < mutex_
312  // Might unlock and relock cache_mutex_ via params->cache_lock.
313  bool FastSearchLoop(SearchParams* params);
314 
315 
316  // Looks up bytes in bytemap_ but handles case c == kByteEndText too.
317  int ByteMap(int c) {
318  if (c == kByteEndText)
319  return prog_->bytemap_range();
320  return prog_->bytemap()[c];
321  }
322 
323  // Constant after initialization.
324  Prog* prog_; // The regular expression program to run.
325  Prog::MatchKind kind_; // The kind of DFA.
326  bool init_failed_; // initialization failed (out of memory)
327 
328  Mutex mutex_; // mutex_ >= cache_mutex_.r
329 
330  // Scratch areas, protected by mutex_.
331  Workq* q0_; // Two pre-allocated work queues.
332  Workq* q1_;
333  PODArray<int> stack_; // Pre-allocated stack for AddToQueue
334 
335  // State* cache. Many threads use and add to the cache simultaneously,
336  // holding cache_mutex_ for reading and mutex_ (above) when adding.
337  // If the cache fills and needs to be discarded, the discarding is done
338  // while holding cache_mutex_ for writing, to avoid interrupting other
339  // readers. Any State* pointers are only valid while cache_mutex_
340  // is held.
342  int64_t mem_budget_; // Total memory budget for all States.
343  int64_t state_budget_; // Amount of memory remaining for new States.
344  StateSet state_cache_; // All States computed so far.
346 
347  DFA(const DFA&) = delete;
348  DFA& operator=(const DFA&) = delete;
349 };
350 
351 // Shorthand for casting to uint8_t*.
352 static inline const uint8_t* BytePtr(const void* v) {
353  return reinterpret_cast<const uint8_t*>(v);
354 }
355 
356 // Work queues
357 
358 // Marks separate thread groups of different priority
359 // in the work queue when in leftmost-longest matching mode.
360 #define Mark (-1)
361 
362 // Separates the match IDs from the instructions in inst_.
363 // Used only for "many match" DFA states.
364 #define MatchSep (-2)
365 
366 // Internally, the DFA uses a sparse array of
367 // program instruction pointers as a work queue.
368 // In leftmost longest mode, marks separate sections
369 // of workq that started executing at different
370 // locations in the string (earlier locations first).
371 class DFA::Workq : public SparseSet {
372  public:
373  // Constructor: n is number of normal slots, maxmark number of mark slots.
374  Workq(int n, int maxmark) :
376  n_(n),
377  maxmark_(maxmark),
378  nextmark_(n),
380  }
381 
382  bool is_mark(int i) { return i >= n_; }
383 
384  int maxmark() { return maxmark_; }
385 
386  void clear() {
388  nextmark_ = n_;
389  }
390 
391  void mark() {
392  if (last_was_mark_)
393  return;
394  last_was_mark_ = false;
396  }
397 
398  int size() {
399  return n_ + maxmark_;
400  }
401 
402  void insert(int id) {
403  if (contains(id))
404  return;
405  insert_new(id);
406  }
407 
408  void insert_new(int id) {
409  last_was_mark_ = false;
411  }
412 
413  private:
414  int n_; // size excluding marks
415  int maxmark_; // maximum number of marks
416  int nextmark_; // id of next mark
417  bool last_was_mark_; // last inserted was mark
418 
419  Workq(const Workq&) = delete;
420  Workq& operator=(const Workq&) = delete;
421 };
422 
423 DFA::DFA(Prog* prog, Prog::MatchKind kind, int64_t max_mem)
424  : prog_(prog),
425  kind_(kind),
426  init_failed_(false),
427  q0_(NULL),
428  q1_(NULL),
429  mem_budget_(max_mem) {
430  if (ExtraDebug)
431  fprintf(stderr, "\nkind %d\n%s\n", kind_, prog_->DumpUnanchored().c_str());
432  int nmark = 0;
433  if (kind_ == Prog::kLongestMatch)
434  nmark = prog_->size();
435  // See DFA::AddToQueue() for why this is so.
439  nmark + 1; // + 1 for start inst
440 
441  // Account for space needed for DFA, q0, q1, stack.
442  mem_budget_ -= sizeof(DFA);
443  mem_budget_ -= (prog_->size() + nmark) *
444  (sizeof(int)+sizeof(int)) * 2; // q0, q1
445  mem_budget_ -= nstack * sizeof(int); // stack
446  if (mem_budget_ < 0) {
447  init_failed_ = true;
448  return;
449  }
450 
452 
453  // Make sure there is a reasonable amount of working room left.
454  // At minimum, the search requires room for two states in order
455  // to limp along, restarting frequently. We'll get better performance
456  // if there is room for a larger number of states, say 20.
457  // Note that a state stores list heads only, so we use the program
458  // list count for the upper bound, not the program size.
459  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
460  int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
461  (prog_->list_count()+nmark)*sizeof(int);
462  if (state_budget_ < 20*one_state) {
463  init_failed_ = true;
464  return;
465  }
466 
467  q0_ = new Workq(prog_->size(), nmark);
468  q1_ = new Workq(prog_->size(), nmark);
469  stack_ = PODArray<int>(nstack);
470 }
471 
472 DFA::~DFA() {
473  delete q0_;
474  delete q1_;
475  ClearCache();
476 }
477 
478 // In the DFA state graph, s->next[c] == NULL means that the
479 // state has not yet been computed and needs to be. We need
480 // a different special value to signal that s->next[c] is a
481 // state that can never lead to a match (and thus the search
482 // can be called off). Hence DeadState.
483 #define DeadState reinterpret_cast<State*>(1)
484 
485 // Signals that the rest of the string matches no matter what it is.
486 #define FullMatchState reinterpret_cast<State*>(2)
487 
488 #define SpecialStateMax FullMatchState
489 
490 // Debugging printouts
491 
492 // For debugging, returns a string representation of the work queue.
494  std::string s;
495  const char* sep = "";
496  for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
497  if (q->is_mark(*it)) {
498  s += "|";
499  sep = "";
500  } else {
501  s += StringPrintf("%s%d", sep, *it);
502  sep = ",";
503  }
504  }
505  return s;
506 }
507 
508 // For debugging, returns a string representation of the state.
510  if (state == NULL)
511  return "_";
512  if (state == DeadState)
513  return "X";
514  if (state == FullMatchState)
515  return "*";
516  std::string s;
517  const char* sep = "";
518  s += StringPrintf("(%p)", state);
519  for (int i = 0; i < state->ninst_; i++) {
520  if (state->inst_[i] == Mark) {
521  s += "|";
522  sep = "";
523  } else if (state->inst_[i] == MatchSep) {
524  s += "||";
525  sep = "";
526  } else {
527  s += StringPrintf("%s%d", sep, state->inst_[i]);
528  sep = ",";
529  }
530  }
531  s += StringPrintf(" flag=%#x", state->flag_);
532  return s;
533 }
534 
536 //
537 // DFA state graph construction.
538 //
539 // The DFA state graph is a heavily-linked collection of State* structures.
540 // The state_cache_ is a set of all the State structures ever allocated,
541 // so that if the same state is reached by two different paths,
542 // the same State structure can be used. This reduces allocation
543 // requirements and also avoids duplication of effort across the two
544 // identical states.
545 //
546 // A State is defined by an ordered list of instruction ids and a flag word.
547 //
548 // The choice of an ordered list of instructions differs from a typical
549 // textbook DFA implementation, which would use an unordered set.
550 // Textbook descriptions, however, only care about whether
551 // the DFA matches, not where it matches in the text. To decide where the
552 // DFA matches, we need to mimic the behavior of the dominant backtracking
553 // implementations like PCRE, which try one possible regular expression
554 // execution, then another, then another, stopping when one of them succeeds.
555 // The DFA execution tries these many executions in parallel, representing
556 // each by an instruction id. These pointers are ordered in the State.inst_
557 // list in the same order that the executions would happen in a backtracking
558 // search: if a match is found during execution of inst_[2], inst_[i] for i>=3
559 // can be discarded.
560 //
561 // Textbooks also typically do not consider context-aware empty string operators
562 // like ^ or $. These are handled by the flag word, which specifies the set
563 // of empty-string operators that should be matched when executing at the
564 // current text position. These flag bits are defined in prog.h.
565 // The flag word also contains two DFA-specific bits: kFlagMatch if the state
566 // is a matching state (one that reached a kInstMatch in the program)
567 // and kFlagLastWord if the last processed byte was a word character, for the
568 // implementation of \B and \b.
569 //
570 // The flag word also contains, shifted up 16 bits, the bits looked for by
571 // any kInstEmptyWidth instructions in the state. These provide a useful
572 // summary indicating when new flags might be useful.
573 //
574 // The permanent representation of a State's instruction ids is just an array,
575 // but while a state is being analyzed, these instruction ids are represented
576 // as a Workq, which is an array that allows iteration in insertion order.
577 
578 // NOTE(rsc): The choice of State construction determines whether the DFA
579 // mimics backtracking implementations (so-called leftmost first matching) or
580 // traditional DFA implementations (so-called leftmost longest matching as
581 // prescribed by POSIX). This implementation chooses to mimic the
582 // backtracking implementations, because we want to replace PCRE. To get
583 // POSIX behavior, the states would need to be considered not as a simple
584 // ordered list of instruction ids, but as a list of unordered sets of instruction
585 // ids. A match by a state in one set would inhibit the running of sets
586 // farther down the list but not other instruction ids in the same set. Each
587 // set would correspond to matches beginning at a given point in the string.
588 // This is implemented by separating different sets with Mark pointers.
589 
590 // Looks in the State cache for a State matching q, flag.
591 // If one is found, returns it. If one is not found, allocates one,
592 // inserts it in the cache, and returns it.
593 // If mq is not null, MatchSep and the match IDs in mq will be appended
594 // to the State.
595 DFA::State* DFA::WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag) {
596  //mutex_.AssertHeld();
597 
598  // Construct array of instruction ids for the new state.
599  // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
600  // those are the only operators with any effect in
601  // RunWorkqOnEmptyString or RunWorkqOnByte.
602  PODArray<int> inst(q->size());
603  int n = 0;
604  uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
605  bool sawmatch = false; // whether queue contains guaranteed kInstMatch
606  bool sawmark = false; // whether queue contains a Mark
607  if (ExtraDebug)
608  fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
609  for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
610  int id = *it;
611  if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
612  break;
613  if (q->is_mark(id)) {
614  if (n > 0 && inst[n-1] != Mark) {
615  sawmark = true;
616  inst[n++] = Mark;
617  }
618  continue;
619  }
620  Prog::Inst* ip = prog_->inst(id);
621  switch (ip->opcode()) {
622  case kInstAltMatch:
623  // This state will continue to a match no matter what
624  // the rest of the input is. If it is the highest priority match
625  // being considered, return the special FullMatchState
626  // to indicate that it's all matches from here out.
627  if (kind_ != Prog::kManyMatch &&
628  (kind_ != Prog::kFirstMatch ||
629  (it == q->begin() && ip->greedy(prog_))) &&
630  (kind_ != Prog::kLongestMatch || !sawmark) &&
631  (flag & kFlagMatch)) {
632  if (ExtraDebug)
633  fprintf(stderr, " -> FullMatchState\n");
634  return FullMatchState;
635  }
637  default:
638  // Record iff id is the head of its list, which must
639  // be the case if id-1 is the last of *its* list. :)
640  if (prog_->inst(id-1)->last())
641  inst[n++] = *it;
642  if (ip->opcode() == kInstEmptyWidth)
643  needflags |= ip->empty();
644  if (ip->opcode() == kInstMatch && !prog_->anchor_end())
645  sawmatch = true;
646  break;
647  }
648  }
649  DCHECK_LE(n, q->size());
650  if (n > 0 && inst[n-1] == Mark)
651  n--;
652 
653  // If there are no empty-width instructions waiting to execute,
654  // then the extra flag bits will not be used, so there is no
655  // point in saving them. (Discarding them reduces the number
656  // of distinct states.)
657  if (needflags == 0)
658  flag &= kFlagMatch;
659 
660  // NOTE(rsc): The code above cannot do flag &= needflags,
661  // because if the right flags were present to pass the current
662  // kInstEmptyWidth instructions, new kInstEmptyWidth instructions
663  // might be reached that in turn need different flags.
664  // The only sure thing is that if there are no kInstEmptyWidth
665  // instructions at all, no flags will be needed.
666  // We could do the extra work to figure out the full set of
667  // possibly needed flags by exploring past the kInstEmptyWidth
668  // instructions, but the check above -- are any flags needed
669  // at all? -- handles the most common case. More fine-grained
670  // analysis can only be justified by measurements showing that
671  // too many redundant states are being allocated.
672 
673  // If there are no Insts in the list, it's a dead state,
674  // which is useful to signal with a special pointer so that
675  // the execution loop can stop early. This is only okay
676  // if the state is *not* a matching state.
677  if (n == 0 && flag == 0) {
678  if (ExtraDebug)
679  fprintf(stderr, " -> DeadState\n");
680  return DeadState;
681  }
682 
683  // If we're in longest match mode, the state is a sequence of
684  // unordered state sets separated by Marks. Sort each set
685  // to canonicalize, to reduce the number of distinct sets stored.
686  if (kind_ == Prog::kLongestMatch) {
687  int* ip = inst.data();
688  int* ep = ip + n;
689  while (ip < ep) {
690  int* markp = ip;
691  while (markp < ep && *markp != Mark)
692  markp++;
693  std::sort(ip, markp);
694  if (markp < ep)
695  markp++;
696  ip = markp;
697  }
698  }
699 
700  // If we're in many match mode, canonicalize for similar reasons:
701  // we have an unordered set of states (i.e. we don't have Marks)
702  // and sorting will reduce the number of distinct sets stored.
703  if (kind_ == Prog::kManyMatch) {
704  int* ip = inst.data();
705  int* ep = ip + n;
706  std::sort(ip, ep);
707  }
708 
709  // Append MatchSep and the match IDs in mq if necessary.
710  if (mq != NULL) {
711  inst[n++] = MatchSep;
712  for (Workq::iterator i = mq->begin(); i != mq->end(); ++i) {
713  int id = *i;
714  Prog::Inst* ip = prog_->inst(id);
715  if (ip->opcode() == kInstMatch)
716  inst[n++] = ip->match_id();
717  }
718  }
719 
720  // Save the needed empty-width flags in the top bits for use later.
721  flag |= needflags << kFlagNeedShift;
722 
723  State* state = CachedState(inst.data(), n, flag);
724  return state;
725 }
726 
727 // Looks in the State cache for a State matching inst, ninst, flag.
728 // If one is found, returns it. If one is not found, allocates one,
729 // inserts it in the cache, and returns it.
730 DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
731  //mutex_.AssertHeld();
732 
733  // Look in the cache for a pre-existing state.
734  // We have to initialise the struct like this because otherwise
735  // MSVC will complain about the flexible array member. :(
736  State state;
737  state.inst_ = inst;
738  state.ninst_ = ninst;
739  state.flag_ = flag;
741  if (it != state_cache_.end()) {
742  if (ExtraDebug)
743  fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
744  return *it;
745  }
746 
747  // Must have enough memory for new state.
748  // In addition to what we're going to allocate,
749  // the state cache hash table seems to incur about 40 bytes per
750  // State*, empirically.
751  const int kStateCacheOverhead = 40;
752  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
753  int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
754  ninst*sizeof(int);
755  if (mem_budget_ < mem + kStateCacheOverhead) {
756  mem_budget_ = -1;
757  return NULL;
758  }
759  mem_budget_ -= mem + kStateCacheOverhead;
760 
761  // Allocate new state along with room for next_ and inst_.
762  char* space = std::allocator<char>().allocate(mem);
763  State* s = new (space) State;
764  (void) new (s->next_) std::atomic<State*>[nnext];
765  // Work around a unfortunate bug in older versions of libstdc++.
766  // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
767  for (int i = 0; i < nnext; i++)
768  (void) new (s->next_ + i) std::atomic<State*>(NULL);
769  s->inst_ = new (s->next_ + nnext) int[ninst];
770  memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
771  s->ninst_ = ninst;
772  s->flag_ = flag;
773  if (ExtraDebug)
774  fprintf(stderr, " -> %s\n", DumpState(s).c_str());
775 
776  // Put state in cache and return it.
777  state_cache_.insert(s);
778  return s;
779 }
780 
781 // Clear the cache. Must hold cache_mutex_.w or be in destructor.
782 void DFA::ClearCache() {
785  while (begin != end) {
787  ++begin;
788  // Deallocate the blob of memory that we allocated in DFA::CachedState().
789  // We recompute mem in order to benefit from sized delete where possible.
790  int ninst = (*tmp)->ninst_;
791  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
792  int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
793  ninst*sizeof(int);
794  std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem);
795  }
796  state_cache_.clear();
797 }
798 
799 // Copies insts in state s to the work queue q.
800 void DFA::StateToWorkq(State* s, Workq* q) {
801  q->clear();
802  for (int i = 0; i < s->ninst_; i++) {
803  if (s->inst_[i] == Mark) {
804  q->mark();
805  } else if (s->inst_[i] == MatchSep) {
806  // Nothing after this is an instruction!
807  break;
808  } else {
809  // Explore from the head of the list.
810  AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask);
811  }
812  }
813 }
814 
815 // Adds ip to the work queue, following empty arrows according to flag.
816 void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
817 
818  // Use stack_ to hold our stack of instructions yet to process.
819  // It was preallocated as follows:
820  // one entry per Capture;
821  // one entry per EmptyWidth; and
822  // one entry per Nop.
823  // This reflects the maximum number of stack pushes that each can
824  // perform. (Each instruction can be processed at most once.)
825  // When using marks, we also added nmark == prog_->size().
826  // (Otherwise, nmark == 0.)
827  int* stk = stack_.data();
828  int nstk = 0;
829 
830  stk[nstk++] = id;
831  while (nstk > 0) {
832  DCHECK_LE(nstk, stack_.size());
833  id = stk[--nstk];
834 
835  Loop:
836  if (id == Mark) {
837  q->mark();
838  continue;
839  }
840 
841  if (id == 0)
842  continue;
843 
844  // If ip is already on the queue, nothing to do.
845  // Otherwise add it. We don't actually keep all the
846  // ones that get added, but adding all of them here
847  // increases the likelihood of q->contains(id),
848  // reducing the amount of duplicated work.
849  if (q->contains(id))
850  continue;
851  q->insert_new(id);
852 
853  // Process instruction.
854  Prog::Inst* ip = prog_->inst(id);
855  switch (ip->opcode()) {
856  default:
857  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
858  break;
859 
860  case kInstByteRange: // just save these on the queue
861  case kInstMatch:
862  if (ip->last())
863  break;
864  id = id+1;
865  goto Loop;
866 
867  case kInstCapture: // DFA treats captures as no-ops.
868  case kInstNop:
869  if (!ip->last())
870  stk[nstk++] = id+1;
871 
872  // If this instruction is the [00-FF]* loop at the beginning of
873  // a leftmost-longest unanchored search, separate with a Mark so
874  // that future threads (which will start farther to the right in
875  // the input string) are lower priority than current threads.
876  if (ip->opcode() == kInstNop && q->maxmark() > 0 &&
877  id == prog_->start_unanchored() && id != prog_->start())
878  stk[nstk++] = Mark;
879  id = ip->out();
880  goto Loop;
881 
882  case kInstAltMatch:
883  DCHECK(!ip->last());
884  id = id+1;
885  goto Loop;
886 
887  case kInstEmptyWidth:
888  if (!ip->last())
889  stk[nstk++] = id+1;
890 
891  // Continue on if we have all the right flag bits.
892  if (ip->empty() & ~flag)
893  break;
894  id = ip->out();
895  goto Loop;
896  }
897  }
898 }
899 
900 // Running of work queues. In the work queue, order matters:
901 // the queue is sorted in priority order. If instruction i comes before j,
902 // then the instructions that i produces during the run must come before
903 // the ones that j produces. In order to keep this invariant, all the
904 // work queue runners have to take an old queue to process and then
905 // also a new queue to fill in. It's not acceptable to add to the end of
906 // an existing queue, because new instructions will not end up in the
907 // correct position.
908 
909 // Runs the work queue, processing the empty strings indicated by flag.
910 // For example, flag == kEmptyBeginLine|kEmptyEndLine means to match
911 // both ^ and $. It is important that callers pass all flags at once:
912 // processing both ^ and $ is not the same as first processing only ^
913 // and then processing only $. Doing the two-step sequence won't match
914 // ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior
915 // exhibited by existing implementations).
916 void DFA::RunWorkqOnEmptyString(Workq* oldq, Workq* newq, uint32_t flag) {
917  newq->clear();
918  for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
919  if (oldq->is_mark(*i))
920  AddToQueue(newq, Mark, flag);
921  else
922  AddToQueue(newq, *i, flag);
923  }
924 }
925 
926 // Runs the work queue, processing the single byte c followed by any empty
927 // strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine,
928 // means to match c$. Sets the bool *ismatch to true if the end of the
929 // regular expression program has been reached (the regexp has matched).
930 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
931  int c, uint32_t flag, bool* ismatch) {
932  //mutex_.AssertHeld();
933 
934  newq->clear();
935  for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
936  if (oldq->is_mark(*i)) {
937  if (*ismatch)
938  return;
939  newq->mark();
940  continue;
941  }
942  int id = *i;
943  Prog::Inst* ip = prog_->inst(id);
944  switch (ip->opcode()) {
945  default:
946  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
947  break;
948 
949  case kInstFail: // never succeeds
950  case kInstCapture: // already followed
951  case kInstNop: // already followed
952  case kInstAltMatch: // already followed
953  case kInstEmptyWidth: // already followed
954  break;
955 
956  case kInstByteRange: // can follow if c is in range
957  if (!ip->Matches(c))
958  break;
959  AddToQueue(newq, ip->out(), flag);
960  if (ip->hint() != 0) {
961  // We have a hint, but we must cancel out the
962  // increment that will occur after the break.
963  i += ip->hint() - 1;
964  } else {
965  // We have no hint, so we must find the end
966  // of the current list and then skip to it.
967  Prog::Inst* ip0 = ip;
968  while (!ip->last())
969  ++ip;
970  i += ip - ip0;
971  }
972  break;
973 
974  case kInstMatch:
975  if (prog_->anchor_end() && c != kByteEndText &&
977  break;
978  *ismatch = true;
979  if (kind_ == Prog::kFirstMatch) {
980  // Can stop processing work queue since we found a match.
981  return;
982  }
983  break;
984  }
985  }
986 
987  if (ExtraDebug)
988  fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n",
989  DumpWorkq(oldq).c_str(), c, flag, DumpWorkq(newq).c_str(), *ismatch);
990 }
991 
992 // Processes input byte c in state, returning new state.
993 // Caller does not hold mutex.
994 DFA::State* DFA::RunStateOnByteUnlocked(State* state, int c) {
995  // Keep only one RunStateOnByte going
996  // even if the DFA is being run by multiple threads.
997  MutexLock l(&mutex_);
998  return RunStateOnByte(state, c);
999 }
1000 
1001 // Processes input byte c in state, returning new state.
1002 DFA::State* DFA::RunStateOnByte(State* state, int c) {
1003  //mutex_.AssertHeld();
1004 
1005  if (state <= SpecialStateMax) {
1006  if (state == FullMatchState) {
1007  // It is convenient for routines like PossibleMatchRange
1008  // if we implement RunStateOnByte for FullMatchState:
1009  // once you get into this state you never get out,
1010  // so it's pretty easy.
1011  return FullMatchState;
1012  }
1013  if (state == DeadState) {
1014  LOG(DFATAL) << "DeadState in RunStateOnByte";
1015  return NULL;
1016  }
1017  if (state == NULL) {
1018  LOG(DFATAL) << "NULL state in RunStateOnByte";
1019  return NULL;
1020  }
1021  LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
1022  return NULL;
1023  }
1024 
1025  // If someone else already computed this, return it.
1026  State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed);
1027  if (ns != NULL)
1028  return ns;
1029 
1030  // Convert state into Workq.
1032 
1033  // Flags marking the kinds of empty-width things (^ $ etc)
1034  // around this byte. Before the byte we have the flags recorded
1035  // in the State structure itself. After the byte we have
1036  // nothing yet (but that will change: read on).
1037  uint32_t needflag = state->flag_ >> kFlagNeedShift;
1038  uint32_t beforeflag = state->flag_ & kFlagEmptyMask;
1039  uint32_t oldbeforeflag = beforeflag;
1040  uint32_t afterflag = 0;
1041 
1042  if (c == '\n') {
1043  // Insert implicit $ and ^ around \n
1044  beforeflag |= kEmptyEndLine;
1045  afterflag |= kEmptyBeginLine;
1046  }
1047 
1048  if (c == kByteEndText) {
1049  // Insert implicit $ and \z before the fake "end text" byte.
1050  beforeflag |= kEmptyEndLine | kEmptyEndText;
1051  }
1052 
1053  // The state flag kFlagLastWord says whether the last
1054  // byte processed was a word character. Use that info to
1055  // insert empty-width (non-)word boundaries.
1056  bool islastword = (state->flag_ & kFlagLastWord) != 0;
1057  bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c));
1058  if (isword == islastword)
1059  beforeflag |= kEmptyNonWordBoundary;
1060  else
1061  beforeflag |= kEmptyWordBoundary;
1062 
1063  // Okay, finally ready to run.
1064  // Only useful to rerun on empty string if there are new, useful flags.
1065  if (beforeflag & ~oldbeforeflag & needflag) {
1066  RunWorkqOnEmptyString(q0_, q1_, beforeflag);
1067  using std::swap;
1068  swap(q0_, q1_);
1069  }
1070  bool ismatch = false;
1071  RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch);
1072  using std::swap;
1073  swap(q0_, q1_);
1074 
1075  // Save afterflag along with ismatch and isword in new state.
1076  uint32_t flag = afterflag;
1077  if (ismatch)
1078  flag |= kFlagMatch;
1079  if (isword)
1080  flag |= kFlagLastWord;
1081 
1082  if (ismatch && kind_ == Prog::kManyMatch)
1084  else
1085  ns = WorkqToCachedState(q0_, NULL, flag);
1086 
1087  // Flush ns before linking to it.
1088  // Write barrier before updating state->next_ so that the
1089  // main search loop can proceed without any locking, for speed.
1090  // (Otherwise it would need one mutex operation per input byte.)
1091  state->next_[ByteMap(c)].store(ns, std::memory_order_release);
1092  return ns;
1093 }
1094 
1095 
1097 // DFA cache reset.
1098 
1099 // Reader-writer lock helper.
1100 //
1101 // The DFA uses a reader-writer mutex to protect the state graph itself.
1102 // Traversing the state graph requires holding the mutex for reading,
1103 // and discarding the state graph and starting over requires holding the
1104 // lock for writing. If a search needs to expand the graph but is out
1105 // of memory, it will need to drop its read lock and then acquire the
1106 // write lock. Since it cannot then atomically downgrade from write lock
1107 // to read lock, it runs the rest of the search holding the write lock.
1108 // (This probably helps avoid repeated contention, but really the decision
1109 // is forced by the Mutex interface.) It's a bit complicated to keep
1110 // track of whether the lock is held for reading or writing and thread
1111 // that through the search, so instead we encapsulate it in the RWLocker
1112 // and pass that around.
1113 
1114 class DFA::RWLocker {
1115  public:
1116  explicit RWLocker(CacheMutex* mu);
1117  ~RWLocker();
1118 
1119  // If the lock is only held for reading right now,
1120  // drop the read lock and re-acquire for writing.
1121  // Subsequent calls to LockForWriting are no-ops.
1122  // Notice that the lock is *released* temporarily.
1123  void LockForWriting();
1124 
1125  private:
1127  bool writing_;
1128 
1129  RWLocker(const RWLocker&) = delete;
1130  RWLocker& operator=(const RWLocker&) = delete;
1131 };
1132 
1133 DFA::RWLocker::RWLocker(CacheMutex* mu) : mu_(mu), writing_(false) {
1134  mu_->ReaderLock();
1135 }
1136 
1137 // This function is marked as NO_THREAD_SAFETY_ANALYSIS because
1138 // the annotations don't support lock upgrade.
1140  if (!writing_) {
1141  mu_->ReaderUnlock();
1142  mu_->WriterLock();
1143  writing_ = true;
1144  }
1145 }
1146 
1148  if (!writing_)
1149  mu_->ReaderUnlock();
1150  else
1151  mu_->WriterUnlock();
1152 }
1153 
1154 
1155 // When the DFA's State cache fills, we discard all the states in the
1156 // cache and start over. Many threads can be using and adding to the
1157 // cache at the same time, so we synchronize using the cache_mutex_
1158 // to keep from stepping on other threads. Specifically, all the
1159 // threads using the current cache hold cache_mutex_ for reading.
1160 // When a thread decides to flush the cache, it drops cache_mutex_
1161 // and then re-acquires it for writing. That ensures there are no
1162 // other threads accessing the cache anymore. The rest of the search
1163 // runs holding cache_mutex_ for writing, avoiding any contention
1164 // with or cache pollution caused by other threads.
1165 
1166 void DFA::ResetCache(RWLocker* cache_lock) {
1167  // Re-acquire the cache_mutex_ for writing (exclusive use).
1168  cache_lock->LockForWriting();
1169 
1170  hooks::GetDFAStateCacheResetHook()({
1171  state_budget_,
1172  state_cache_.size(),
1173  });
1174 
1175  // Clear the cache, reset the memory budget.
1176  for (int i = 0; i < kMaxStart; i++)
1177  start_[i].start.store(NULL, std::memory_order_relaxed);
1178  ClearCache();
1180 }
1181 
1182 // Typically, a couple States do need to be preserved across a cache
1183 // reset, like the State at the current point in the search.
1184 // The StateSaver class helps keep States across cache resets.
1185 // It makes a copy of the state's guts outside the cache (before the reset)
1186 // and then can be asked, after the reset, to recreate the State
1187 // in the new cache. For example, in a DFA method ("this" is a DFA):
1188 //
1189 // StateSaver saver(this, s);
1190 // ResetCache(cache_lock);
1191 // s = saver.Restore();
1192 //
1193 // The saver should always have room in the cache to re-create the state,
1194 // because resetting the cache locks out all other threads, and the cache
1195 // is known to have room for at least a couple states (otherwise the DFA
1196 // constructor fails).
1197 
1198 class DFA::StateSaver {
1199  public:
1200  explicit StateSaver(DFA* dfa, State* state);
1201  ~StateSaver();
1202 
1203  // Recreates and returns a state equivalent to the
1204  // original state passed to the constructor.
1205  // Returns NULL if the cache has filled, but
1206  // since the DFA guarantees to have room in the cache
1207  // for a couple states, should never return NULL
1208  // if used right after ResetCache.
1209  State* Restore();
1210 
1211  private:
1212  DFA* dfa_; // the DFA to use
1213  int* inst_; // saved info from State
1214  int ninst_;
1215  uint32_t flag_;
1216  bool is_special_; // whether original state was special
1217  State* special_; // if is_special_, the original state
1218 
1219  StateSaver(const StateSaver&) = delete;
1220  StateSaver& operator=(const StateSaver&) = delete;
1221 };
1222 
1223 DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
1224  dfa_ = dfa;
1225  if (state <= SpecialStateMax) {
1226  inst_ = NULL;
1227  ninst_ = 0;
1228  flag_ = 0;
1229  is_special_ = true;
1230  special_ = state;
1231  return;
1232  }
1233  is_special_ = false;
1234  special_ = NULL;
1235  flag_ = state->flag_;
1236  ninst_ = state->ninst_;
1237  inst_ = new int[ninst_];
1238  memmove(inst_, state->inst_, ninst_*sizeof inst_[0]);
1239 }
1240 
1242  if (!is_special_)
1243  delete[] inst_;
1244 }
1245 
1246 DFA::State* DFA::StateSaver::Restore() {
1247  if (is_special_)
1248  return special_;
1249  MutexLock l(&dfa_->mutex_);
1250  State* s = dfa_->CachedState(inst_, ninst_, flag_);
1251  if (s == NULL)
1252  LOG(DFATAL) << "StateSaver failed to restore state.";
1253  return s;
1254 }
1255 
1256 
1258 //
1259 // DFA execution.
1260 //
1261 // The basic search loop is easy: start in a state s and then for each
1262 // byte c in the input, s = s->next[c].
1263 //
1264 // This simple description omits a few efficiency-driven complications.
1265 //
1266 // First, the State graph is constructed incrementally: it is possible
1267 // that s->next[c] is null, indicating that that state has not been
1268 // fully explored. In this case, RunStateOnByte must be invoked to
1269 // determine the next state, which is cached in s->next[c] to save
1270 // future effort. An alternative reason for s->next[c] to be null is
1271 // that the DFA has reached a so-called "dead state", in which any match
1272 // is no longer possible. In this case RunStateOnByte will return NULL
1273 // and the processing of the string can stop early.
1274 //
1275 // Second, a 256-element pointer array for s->next_ makes each State
1276 // quite large (2kB on 64-bit machines). Instead, dfa->bytemap_[]
1277 // maps from bytes to "byte classes" and then next_ only needs to have
1278 // as many pointers as there are byte classes. A byte class is simply a
1279 // range of bytes that the regexp never distinguishes between.
1280 // A regexp looking for a[abc] would have four byte ranges -- 0 to 'a'-1,
1281 // 'a', 'b' to 'c', and 'c' to 0xFF. The bytemap slows us a little bit
1282 // but in exchange we typically cut the size of a State (and thus our
1283 // memory footprint) by about 5-10x. The comments still refer to
1284 // s->next[c] for simplicity, but code should refer to s->next_[bytemap_[c]].
1285 //
1286 // Third, it is common for a DFA for an unanchored match to begin in a
1287 // state in which only one particular byte value can take the DFA to a
1288 // different state. That is, s->next[c] != s for only one c. In this
1289 // situation, the DFA can do better than executing the simple loop.
1290 // Instead, it can call memchr to search very quickly for the byte c.
1291 // Whether the start state has this property is determined during a
1292 // pre-compilation pass and the "can_prefix_accel" argument is set.
1293 //
1294 // Fourth, the desired behavior is to search for the leftmost-best match
1295 // (approximately, the same one that Perl would find), which is not
1296 // necessarily the match ending earliest in the string. Each time a
1297 // match is found, it must be noted, but the DFA must continue on in
1298 // hope of finding a higher-priority match. In some cases, the caller only
1299 // cares whether there is any match at all, not which one is found.
1300 // The "want_earliest_match" flag causes the search to stop at the first
1301 // match found.
1302 //
1303 // Fifth, one algorithm that uses the DFA needs it to run over the
1304 // input string backward, beginning at the end and ending at the beginning.
1305 // Passing false for the "run_forward" flag causes the DFA to run backward.
1306 //
1307 // The checks for these last three cases, which in a naive implementation
1308 // would be performed once per input byte, slow the general loop enough
1309 // to merit specialized versions of the search loop for each of the
1310 // eight possible settings of the three booleans. Rather than write
1311 // eight different functions, we write one general implementation and then
1312 // inline it to create the specialized ones.
1313 //
1314 // Note that matches are delayed by one byte, to make it easier to
1315 // accomodate match conditions depending on the next input byte (like $ and \b).
1316 // When s->next[c]->IsMatch(), it means that there is a match ending just
1317 // *before* byte c.
1318 
1319 // The generic search loop. Searches text for a match, returning
1320 // the pointer to the end of the chosen match, or NULL if no match.
1321 // The bools are equal to the same-named variables in params, but
1322 // making them function arguments lets the inliner specialize
1323 // this function to each combination (see two paragraphs above).
1324 template <bool can_prefix_accel,
1325  bool want_earliest_match,
1326  bool run_forward>
1327 inline bool DFA::InlinedSearchLoop(SearchParams* params) {
1328  State* start = params->start;
1329  const uint8_t* bp = BytePtr(params->text.data()); // start of text
1330  const uint8_t* p = bp; // text scanning point
1331  const uint8_t* ep = BytePtr(params->text.data() +
1332  params->text.size()); // end of text
1333  const uint8_t* resetp = NULL; // p at last cache reset
1334  if (!run_forward) {
1335  using std::swap;
1336  swap(p, ep);
1337  }
1338 
1339  const uint8_t* bytemap = prog_->bytemap();
1340  const uint8_t* lastmatch = NULL; // most recent matching position in text
1341  bool matched = false;
1342 
1343  State* s = start;
1344  if (ExtraDebug)
1345  fprintf(stderr, "@stx: %s\n", DumpState(s).c_str());
1346 
1347  if (s->IsMatch()) {
1348  matched = true;
1349  lastmatch = p;
1350  if (ExtraDebug)
1351  fprintf(stderr, "match @stx! [%s]\n", DumpState(s).c_str());
1352  if (params->matches != NULL && kind_ == Prog::kManyMatch) {
1353  for (int i = s->ninst_ - 1; i >= 0; i--) {
1354  int id = s->inst_[i];
1355  if (id == MatchSep)
1356  break;
1357  params->matches->insert(id);
1358  }
1359  }
1360  if (want_earliest_match) {
1361  params->ep = reinterpret_cast<const char*>(lastmatch);
1362  return true;
1363  }
1364  }
1365 
1366  while (p != ep) {
1367  if (ExtraDebug)
1368  fprintf(stderr, "@%td: %s\n", p - bp, DumpState(s).c_str());
1369 
1370  if (can_prefix_accel && s == start) {
1371  // In start state, only way out is to find the prefix,
1372  // so we use prefix accel (e.g. memchr) to skip ahead.
1373  // If not found, we can skip to the end of the string.
1374  p = BytePtr(prog_->PrefixAccel(p, ep - p));
1375  if (p == NULL) {
1376  p = ep;
1377  break;
1378  }
1379  }
1380 
1381  int c;
1382  if (run_forward)
1383  c = *p++;
1384  else
1385  c = *--p;
1386 
1387  // Note that multiple threads might be consulting
1388  // s->next_[bytemap[c]] simultaneously.
1389  // RunStateOnByte takes care of the appropriate locking,
1390  // including a memory barrier so that the unlocked access
1391  // (sometimes known as "double-checked locking") is safe.
1392  // The alternative would be either one DFA per thread
1393  // or one mutex operation per input byte.
1394  //
1395  // ns == DeadState means the state is known to be dead
1396  // (no more matches are possible).
1397  // ns == NULL means the state has not yet been computed
1398  // (need to call RunStateOnByteUnlocked).
1399  // RunStateOnByte returns ns == NULL if it is out of memory.
1400  // ns == FullMatchState means the rest of the string matches.
1401  //
1402  // Okay to use bytemap[] not ByteMap() here, because
1403  // c is known to be an actual byte and not kByteEndText.
1404 
1405  State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
1406  if (ns == NULL) {
1407  ns = RunStateOnByteUnlocked(s, c);
1408  if (ns == NULL) {
1409  // After we reset the cache, we hold cache_mutex exclusively,
1410  // so if resetp != NULL, it means we filled the DFA state
1411  // cache with this search alone (without any other threads).
1412  // Benchmarks show that doing a state computation on every
1413  // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the
1414  // same at about 2 MB/s. Unless we're processing an average
1415  // of 10 bytes per state computation, fail so that RE2 can
1416  // fall back to the NFA. However, RE2::Set cannot fall back,
1417  // so we just have to keep on keeping on in that case.
1418  if (dfa_should_bail_when_slow && resetp != NULL &&
1419  static_cast<size_t>(p - resetp) < 10*state_cache_.size() &&
1420  kind_ != Prog::kManyMatch) {
1421  params->failed = true;
1422  return false;
1423  }
1424  resetp = p;
1425 
1426  // Prepare to save start and s across the reset.
1427  StateSaver save_start(this, start);
1428  StateSaver save_s(this, s);
1429 
1430  // Discard all the States in the cache.
1431  ResetCache(params->cache_lock);
1432 
1433  // Restore start and s so we can continue.
1434  if ((start = save_start.Restore()) == NULL ||
1435  (s = save_s.Restore()) == NULL) {
1436  // Restore already did LOG(DFATAL).
1437  params->failed = true;
1438  return false;
1439  }
1440  ns = RunStateOnByteUnlocked(s, c);
1441  if (ns == NULL) {
1442  LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
1443  params->failed = true;
1444  return false;
1445  }
1446  }
1447  }
1448  if (ns <= SpecialStateMax) {
1449  if (ns == DeadState) {
1450  params->ep = reinterpret_cast<const char*>(lastmatch);
1451  return matched;
1452  }
1453  // FullMatchState
1454  params->ep = reinterpret_cast<const char*>(ep);
1455  return true;
1456  }
1457 
1458  s = ns;
1459  if (s->IsMatch()) {
1460  matched = true;
1461  // The DFA notices the match one byte late,
1462  // so adjust p before using it in the match.
1463  if (run_forward)
1464  lastmatch = p - 1;
1465  else
1466  lastmatch = p + 1;
1467  if (ExtraDebug)
1468  fprintf(stderr, "match @%td! [%s]\n", lastmatch - bp, DumpState(s).c_str());
1469  if (params->matches != NULL && kind_ == Prog::kManyMatch) {
1470  for (int i = s->ninst_ - 1; i >= 0; i--) {
1471  int id = s->inst_[i];
1472  if (id == MatchSep)
1473  break;
1474  params->matches->insert(id);
1475  }
1476  }
1477  if (want_earliest_match) {
1478  params->ep = reinterpret_cast<const char*>(lastmatch);
1479  return true;
1480  }
1481  }
1482  }
1483 
1484  // Process one more byte to see if it triggers a match.
1485  // (Remember, matches are delayed one byte.)
1486  if (ExtraDebug)
1487  fprintf(stderr, "@etx: %s\n", DumpState(s).c_str());
1488 
1489  int lastbyte;
1490  if (run_forward) {
1491  if (params->text.end() == params->context.end())
1492  lastbyte = kByteEndText;
1493  else
1494  lastbyte = params->text.end()[0] & 0xFF;
1495  } else {
1496  if (params->text.begin() == params->context.begin())
1497  lastbyte = kByteEndText;
1498  else
1499  lastbyte = params->text.begin()[-1] & 0xFF;
1500  }
1501 
1502  State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
1503  if (ns == NULL) {
1504  ns = RunStateOnByteUnlocked(s, lastbyte);
1505  if (ns == NULL) {
1506  StateSaver save_s(this, s);
1507  ResetCache(params->cache_lock);
1508  if ((s = save_s.Restore()) == NULL) {
1509  params->failed = true;
1510  return false;
1511  }
1512  ns = RunStateOnByteUnlocked(s, lastbyte);
1513  if (ns == NULL) {
1514  LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
1515  params->failed = true;
1516  return false;
1517  }
1518  }
1519  }
1520  if (ns <= SpecialStateMax) {
1521  if (ns == DeadState) {
1522  params->ep = reinterpret_cast<const char*>(lastmatch);
1523  return matched;
1524  }
1525  // FullMatchState
1526  params->ep = reinterpret_cast<const char*>(ep);
1527  return true;
1528  }
1529 
1530  s = ns;
1531  if (s->IsMatch()) {
1532  matched = true;
1533  lastmatch = p;
1534  if (ExtraDebug)
1535  fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
1536  if (params->matches != NULL && kind_ == Prog::kManyMatch) {
1537  for (int i = s->ninst_ - 1; i >= 0; i--) {
1538  int id = s->inst_[i];
1539  if (id == MatchSep)
1540  break;
1541  params->matches->insert(id);
1542  }
1543  }
1544  }
1545 
1546  params->ep = reinterpret_cast<const char*>(lastmatch);
1547  return matched;
1548 }
1549 
1550 // Inline specializations of the general loop.
1551 bool DFA::SearchFFF(SearchParams* params) {
1552  return InlinedSearchLoop<false, false, false>(params);
1553 }
1554 bool DFA::SearchFFT(SearchParams* params) {
1555  return InlinedSearchLoop<false, false, true>(params);
1556 }
1557 bool DFA::SearchFTF(SearchParams* params) {
1558  return InlinedSearchLoop<false, true, false>(params);
1559 }
1560 bool DFA::SearchFTT(SearchParams* params) {
1561  return InlinedSearchLoop<false, true, true>(params);
1562 }
1563 bool DFA::SearchTFF(SearchParams* params) {
1564  return InlinedSearchLoop<true, false, false>(params);
1565 }
1566 bool DFA::SearchTFT(SearchParams* params) {
1567  return InlinedSearchLoop<true, false, true>(params);
1568 }
1569 bool DFA::SearchTTF(SearchParams* params) {
1570  return InlinedSearchLoop<true, true, false>(params);
1571 }
1572 bool DFA::SearchTTT(SearchParams* params) {
1573  return InlinedSearchLoop<true, true, true>(params);
1574 }
1575 
1576 // For performance, calls the appropriate specialized version
1577 // of InlinedSearchLoop.
1578 bool DFA::FastSearchLoop(SearchParams* params) {
1579  // Because the methods are private, the Searches array
1580  // cannot be declared at top level.
1581  static bool (DFA::*Searches[])(SearchParams*) = {
1582  &DFA::SearchFFF,
1583  &DFA::SearchFFT,
1584  &DFA::SearchFTF,
1585  &DFA::SearchFTT,
1586  &DFA::SearchTFF,
1587  &DFA::SearchTFT,
1588  &DFA::SearchTTF,
1589  &DFA::SearchTTT,
1590  };
1591 
1592  int index = 4 * params->can_prefix_accel +
1593  2 * params->want_earliest_match +
1594  1 * params->run_forward;
1595  return (this->*Searches[index])(params);
1596 }
1597 
1598 
1599 // The discussion of DFA execution above ignored the question of how
1600 // to determine the initial state for the search loop. There are two
1601 // factors that influence the choice of start state.
1602 //
1603 // The first factor is whether the search is anchored or not.
1604 // The regexp program (Prog*) itself has
1605 // two different entry points: one for anchored searches and one for
1606 // unanchored searches. (The unanchored version starts with a leading ".*?"
1607 // and then jumps to the anchored one.)
1608 //
1609 // The second factor is where text appears in the larger context, which
1610 // determines which empty-string operators can be matched at the beginning
1611 // of execution. If text is at the very beginning of context, \A and ^ match.
1612 // Otherwise if text is at the beginning of a line, then ^ matches.
1613 // Otherwise it matters whether the character before text is a word character
1614 // or a non-word character.
1615 //
1616 // The two cases (unanchored vs not) and four cases (empty-string flags)
1617 // combine to make the eight cases recorded in the DFA's begin_text_[2],
1618 // begin_line_[2], after_wordchar_[2], and after_nonwordchar_[2] cached
1619 // StartInfos. The start state for each is filled in the first time it
1620 // is used for an actual search.
1621 
1622 // Examines text, context, and anchored to determine the right start
1623 // state for the DFA search loop. Fills in params and returns true on success.
1624 // Returns false on failure.
1625 bool DFA::AnalyzeSearch(SearchParams* params) {
1626  const StringPiece& text = params->text;
1627  const StringPiece& context = params->context;
1628 
1629  // Sanity check: make sure that text lies within context.
1630  if (text.begin() < context.begin() || text.end() > context.end()) {
1631  LOG(DFATAL) << "context does not contain text";
1632  params->start = DeadState;
1633  return true;
1634  }
1635 
1636  // Determine correct search type.
1637  int start;
1638  uint32_t flags;
1639  if (params->run_forward) {
1640  if (text.begin() == context.begin()) {
1643  } else if (text.begin()[-1] == '\n') {
1646  } else if (Prog::IsWordChar(text.begin()[-1] & 0xFF)) {
1648  flags = kFlagLastWord;
1649  } else {
1651  flags = 0;
1652  }
1653  } else {
1654  if (text.end() == context.end()) {
1657  } else if (text.end()[0] == '\n') {
1660  } else if (Prog::IsWordChar(text.end()[0] & 0xFF)) {
1662  flags = kFlagLastWord;
1663  } else {
1665  flags = 0;
1666  }
1667  }
1668  if (params->anchored)
1669  start |= kStartAnchored;
1670  StartInfo* info = &start_[start];
1671 
1672  // Try once without cache_lock for writing.
1673  // Try again after resetting the cache
1674  // (ResetCache will relock cache_lock for writing).
1675  if (!AnalyzeSearchHelper(params, info, flags)) {
1676  ResetCache(params->cache_lock);
1677  if (!AnalyzeSearchHelper(params, info, flags)) {
1678  LOG(DFATAL) << "Failed to analyze start state.";
1679  params->failed = true;
1680  return false;
1681  }
1682  }
1683 
1684  params->start = info->start.load(std::memory_order_acquire);
1685 
1686  // Even if we could prefix accel, we cannot do so when anchored and,
1687  // less obviously, we cannot do so when we are going to need flags.
1688  // This trick works only when there is a single byte that leads to a
1689  // different state!
1690  if (prog_->can_prefix_accel() &&
1691  !params->anchored &&
1692  params->start > SpecialStateMax &&
1693  params->start->flag_ >> kFlagNeedShift == 0)
1694  params->can_prefix_accel = true;
1695 
1696  if (ExtraDebug)
1697  fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
1698  params->anchored, params->run_forward, flags,
1699  DumpState(params->start).c_str(), params->can_prefix_accel);
1700 
1701  return true;
1702 }
1703 
1704 // Fills in info if needed. Returns true on success, false on failure.
1705 bool DFA::AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
1706  uint32_t flags) {
1707  // Quick check.
1708  State* start = info->start.load(std::memory_order_acquire);
1709  if (start != NULL)
1710  return true;
1711 
1712  MutexLock l(&mutex_);
1713  start = info->start.load(std::memory_order_relaxed);
1714  if (start != NULL)
1715  return true;
1716 
1717  q0_->clear();
1718  AddToQueue(q0_,
1719  params->anchored ? prog_->start() : prog_->start_unanchored(),
1720  flags);
1721  start = WorkqToCachedState(q0_, NULL, flags);
1722  if (start == NULL)
1723  return false;
1724 
1725  // Synchronize with "quick check" above.
1726  info->start.store(start, std::memory_order_release);
1727  return true;
1728 }
1729 
1730 // The actual DFA search: calls AnalyzeSearch and then FastSearchLoop.
1731 bool DFA::Search(const StringPiece& text,
1732  const StringPiece& context,
1733  bool anchored,
1734  bool want_earliest_match,
1735  bool run_forward,
1736  bool* failed,
1737  const char** epp,
1738  SparseSet* matches) {
1739  *epp = NULL;
1740  if (!ok()) {
1741  *failed = true;
1742  return false;
1743  }
1744  *failed = false;
1745 
1746  if (ExtraDebug) {
1747  fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
1748  fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
1749  std::string(text).c_str(), anchored, want_earliest_match, run_forward, kind_);
1750  }
1751 
1752  RWLocker l(&cache_mutex_);
1753  SearchParams params(text, context, &l);
1754  params.anchored = anchored;
1755  params.want_earliest_match = want_earliest_match;
1756  params.run_forward = run_forward;
1757  params.matches = matches;
1758 
1759  if (!AnalyzeSearch(&params)) {
1760  *failed = true;
1761  return false;
1762  }
1763  if (params.start == DeadState)
1764  return false;
1765  if (params.start == FullMatchState) {
1766  if (run_forward == want_earliest_match)
1767  *epp = text.data();
1768  else
1769  *epp = text.data() + text.size();
1770  return true;
1771  }
1772  if (ExtraDebug)
1773  fprintf(stderr, "start %s\n", DumpState(params.start).c_str());
1774  bool ret = FastSearchLoop(&params);
1775  if (params.failed) {
1776  *failed = true;
1777  return false;
1778  }
1779  *epp = params.ep;
1780  return ret;
1781 }
1782 
1784  // For a forward DFA, half the memory goes to each DFA.
1785  // However, if it is a "many match" DFA, then there is
1786  // no counterpart with which the memory must be shared.
1787  //
1788  // For a reverse DFA, all the memory goes to the
1789  // "longest match" DFA, because RE2 never does reverse
1790  // "first match" searches.
1791  if (kind == kFirstMatch) {
1792  std::call_once(dfa_first_once_, [](Prog* prog) {
1793  prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2);
1794  }, this);
1795  return dfa_first_;
1796  } else if (kind == kManyMatch) {
1797  std::call_once(dfa_first_once_, [](Prog* prog) {
1798  prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_);
1799  }, this);
1800  return dfa_first_;
1801  } else {
1802  std::call_once(dfa_longest_once_, [](Prog* prog) {
1803  if (!prog->reversed_)
1804  prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2);
1805  else
1806  prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_);
1807  }, this);
1808  return dfa_longest_;
1809  }
1810 }
1811 
1812 void Prog::DeleteDFA(DFA* dfa) {
1813  delete dfa;
1814 }
1815 
1816 // Executes the regexp program to search in text,
1817 // which itself is inside the larger context. (As a convenience,
1818 // passing a NULL context is equivalent to passing text.)
1819 // Returns true if a match is found, false if not.
1820 // If a match is found, fills in match0->end() to point at the end of the match
1821 // and sets match0->begin() to text.begin(), since the DFA can't track
1822 // where the match actually began.
1823 //
1824 // This is the only external interface (class DFA only exists in this file).
1825 //
1826 bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
1827  Anchor anchor, MatchKind kind, StringPiece* match0,
1828  bool* failed, SparseSet* matches) {
1829  *failed = false;
1830 
1831  StringPiece context = const_context;
1832  if (context.data() == NULL)
1833  context = text;
1834  bool caret = anchor_start();
1835  bool dollar = anchor_end();
1836  if (reversed_) {
1837  using std::swap;
1838  swap(caret, dollar);
1839  }
1840  if (caret && context.begin() != text.begin())
1841  return false;
1842  if (dollar && context.end() != text.end())
1843  return false;
1844 
1845  // Handle full match by running an anchored longest match
1846  // and then checking if it covers all of text.
1847  bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch;
1848  bool endmatch = false;
1849  if (kind == kManyMatch) {
1850  // This is split out in order to avoid clobbering kind.
1851  } else if (kind == kFullMatch || anchor_end()) {
1852  endmatch = true;
1853  kind = kLongestMatch;
1854  }
1855 
1856  // If the caller doesn't care where the match is (just whether one exists),
1857  // then we can stop at the very first match we find, the so-called
1858  // "earliest match".
1859  bool want_earliest_match = false;
1860  if (kind == kManyMatch) {
1861  // This is split out in order to avoid clobbering kind.
1862  if (matches == NULL) {
1863  want_earliest_match = true;
1864  }
1865  } else if (match0 == NULL && !endmatch) {
1866  want_earliest_match = true;
1867  kind = kLongestMatch;
1868  }
1869 
1870  DFA* dfa = GetDFA(kind);
1871  const char* ep;
1872  bool matched = dfa->Search(text, context, anchored,
1873  want_earliest_match, !reversed_,
1874  failed, &ep, matches);
1875  if (*failed) {
1876  hooks::GetDFASearchFailureHook()({
1877  // Nothing yet...
1878  });
1879  return false;
1880  }
1881  if (!matched)
1882  return false;
1883  if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
1884  return false;
1885 
1886  // If caller cares, record the boundary of the match.
1887  // We only know where it ends, so use the boundary of text
1888  // as the beginning.
1889  if (match0) {
1890  if (reversed_)
1891  *match0 =
1892  StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
1893  else
1894  *match0 =
1895  StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
1896  }
1897  return true;
1898 }
1899 
1900 // Build out all states in DFA. Returns number of states.
1902  if (!ok())
1903  return 0;
1904 
1905  // Pick out start state for unanchored search
1906  // at beginning of text.
1907  RWLocker l(&cache_mutex_);
1908  SearchParams params(StringPiece(), StringPiece(), &l);
1909  params.anchored = false;
1910  if (!AnalyzeSearch(&params) ||
1911  params.start == NULL ||
1912  params.start == DeadState)
1913  return 0;
1914 
1915  // Add start state to work queue.
1916  // Note that any State* that we handle here must point into the cache,
1917  // so we can simply depend on pointer-as-a-number hashing and equality.
1918  std::unordered_map<State*, int> m;
1919  std::deque<State*> q;
1920  m.emplace(params.start, static_cast<int>(m.size()));
1921  q.push_back(params.start);
1922 
1923  // Compute the input bytes needed to cover all of the next pointers.
1924  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
1925  std::vector<int> input(nnext);
1926  for (int c = 0; c < 256; c++) {
1927  int b = prog_->bytemap()[c];
1928  while (c < 256-1 && prog_->bytemap()[c+1] == b)
1929  c++;
1930  input[b] = c;
1931  }
1933 
1934  // Scratch space for the output.
1935  std::vector<int> output(nnext);
1936 
1937  // Flood to expand every state.
1938  bool oom = false;
1939  while (!q.empty()) {
1940  State* s = q.front();
1941  q.pop_front();
1942  for (int c : input) {
1943  State* ns = RunStateOnByteUnlocked(s, c);
1944  if (ns == NULL) {
1945  oom = true;
1946  break;
1947  }
1948  if (ns == DeadState) {
1949  output[ByteMap(c)] = -1;
1950  continue;
1951  }
1952  if (m.find(ns) == m.end()) {
1953  m.emplace(ns, static_cast<int>(m.size()));
1954  q.push_back(ns);
1955  }
1956  output[ByteMap(c)] = m[ns];
1957  }
1958  if (cb)
1959  cb(oom ? NULL : output.data(),
1960  s == FullMatchState || s->IsMatch());
1961  if (oom)
1962  break;
1963  }
1964 
1965  return static_cast<int>(m.size());
1966 }
1967 
1968 // Build out all states in DFA for kind. Returns number of states.
1969 int Prog::BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb) {
1970  return GetDFA(kind)->BuildAllStates(cb);
1971 }
1972 
1973 // Computes min and max for matching string.
1974 // Won't return strings bigger than maxlen.
1975 bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
1976  if (!ok())
1977  return false;
1978 
1979  // NOTE: if future users of PossibleMatchRange want more precision when
1980  // presented with infinitely repeated elements, consider making this a
1981  // parameter to PossibleMatchRange.
1982  static int kMaxEltRepetitions = 0;
1983 
1984  // Keep track of the number of times we've visited states previously. We only
1985  // revisit a given state if it's part of a repeated group, so if the value
1986  // portion of the map tuple exceeds kMaxEltRepetitions we bail out and set
1987  // |*max| to |PrefixSuccessor(*max)|.
1988  //
1989  // Also note that previously_visited_states[UnseenStatePtr] will, in the STL
1990  // tradition, implicitly insert a '0' value at first use. We take advantage
1991  // of that property below.
1992  std::unordered_map<State*, int> previously_visited_states;
1993 
1994  // Pick out start state for anchored search at beginning of text.
1995  RWLocker l(&cache_mutex_);
1996  SearchParams params(StringPiece(), StringPiece(), &l);
1997  params.anchored = true;
1998  if (!AnalyzeSearch(&params))
1999  return false;
2000  if (params.start == DeadState) { // No matching strings
2001  *min = "";
2002  *max = "";
2003  return true;
2004  }
2005  if (params.start == FullMatchState) // Every string matches: no max
2006  return false;
2007 
2008  // The DFA is essentially a big graph rooted at params.start,
2009  // and paths in the graph correspond to accepted strings.
2010  // Each node in the graph has potentially 256+1 arrows
2011  // coming out, one for each byte plus the magic end of
2012  // text character kByteEndText.
2013 
2014  // To find the smallest possible prefix of an accepted
2015  // string, we just walk the graph preferring to follow
2016  // arrows with the lowest bytes possible. To find the
2017  // largest possible prefix, we follow the largest bytes
2018  // possible.
2019 
2020  // The test for whether there is an arrow from s on byte j is
2021  // ns = RunStateOnByteUnlocked(s, j);
2022  // if (ns == NULL)
2023  // return false;
2024  // if (ns != DeadState && ns->ninst > 0)
2025  // The RunStateOnByteUnlocked call asks the DFA to build out the graph.
2026  // It returns NULL only if the DFA has run out of memory,
2027  // in which case we can't be sure of anything.
2028  // The second check sees whether there was graph built
2029  // and whether it is interesting graph. Nodes might have
2030  // ns->ninst == 0 if they exist only to represent the fact
2031  // that a match was found on the previous byte.
2032 
2033  // Build minimum prefix.
2034  State* s = params.start;
2035  min->clear();
2036  MutexLock lock(&mutex_);
2037  for (int i = 0; i < maxlen; i++) {
2038  if (previously_visited_states[s] > kMaxEltRepetitions)
2039  break;
2040  previously_visited_states[s]++;
2041 
2042  // Stop if min is a match.
2043  State* ns = RunStateOnByte(s, kByteEndText);
2044  if (ns == NULL) // DFA out of memory
2045  return false;
2046  if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
2047  break;
2048 
2049  // Try to extend the string with low bytes.
2050  bool extended = false;
2051  for (int j = 0; j < 256; j++) {
2052  ns = RunStateOnByte(s, j);
2053  if (ns == NULL) // DFA out of memory
2054  return false;
2055  if (ns == FullMatchState ||
2056  (ns > SpecialStateMax && ns->ninst_ > 0)) {
2057  extended = true;
2058  min->append(1, static_cast<char>(j));
2059  s = ns;
2060  break;
2061  }
2062  }
2063  if (!extended)
2064  break;
2065  }
2066 
2067  // Build maximum prefix.
2068  previously_visited_states.clear();
2069  s = params.start;
2070  max->clear();
2071  for (int i = 0; i < maxlen; i++) {
2072  if (previously_visited_states[s] > kMaxEltRepetitions)
2073  break;
2074  previously_visited_states[s] += 1;
2075 
2076  // Try to extend the string with high bytes.
2077  bool extended = false;
2078  for (int j = 255; j >= 0; j--) {
2079  State* ns = RunStateOnByte(s, j);
2080  if (ns == NULL)
2081  return false;
2082  if (ns == FullMatchState ||
2083  (ns > SpecialStateMax && ns->ninst_ > 0)) {
2084  extended = true;
2085  max->append(1, static_cast<char>(j));
2086  s = ns;
2087  break;
2088  }
2089  }
2090  if (!extended) {
2091  // Done, no need for PrefixSuccessor.
2092  return true;
2093  }
2094  }
2095 
2096  // Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b
2098 
2099  // If there are no bytes left, we have no way to say "there is no maximum
2100  // string". We could make the interface more complicated and be able to
2101  // return "there is no maximum but here is a minimum", but that seems like
2102  // overkill -- the most common no-max case is all possible strings, so not
2103  // telling the caller that the empty string is the minimum match isn't a
2104  // great loss.
2105  if (max->empty())
2106  return false;
2107 
2108  return true;
2109 }
2110 
2111 // PossibleMatchRange for a Prog.
2113  // Have to use dfa_longest_ to get all strings for full matches.
2114  // For example, (a|aa) never matches aa in first-match mode.
2115  return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
2116 }
2117 
2118 } // namespace re2
re2::DFA::RunStateOnByteUnlocked
State * RunStateOnByteUnlocked(State *, int)
Definition: bloaty/third_party/re2/re2/dfa.cc:998
re2::DFA::SearchParams::context
StringPiece context
Definition: bloaty/third_party/re2/re2/dfa.cc:264
re2::DFA::CacheMutex
Mutex CacheMutex
Definition: re2/re2/dfa.cc:175
re2::DFA::Workq::operator=
Workq & operator=(const Workq &)=delete
flag
uint32_t flag
Definition: ssl_versions.cc:162
NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS
Definition: abseil-cpp/absl/base/internal/thread_annotations.h:220
re2::kInstFail
@ kInstFail
Definition: bloaty/third_party/re2/re2/prog.h:37
re2::DFA::StateSaver
Definition: bloaty/third_party/re2/re2/dfa.cc:1199
re2::DFA::Workq::Workq
Workq(int n, int maxmark)
Definition: re2/re2/dfa.cc:374
re2::DFA::DFA
DFA(Prog *prog, Prog::MatchKind kind, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/dfa.cc:437
re2::SparseSet
SparseSetT< void > SparseSet
Definition: bloaty/third_party/re2/util/sparse_set.h:260
regen-readme.it
it
Definition: regen-readme.py:15
re2::DFA::SearchFTT
bool SearchFTT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1572
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
re2::DFA::RWLocker::mu_
Mutex * mu_
Definition: bloaty/third_party/re2/re2/dfa.cc:1130
re2::DFA::Workq::clear
void clear()
Definition: re2/re2/dfa.cc:386
absl::str_format_internal::LengthMod::j
@ j
re2::Prog::list_count
int list_count()
Definition: bloaty/third_party/re2/re2/prog.h:207
re2::Prog::DumpUnanchored
std::string DumpUnanchored()
Definition: bloaty/third_party/re2/re2/prog.cc:166
re2::DFA::SearchParams
Definition: bloaty/third_party/re2/re2/dfa.cc:249
re2::DFA::mutex_
Mutex mutex_
Definition: bloaty/third_party/re2/re2/dfa.cc:345
re2::DFA::SearchTTT
bool SearchTTT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1584
MutexLock
#define MutexLock(x)
Definition: bloaty/third_party/re2/util/mutex.h:125
bool
bool
Definition: setup_once.h:312
re2::DFA::SearchParams::anchored
bool anchored
Definition: bloaty/third_party/re2/re2/dfa.cc:265
re2::Prog::BuildEntireDFA
int BuildEntireDFA(MatchKind kind, const DFAStateCallback &cb)
Definition: bloaty/third_party/re2/re2/dfa.cc:2000
re2::DFA::DumpWorkq
static std::string DumpWorkq(Workq *q)
Definition: bloaty/third_party/re2/re2/dfa.cc:507
re2::Mutex
Definition: bloaty/third_party/re2/util/mutex.h:34
fix_build_deps.c
list c
Definition: fix_build_deps.py:490
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
re2::HashMix::get
size_t get() const
Definition: bloaty/third_party/re2/util/mix.h:30
re2::DFA::kStartAfterNonWordChar
@ kStartAfterNonWordChar
Definition: bloaty/third_party/re2/re2/dfa.cc:192
false
#define false
Definition: setup_once.h:323
re2::DFA::Workq::maxmark_
int maxmark_
Definition: bloaty/third_party/re2/re2/dfa.cc:429
DCHECK_LE
#define DCHECK_LE(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:22
re2::DFA::Workq
Definition: bloaty/third_party/re2/re2/dfa.cc:385
re2::DFA::Workq::n_
int n_
Definition: bloaty/third_party/re2/re2/dfa.cc:428
re2::DFA::Workq::maxmark
int maxmark()
Definition: bloaty/third_party/re2/re2/dfa.cc:398
re2::DFA::SearchParams::ep
const char * ep
Definition: bloaty/third_party/re2/re2/dfa.cc:272
re2::DFA::kStartBeginLine
@ kStartBeginLine
Definition: bloaty/third_party/re2/re2/dfa.cc:190
re2::DFA::StartInfo::start
State * start
Definition: bloaty/third_party/re2/re2/dfa.cc:285
re2::DFA::RWLocker
Definition: bloaty/third_party/re2/re2/dfa.cc:1118
string.h
re2::DFA::Workq::last_was_mark_
bool last_was_mark_
Definition: bloaty/third_party/re2/re2/dfa.cc:431
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::DFA::kFlagMatch
@ kFlagMatch
Definition: bloaty/third_party/re2/re2/dfa.cc:144
re2::PODArray::data
T * data() const
Definition: bloaty/third_party/re2/util/pod_array.h:24
re2::DFA::kMaxStart
@ kMaxStart
Definition: bloaty/third_party/re2/re2/dfa.cc:193
pod_array.h
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
SpecialStateMax
#define SpecialStateMax
Definition: re2/re2/dfa.cc:488
re2::DFA::kByteEndText
@ kByteEndText
Definition: bloaty/third_party/re2/re2/dfa.cc:141
absl::FormatConversionChar::s
@ s
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
re2::DFA::SearchFFT
bool SearchFFT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1566
re2::HashMix
Definition: bloaty/third_party/re2/util/mix.h:20
re2::Prog::bytemap_range
int bytemap_range()
Definition: bloaty/third_party/re2/re2/prog.h:218
re2::DFA::Workq::insert
void insert(int id)
Definition: re2/re2/dfa.cc:402
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
iterator
const typedef MCPhysReg * iterator
Definition: MCRegisterInfo.h:27
re2::Prog::Inst::match_id
int match_id()
Definition: bloaty/third_party/re2/re2/prog.h:91
re2::kInstEmptyWidth
@ kInstEmptyWidth
Definition: bloaty/third_party/re2/re2/prog.h:34
re2::BytePtr
static const uint8_t * BytePtr(const void *v)
Definition: bloaty/third_party/re2/re2/dfa.cc:366
re2::Prog::IsWordChar
static bool IsWordChar(uint8_t c)
Definition: bloaty/third_party/re2/re2/prog.h:240
re2::DFA::state_cache_
StateSet state_cache_
Definition: bloaty/third_party/re2/re2/dfa.cc:361
re2::Prog::inst_count
int inst_count(InstOp op)
Definition: bloaty/third_party/re2/re2/prog.h:208
re2::DFA::state_budget_
int64_t state_budget_
Definition: bloaty/third_party/re2/re2/dfa.cc:360
re2::DFA::State
Definition: bloaty/third_party/re2/re2/dfa.cc:120
true
#define true
Definition: setup_once.h:324
mu_
Mutex mu_
Definition: oob_backend_metric.cc:115
re2::DFA::DumpState
static std::string DumpState(State *state)
Definition: bloaty/third_party/re2/re2/dfa.cc:523
re2::DFA::CachedState
State * CachedState(int *inst, int ninst, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:747
re2::DFA::Workq::nextmark_
int nextmark_
Definition: bloaty/third_party/re2/re2/dfa.cc:430
python_utils.port_server.stderr
stderr
Definition: port_server.py:51
re2::DFA::kStartBeginText
@ kStartBeginText
Definition: bloaty/third_party/re2/re2/dfa.cc:189
re2::DFA::State::ninst_
int ninst_
Definition: bloaty/third_party/re2/re2/dfa.cc:124
re2::PODArray::size
int size() const
Definition: bloaty/third_party/re2/util/pod_array.h:28
re2::DFA::WorkqToCachedState
State * WorkqToCachedState(Workq *q, Workq *mq, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:609
re2::Prog::GetDFA
DFA * GetDFA(MatchKind kind)
Definition: bloaty/third_party/re2/re2/dfa.cc:1818
re2::DFA::RWLocker::LockForWriting
void LockForWriting()
Definition: bloaty/third_party/re2/re2/dfa.cc:1143
re2::Prog::MatchKind
MatchKind
Definition: bloaty/third_party/re2/re2/prog.h:192
re2::DFA::SearchParams::failed
bool failed
Definition: bloaty/third_party/re2/re2/dfa.cc:271
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
re2::DFA::Workq::insert_new
void insert_new(int id)
Definition: bloaty/third_party/re2/re2/dfa.cc:422
re2::SparseSetT::clear
void clear()
Definition: bloaty/third_party/re2/util/sparse_set.h:117
re2::DFA::prog_
Prog * prog_
Definition: bloaty/third_party/re2/re2/dfa.cc:341
re2::kEmptyWordBoundary
@ kEmptyWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:47
absl::call_once
void call_once(absl::once_flag &flag, Callable &&fn, Args &&... args)
Definition: abseil-cpp/absl/base/call_once.h:206
re2::SparseSetT::contains
bool contains(int i) const
Definition: bloaty/third_party/re2/util/sparse_set.h:220
re2::SparseSetT
Definition: bloaty/third_party/re2/util/sparse_set.h:69
re2::DFA::kind_
Prog::MatchKind kind_
Definition: bloaty/third_party/re2/re2/dfa.cc:342
re2::kInstCapture
@ kInstCapture
Definition: bloaty/third_party/re2/re2/prog.h:33
re2::DFA::State::flag_
uint32_t flag_
Definition: bloaty/third_party/re2/re2/dfa.cc:125
start
static uint64_t start
Definition: benchmark-pound.c:74
re2::DFA::State::IsMatch
bool IsMatch() const
Definition: re2/re2/dfa.cc:115
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
re2::DFA::SearchParams::start
State * start
Definition: bloaty/third_party/re2/re2/dfa.cc:268
xds_interop_client.int
int
Definition: xds_interop_client.py:113
end
char * end
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1008
re2::DFA::StartInfo::start
std::atomic< State * > start
Definition: re2/re2/dfa.cc:276
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
swap
#define swap(a, b)
Definition: qsort.h:111
gen_stats_data.c_str
def c_str(s, encoding='ascii')
Definition: gen_stats_data.py:38
re2::DFA::BuildAllStates
int BuildAllStates(const Prog::DFAStateCallback &cb)
Definition: bloaty/third_party/re2/re2/dfa.cc:1932
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
flag_
int flag_
Definition: transport_stream_receiver_test.cc:166
gmock_output_test.output
output
Definition: bloaty/third_party/googletest/googlemock/test/gmock_output_test.py:175
FALLTHROUGH_INTENDED
#define FALLTHROUGH_INTENDED
Definition: bloaty/third_party/re2/util/util.h:26
re2::DFA::SearchParams::cache_lock
RWLocker * cache_lock
Definition: bloaty/third_party/re2/re2/dfa.cc:270
re2::kEmptyNonWordBoundary
@ kEmptyNonWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:48
mu
Mutex mu
Definition: server_config_selector_filter.cc:74
re2::Prog::anchor_end
bool anchor_end()
Definition: bloaty/third_party/re2/re2/prog.h:216
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::DFA::RunWorkqOnByte
void RunWorkqOnByte(Workq *q, Workq *nq, int c, uint32_t flag, bool *ismatch)
Definition: bloaty/third_party/re2/re2/dfa.cc:947
text_format_test_wrapper.sep
sep
Definition: text_format_test_wrapper.py:34
re2::DFA::SearchFTF
bool SearchFTF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1569
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
re2::DFA::operator=
DFA & operator=(const DFA &)=delete
re2::DFA::SearchTFT
bool SearchTFT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1578
re2::DFA::kFlagLastWord
@ kFlagLastWord
Definition: bloaty/third_party/re2/re2/dfa.cc:145
sparse_set.h
re2::DFA::q0_
Workq * q0_
Definition: bloaty/third_party/re2/re2/dfa.cc:348
nstack
int nstack
Definition: abseil-cpp/absl/synchronization/internal/graphcycles.cc:287
re2::DFA::InlinedSearchLoop
bool InlinedSearchLoop(SearchParams *params, bool have_first_byte, bool want_earliest_match, bool run_forward)
Definition: bloaty/third_party/re2/re2/dfa.cc:1326
re2::DFA::StateSaver::~StateSaver
~StateSaver()
Definition: bloaty/third_party/re2/re2/dfa.cc:1242
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::DFA::StateSaver::StateSaver
StateSaver(DFA *dfa, State *state)
Definition: bloaty/third_party/re2/re2/dfa.cc:1224
re2::Prog::inst
Inst * inst(int id)
Definition: bloaty/third_party/re2/re2/prog.h:199
re2::DFA::SearchParams::can_prefix_accel
bool can_prefix_accel
Definition: re2/re2/dfa.cc:258
re2::DFA::RWLocker::~RWLocker
~RWLocker()
Definition: bloaty/third_party/re2/re2/dfa.cc:1151
re2::DFA::FastSearchLoop
bool FastSearchLoop(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1598
re2::DFA::RunWorkqOnEmptyString
void RunWorkqOnEmptyString(Workq *q, Workq *nq, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:933
re2::DFA::PossibleMatchRange
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
Definition: bloaty/third_party/re2/re2/dfa.cc:2010
re2::DFA::StateSet
std::unordered_set< State *, StateHash, StateEqual > StateSet
Definition: re2/re2/dfa.cc:171
std::swap
void swap(Json::Value &a, Json::Value &b)
Specialize std::swap() for Json::Value.
Definition: third_party/bloaty/third_party/protobuf/conformance/third_party/jsoncpp/json.h:1226
re2::DFA::SearchParams::SearchParams
SearchParams(const StringPiece &text, const StringPiece &context, RWLocker *cache_lock)
Definition: re2/re2/dfa.cc:241
re2::DFA::ByteMap
int ByteMap(int c)
Definition: re2/re2/dfa.cc:317
re2::kEmptyBeginText
@ kEmptyBeginText
Definition: bloaty/third_party/re2/re2/prog.h:45
re2::DFA::Workq::mark
void mark()
Definition: re2/re2/dfa.cc:391
re2::Prog::DeleteDFA
void DeleteDFA(DFA *dfa)
Definition: bloaty/third_party/re2/re2/dfa.cc:1847
re2::Prog::Inst::last
int last()
Definition: bloaty/third_party/re2/re2/prog.h:83
re2::Prog::size
int size()
Definition: bloaty/third_party/re2/re2/prog.h:204
re2::DFA::RunStateOnByte
State * RunStateOnByte(State *, int)
Definition: bloaty/third_party/re2/re2/dfa.cc:1006
re2::HashMix::Mix
void Mix(size_t val)
Definition: bloaty/third_party/re2/util/mix.h:24
min
#define min(a, b)
Definition: qsort.h:83
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::Prog::PossibleMatchRange
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
Definition: bloaty/third_party/re2/re2/dfa.cc:2147
re2::Prog::kManyMatch
@ kManyMatch
Definition: bloaty/third_party/re2/re2/prog.h:196
re2::DFA::ok
bool ok() const
Definition: re2/re2/dfa.cc:75
re2::DFA::kind
Prog::MatchKind kind()
Definition: bloaty/third_party/re2/re2/dfa.cc:82
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
re2::PrefixSuccessor
void PrefixSuccessor(std::string *prefix)
Definition: bloaty/third_party/re2/util/strutil.cc:78
stdint.h
re2::Prog::bytemap
const uint8_t * bytemap()
Definition: bloaty/third_party/re2/re2/prog.h:219
re2::DFA::SearchTTF
bool SearchTTF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1581
re2::DFA::SearchFFF
bool SearchFFF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1563
re2::Prog::SearchDFA
bool SearchDFA(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match0, bool *failed, SparseSet *matches)
Definition: bloaty/third_party/re2/re2/dfa.cc:1861
re2::DFA
Definition: bloaty/third_party/re2/re2/dfa.cc:77
mem
void * mem
Definition: libc.cpp:91
re2::kEmptyEndLine
@ kEmptyEndLine
Definition: bloaty/third_party/re2/re2/prog.h:44
grpc_core::Loop
promise_detail::Loop< F > Loop(F f)
Definition: loop.h:130
re2::DFA::Search
bool Search(const StringPiece &text, const StringPiece &context, bool anchored, bool want_earliest_match, bool run_forward, bool *failed, const char **ep, SparseSet *matches)
Definition: bloaty/third_party/re2/re2/dfa.cc:1765
re2::DFA::cache_mutex_
Mutex cache_mutex_
Definition: bloaty/third_party/re2/re2/dfa.cc:358
re2::DFA::cache_mutex_
CacheMutex cache_mutex_
Definition: re2/re2/dfa.cc:341
re2::kInstNop
@ kInstNop
Definition: bloaty/third_party/re2/re2/prog.h:36
re2::DFA::kStartAnchored
@ kStartAnchored
Definition: bloaty/third_party/re2/re2/dfa.cc:195
re2::DFA::start_
StartInfo start_[kMaxStart]
Definition: bloaty/third_party/re2/re2/dfa.cc:362
re2::kInstMatch
@ kInstMatch
Definition: bloaty/third_party/re2/re2/prog.h:35
re2::DFA::Workq::is_mark
bool is_mark(int i)
Definition: re2/re2/dfa.cc:382
re2::kInstAltMatch
@ kInstAltMatch
Definition: bloaty/third_party/re2/re2/prog.h:31
Mark
#define Mark
Definition: re2/re2/dfa.cc:360
absl::flags_internal
Definition: abseil-cpp/absl/flags/commandlineflag.h:40
re2::DFA::kStartAfterWordChar
@ kStartAfterWordChar
Definition: bloaty/third_party/re2/re2/dfa.cc:191
re2::Prog
Definition: bloaty/third_party/re2/re2/prog.h:56
re2::DFA::~DFA
~DFA()
Definition: bloaty/third_party/re2/re2/dfa.cc:486
index
int index
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/protobuf.h:1184
re2::SparseSetT::iterator
int * iterator
Definition: bloaty/third_party/re2/util/sparse_set.h:75
ret
UniquePtr< SSL_SESSION > ret
Definition: ssl_x509.cc:1029
re2::DFA::stack_
PODArray< int > stack_
Definition: bloaty/third_party/re2/re2/dfa.cc:350
google::protobuf.internal::Mutex
WrappedMutex Mutex
Definition: bloaty/third_party/protobuf/src/google/protobuf/stubs/mutex.h:113
re2::dfa_should_bail_when_slow
static bool dfa_should_bail_when_slow
Definition: bloaty/third_party/re2/re2/dfa.cc:67
re2::DFA::StateSaver::Restore
State * Restore()
Definition: bloaty/third_party/re2/re2/dfa.cc:1247
re2::Prog::can_prefix_accel
bool can_prefix_accel()
Definition: re2/re2/prog.h:218
re2::Workq
SparseSet Workq
Definition: bloaty/third_party/re2/re2/prog.cc:125
absl::container_internal::MatchKind
MatchKind
Definition: abseil-cpp/absl/container/internal/btree.h:484
MatchSep
#define MatchSep
Definition: re2/re2/dfa.cc:364
re2::ExtraDebug
static const bool ExtraDebug
Definition: bloaty/third_party/re2/re2/dfa.cc:71
re2::Prog::PrefixAccel
const void * PrefixAccel(const void *data, size_t size)
Definition: re2/re2/prog.h:222
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
re2::DFA::RWLocker::RWLocker
RWLocker(Mutex *mu)
Definition: bloaty/third_party/re2/re2/dfa.cc:1137
FullMatchState
#define FullMatchState
Definition: re2/re2/dfa.cc:486
re2::DFA::kFlagEmptyMask
@ kFlagEmptyMask
Definition: bloaty/third_party/re2/re2/dfa.cc:143
re2::Prog::start_unanchored
int start_unanchored()
Definition: bloaty/third_party/re2/re2/prog.h:201
re2::kEmptyEndText
@ kEmptyEndText
Definition: bloaty/third_party/re2/re2/prog.h:46
re2::DFA::SearchParams::run_forward
bool run_forward
Definition: bloaty/third_party/re2/re2/dfa.cc:267
input
std::string input
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/tokenizer_unittest.cc:197
re2::DFA::StateEqual::operator()
bool operator()(const State *a, const State *b) const
Definition: re2/re2/dfa.cc:155
re2::kEmptyBeginLine
@ kEmptyBeginLine
Definition: bloaty/third_party/re2/re2/prog.h:43
re2::DFA::kFlagNeedShift
@ kFlagNeedShift
Definition: bloaty/third_party/re2/re2/dfa.cc:146
absl::str_format_internal::LengthMod::q
@ q
re2::DFA::StartInfo
Definition: bloaty/third_party/re2/re2/dfa.cc:283
re2::DFA::ResetCache
void ResetCache(RWLocker *cache_lock)
Definition: bloaty/third_party/re2/re2/dfa.cc:1170
re2::Prog::kFirstMatch
@ kFirstMatch
Definition: bloaty/third_party/re2/re2/prog.h:193
ns
static int64_t ns
Definition: bloaty/third_party/re2/util/benchmark.cc:43
re2::DFA::StateToWorkq
void StateToWorkq(State *s, Workq *q)
Definition: bloaty/third_party/re2/re2/dfa.cc:817
re2::DFA::q1_
Workq * q1_
Definition: bloaty/third_party/re2/re2/dfa.cc:349
re2::Mutex::ReaderLock
void ReaderLock()
Definition: bloaty/third_party/re2/util/mutex.h:81
context
grpc::ClientContext context
Definition: istio_echo_server_lib.cc:61
re2::DFA::SearchParams::operator=
SearchParams & operator=(const SearchParams &)=delete
re2::DFA::Workq::size
int size()
Definition: re2/re2/dfa.cc:398
re2::DFA::mem_budget_
int64_t mem_budget_
Definition: bloaty/third_party/re2/re2/dfa.cc:359
flags
uint32_t flags
Definition: retry_filter.cc:632
re2::Prog::DFAStateCallback
std::function< void(const int *next, bool match)> DFAStateCallback
Definition: bloaty/third_party/re2/re2/prog.h:283
re2::DFA::StateHash::operator()
size_t operator()(const State *a) const
Definition: re2/re2/dfa.cc:144
autogen_x86imm.tmp
tmp
Definition: autogen_x86imm.py:12
DeadState
#define DeadState
Definition: re2/re2/dfa.cc:483
DCHECK
#define DCHECK(condition)
Definition: bloaty/third_party/re2/util/logging.h:19
re2::DFA::State::inst_
int * inst_
Definition: bloaty/third_party/re2/re2/dfa.cc:123
re2::DFA::init_failed_
bool init_failed_
Definition: bloaty/third_party/re2/re2/dfa.cc:343
re2::DFA::AnalyzeSearch
bool AnalyzeSearch(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1646
run_grpclb_interop_tests.l
dictionary l
Definition: run_grpclb_interop_tests.py:410
re2::DFA::SearchParams::text
StringPiece text
Definition: bloaty/third_party/re2/re2/dfa.cc:263
re2::Prog::TESTING_ONLY_set_dfa_should_bail_when_slow
static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b)
Definition: re2/re2/dfa.cc:59
regress.m
m
Definition: regress/regress.py:25
re2::DFA::AnalyzeSearchHelper
bool AnalyzeSearchHelper(SearchParams *params, StartInfo *info, uint32_t flags)
Definition: bloaty/third_party/re2/re2/dfa.cc:1717
re2::DFA::SearchParams::want_earliest_match
bool want_earliest_match
Definition: bloaty/third_party/re2/re2/dfa.cc:266
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
re2::PODArray< int >
re2::Prog::kLongestMatch
@ kLongestMatch
Definition: bloaty/third_party/re2/re2/prog.h:194
re2::DFA::AddToQueue
void AddToQueue(Workq *q, int id, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:833
re2::DFA::RWLocker::mu_
CacheMutex * mu_
Definition: re2/re2/dfa.cc:1126
re2::DFA::SearchTFF
bool SearchTFF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1575
re2::kInstByteRange
@ kInstByteRange
Definition: bloaty/third_party/re2/re2/prog.h:32
re2::DFA::ClearCache
void ClearCache()
Definition: bloaty/third_party/re2/re2/dfa.cc:799
re2::DFA::State::next_
std::atomic< State * > next_[]
Definition: bloaty/third_party/re2/re2/dfa.cc:134
cb
OPENSSL_EXPORT pem_password_cb * cb
Definition: pem.h:351
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
state
static struct rpc_state state
Definition: bad_server_response_test.cc:87
re2::SparseSetT::insert_new
iterator insert_new(int i)
Definition: bloaty/third_party/re2/util/sparse_set.h:138
re2::DFA::StartInfo::StartInfo
StartInfo()
Definition: re2/re2/dfa.cc:275
re2::Prog::start
int start()
Definition: bloaty/third_party/re2/re2/prog.h:200
id
uint32_t id
Definition: flow_control_fuzzer.cc:70
re2::DFA::SearchParams::matches
SparseSet * matches
Definition: bloaty/third_party/re2/re2/dfa.cc:273


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:16