bloaty/third_party/re2/re2/dfa.cc
Go to the documentation of this file.
1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // A DFA (deterministic finite automaton)-based regular expression search.
6 //
7 // The DFA search has two main parts: the construction of the automaton,
8 // which is represented by a graph of State structures, and the execution
9 // of the automaton over a given input string.
10 //
11 // The basic idea is that the State graph is constructed so that the
12 // execution can simply start with a state s, and then for each byte c in
13 // the input string, execute "s = s->next[c]", checking at each point whether
14 // the current s represents a matching state.
15 //
16 // The simple explanation just given does convey the essence of this code,
17 // but it omits the details of how the State graph gets constructed as well
18 // as some performance-driven optimizations to the execution of the automaton.
19 // All these details are explained in the comments for the code following
20 // the definition of class DFA.
21 //
22 // See http://swtch.com/~rsc/regexp/ for a very bare-bones equivalent.
23 
24 #include <stddef.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <algorithm>
29 #include <atomic>
30 #include <deque>
31 #include <mutex>
32 #include <new>
33 #include <string>
34 #include <unordered_map>
35 #include <unordered_set>
36 #include <utility>
37 #include <vector>
38 
39 #include "util/logging.h"
40 #include "util/mix.h"
41 #include "util/mutex.h"
42 #include "util/pod_array.h"
43 #include "util/sparse_set.h"
44 #include "util/strutil.h"
45 #include "re2/prog.h"
46 #include "re2/stringpiece.h"
47 
48 // Silence "zero-sized array in struct/union" warning for DFA::State::next_.
49 #ifdef _MSC_VER
50 #pragma warning(disable: 4200)
51 #endif
52 
53 namespace re2 {
54 
55 #if !defined(__linux__) /* only Linux seems to have memrchr */
56 static void* memrchr(const void* s, int c, size_t n) {
57  const unsigned char* p = (const unsigned char*)s;
58  for (p += n; n > 0; n--)
59  if (*--p == c)
60  return (void*)p;
61 
62  return NULL;
63 }
64 #endif
65 
66 // Controls whether the DFA should bail out early if the NFA would be faster.
67 static bool dfa_should_bail_when_slow = true;
68 
69 // Changing this to true compiles in prints that trace execution of the DFA.
70 // Generates a lot of output -- only useful for debugging.
71 static const bool ExtraDebug = false;
72 
73 // A DFA implementation of a regular expression program.
74 // Since this is entirely a forward declaration mandated by C++,
75 // some of the comments here are better understood after reading
76 // the comments in the sections that follow the DFA definition.
77 class DFA {
78  public:
80  ~DFA();
81  bool ok() const { return !init_failed_; }
82  Prog::MatchKind kind() { return kind_; }
83 
84  // Searches for the regular expression in text, which is considered
85  // as a subsection of context for the purposes of interpreting flags
86  // like ^ and $ and \A and \z.
87  // Returns whether a match was found.
88  // If a match is found, sets *ep to the end point of the best match in text.
89  // If "anchored", the match must begin at the start of text.
90  // If "want_earliest_match", the match that ends first is used, not
91  // necessarily the best one.
92  // If "run_forward" is true, the DFA runs from text.begin() to text.end().
93  // If it is false, the DFA runs from text.end() to text.begin(),
94  // returning the leftmost end of the match instead of the rightmost one.
95  // If the DFA cannot complete the search (for example, if it is out of
96  // memory), it sets *failed and returns false.
97  bool Search(const StringPiece& text, const StringPiece& context,
98  bool anchored, bool want_earliest_match, bool run_forward,
99  bool* failed, const char** ep, SparseSet* matches);
100 
101  // Builds out all states for the entire DFA.
102  // If cb is not empty, it receives one callback per state built.
103  // Returns the number of states built.
104  // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
106 
107  // Computes min and max for matching strings. Won't return strings
108  // bigger than maxlen.
109  bool PossibleMatchRange(std::string* min, std::string* max, int maxlen);
110 
111  // These data structures are logically private, but C++ makes it too
112  // difficult to mark them as such.
113  class RWLocker;
114  class StateSaver;
115  class Workq;
116 
117  // A single DFA state. The DFA is represented as a graph of these
118  // States, linked by the next_ pointers. If in state s and reading
119  // byte c, the next state should be s->next_[c].
120  struct State {
121  inline bool IsMatch() const { return (flag_ & kFlagMatch) != 0; }
122 
123  int* inst_; // Instruction pointers in the state.
124  int ninst_; // # of inst_ pointers.
125  uint32_t flag_; // Empty string bitfield flags in effect on the way
126  // into this state, along with kFlagMatch if this
127  // is a matching state.
128 
129 // Work around the bug affecting flexible array members in GCC 6.x (for x >= 1).
130 // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70932)
131 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
132  std::atomic<State*> next_[0]; // Outgoing arrows from State,
133 #else
134  std::atomic<State*> next_[]; // Outgoing arrows from State,
135 #endif
136 
137  // one per input byte class
138  };
139 
140  enum {
141  kByteEndText = 256, // imaginary byte at end of text
142 
143  kFlagEmptyMask = 0xFF, // State.flag_: bits holding kEmptyXXX flags
144  kFlagMatch = 0x0100, // State.flag_: this is a matching state
145  kFlagLastWord = 0x0200, // State.flag_: last byte was a word char
146  kFlagNeedShift = 16, // needed kEmpty bits are or'ed in shifted left
147  };
148 
149  struct StateHash {
150  size_t operator()(const State* a) const {
151  DCHECK(a != NULL);
152  HashMix mix(a->flag_);
153  for (int i = 0; i < a->ninst_; i++)
154  mix.Mix(a->inst_[i]);
155  mix.Mix(0);
156  return mix.get();
157  }
158  };
159 
160  struct StateEqual {
161  bool operator()(const State* a, const State* b) const {
162  DCHECK(a != NULL);
163  DCHECK(b != NULL);
164  if (a == b)
165  return true;
166  if (a->flag_ != b->flag_)
167  return false;
168  if (a->ninst_ != b->ninst_)
169  return false;
170  for (int i = 0; i < a->ninst_; i++)
171  if (a->inst_[i] != b->inst_[i])
172  return false;
173  return true;
174  }
175  };
176 
177  typedef std::unordered_set<State*, StateHash, StateEqual> StateSet;
178 
179  private:
180  // Special "first_byte" values for a state. (Values >= 0 denote actual bytes.)
181  enum {
182  kFbUnknown = -1, // No analysis has been performed.
183  kFbNone = -2, // The first-byte trick cannot be used.
184  };
185 
186  enum {
187  // Indices into start_ for unanchored searches.
188  // Add kStartAnchored for anchored searches.
189  kStartBeginText = 0, // text at beginning of context
190  kStartBeginLine = 2, // text at beginning of line
191  kStartAfterWordChar = 4, // text follows a word character
192  kStartAfterNonWordChar = 6, // text follows non-word character
194 
196  };
197 
198  // Resets the DFA State cache, flushing all saved State* information.
199  // Releases and reacquires cache_mutex_ via cache_lock, so any
200  // State* existing before the call are not valid after the call.
201  // Use a StateSaver to preserve important states across the call.
202  // cache_mutex_.r <= L < mutex_
203  // After: cache_mutex_.w <= L < mutex_
204  void ResetCache(RWLocker* cache_lock);
205 
206  // Looks up and returns the State corresponding to a Workq.
207  // L >= mutex_
208  State* WorkqToCachedState(Workq* q, Workq* mq, uint32_t flag);
209 
210  // Looks up and returns a State matching the inst, ninst, and flag.
211  // L >= mutex_
212  State* CachedState(int* inst, int ninst, uint32_t flag);
213 
214  // Clear the cache entirely.
215  // Must hold cache_mutex_.w or be in destructor.
216  void ClearCache();
217 
218  // Converts a State into a Workq: the opposite of WorkqToCachedState.
219  // L >= mutex_
220  void StateToWorkq(State* s, Workq* q);
221 
222  // Runs a State on a given byte, returning the next state.
223  State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_
224  State* RunStateOnByte(State*, int); // L >= mutex_
225 
226  // Runs a Workq on a given byte followed by a set of empty-string flags,
227  // producing a new Workq in nq. If a match instruction is encountered,
228  // sets *ismatch to true.
229  // L >= mutex_
230  void RunWorkqOnByte(Workq* q, Workq* nq,
231  int c, uint32_t flag, bool* ismatch);
232 
233  // Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
234  // L >= mutex_
236 
237  // Adds the instruction id to the Workq, following empty arrows
238  // according to flag.
239  // L >= mutex_
240  void AddToQueue(Workq* q, int id, uint32_t flag);
241 
242  // For debugging, returns a text representation of State.
243  static std::string DumpState(State* state);
244 
245  // For debugging, returns a text representation of a Workq.
246  static std::string DumpWorkq(Workq* q);
247 
248  // Search parameters
249  struct SearchParams {
252  : text(text), context(context),
253  anchored(false),
256  start(NULL),
259  failed(false),
260  ep(NULL),
261  matches(NULL) { }
262 
265  bool anchored;
271  bool failed; // "out" parameter: whether search gave up
272  const char* ep; // "out" parameter: end pointer for match
274 
275  private:
276  SearchParams(const SearchParams&) = delete;
277  SearchParams& operator=(const SearchParams&) = delete;
278  };
279 
280  // Before each search, the parameters to Search are analyzed by
281  // AnalyzeSearch to determine the state in which to start and the
282  // "first_byte" for that state, if any.
283  struct StartInfo {
286  std::atomic<int> first_byte;
287  };
288 
289  // Fills in params->start and params->first_byte using
290  // the other search parameters. Returns true on success,
291  // false on failure.
292  // cache_mutex_.r <= L < mutex_
293  bool AnalyzeSearch(SearchParams* params);
294  bool AnalyzeSearchHelper(SearchParams* params, StartInfo* info,
295  uint32_t flags);
296 
297  // The generic search loop, inlined to create specialized versions.
298  // cache_mutex_.r <= L < mutex_
299  // Might unlock and relock cache_mutex_ via params->cache_lock.
300  inline bool InlinedSearchLoop(SearchParams* params,
301  bool have_first_byte,
302  bool want_earliest_match,
303  bool run_forward);
304 
305  // The specialized versions of InlinedSearchLoop. The three letters
306  // at the ends of the name denote the true/false values used as the
307  // last three parameters of InlinedSearchLoop.
308  // cache_mutex_.r <= L < mutex_
309  // Might unlock and relock cache_mutex_ via params->cache_lock.
310  bool SearchFFF(SearchParams* params);
311  bool SearchFFT(SearchParams* params);
312  bool SearchFTF(SearchParams* params);
313  bool SearchFTT(SearchParams* params);
314  bool SearchTFF(SearchParams* params);
315  bool SearchTFT(SearchParams* params);
316  bool SearchTTF(SearchParams* params);
317  bool SearchTTT(SearchParams* params);
318 
319  // The main search loop: calls an appropriate specialized version of
320  // InlinedSearchLoop.
321  // cache_mutex_.r <= L < mutex_
322  // Might unlock and relock cache_mutex_ via params->cache_lock.
323  bool FastSearchLoop(SearchParams* params);
324 
325  // For debugging, a slow search loop that calls InlinedSearchLoop
326  // directly -- because the booleans passed are not constants, the
327  // loop is not specialized like the SearchFFF etc. versions, so it
328  // runs much more slowly. Useful only for debugging.
329  // cache_mutex_.r <= L < mutex_
330  // Might unlock and relock cache_mutex_ via params->cache_lock.
331  bool SlowSearchLoop(SearchParams* params);
332 
333  // Looks up bytes in bytemap_ but handles case c == kByteEndText too.
334  int ByteMap(int c) {
335  if (c == kByteEndText)
336  return prog_->bytemap_range();
337  return prog_->bytemap()[c];
338  }
339 
340  // Constant after initialization.
341  Prog* prog_; // The regular expression program to run.
342  Prog::MatchKind kind_; // The kind of DFA.
343  bool init_failed_; // initialization failed (out of memory)
344 
345  Mutex mutex_; // mutex_ >= cache_mutex_.r
346 
347  // Scratch areas, protected by mutex_.
348  Workq* q0_; // Two pre-allocated work queues.
350  PODArray<int> stack_; // Pre-allocated stack for AddToQueue
351 
352  // State* cache. Many threads use and add to the cache simultaneously,
353  // holding cache_mutex_ for reading and mutex_ (above) when adding.
354  // If the cache fills and needs to be discarded, the discarding is done
355  // while holding cache_mutex_ for writing, to avoid interrupting other
356  // readers. Any State* pointers are only valid while cache_mutex_
357  // is held.
359  int64_t mem_budget_; // Total memory budget for all States.
360  int64_t state_budget_; // Amount of memory remaining for new States.
361  StateSet state_cache_; // All States computed so far.
363 };
364 
365 // Shorthand for casting to uint8_t*.
366 static inline const uint8_t* BytePtr(const void* v) {
367  return reinterpret_cast<const uint8_t*>(v);
368 }
369 
370 // Work queues
371 
372 // Marks separate thread groups of different priority
373 // in the work queue when in leftmost-longest matching mode.
374 #define Mark (-1)
375 
376 // Separates the match IDs from the instructions in inst_.
377 // Used only for "many match" DFA states.
378 #define MatchSep (-2)
379 
380 // Internally, the DFA uses a sparse array of
381 // program instruction pointers as a work queue.
382 // In leftmost longest mode, marks separate sections
383 // of workq that started executing at different
384 // locations in the string (earlier locations first).
385 class DFA::Workq : public SparseSet {
386  public:
387  // Constructor: n is number of normal slots, maxmark number of mark slots.
388  Workq(int n, int maxmark) :
390  n_(n),
391  maxmark_(maxmark),
392  nextmark_(n),
394  }
395 
396  bool is_mark(int i) { return i >= n_; }
397 
398  int maxmark() { return maxmark_; }
399 
400  void clear() {
402  nextmark_ = n_;
403  }
404 
405  void mark() {
406  if (last_was_mark_)
407  return;
408  last_was_mark_ = false;
410  }
411 
412  int size() {
413  return n_ + maxmark_;
414  }
415 
416  void insert(int id) {
417  if (contains(id))
418  return;
419  insert_new(id);
420  }
421 
422  void insert_new(int id) {
423  last_was_mark_ = false;
425  }
426 
427  private:
428  int n_; // size excluding marks
429  int maxmark_; // maximum number of marks
430  int nextmark_; // id of next mark
431  bool last_was_mark_; // last inserted was mark
432 
433  Workq(const Workq&) = delete;
434  Workq& operator=(const Workq&) = delete;
435 };
436 
438  : prog_(prog),
439  kind_(kind),
440  init_failed_(false),
441  q0_(NULL),
442  q1_(NULL),
443  mem_budget_(max_mem) {
444  if (ExtraDebug)
445  fprintf(stderr, "\nkind %d\n%s\n", (int)kind_, prog_->DumpUnanchored().c_str());
446  int nmark = 0;
447  if (kind_ == Prog::kLongestMatch)
448  nmark = prog_->size();
449  // See DFA::AddToQueue() for why this is so.
453  nmark + 1; // + 1 for start inst
454 
455  // Account for space needed for DFA, q0, q1, stack.
456  mem_budget_ -= sizeof(DFA);
457  mem_budget_ -= (prog_->size() + nmark) *
458  (sizeof(int)+sizeof(int)) * 2; // q0, q1
459  mem_budget_ -= nstack * sizeof(int); // stack
460  if (mem_budget_ < 0) {
461  init_failed_ = true;
462  return;
463  }
464 
466 
467  // Make sure there is a reasonable amount of working room left.
468  // At minimum, the search requires room for two states in order
469  // to limp along, restarting frequently. We'll get better performance
470  // if there is room for a larger number of states, say 20.
471  // Note that a state stores list heads only, so we use the program
472  // list count for the upper bound, not the program size.
473  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
474  int64_t one_state = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
475  (prog_->list_count()+nmark)*sizeof(int);
476  if (state_budget_ < 20*one_state) {
477  init_failed_ = true;
478  return;
479  }
480 
481  q0_ = new Workq(prog_->size(), nmark);
482  q1_ = new Workq(prog_->size(), nmark);
484 }
485 
487  delete q0_;
488  delete q1_;
489  ClearCache();
490 }
491 
492 // In the DFA state graph, s->next[c] == NULL means that the
493 // state has not yet been computed and needs to be. We need
494 // a different special value to signal that s->next[c] is a
495 // state that can never lead to a match (and thus the search
496 // can be called off). Hence DeadState.
497 #define DeadState reinterpret_cast<State*>(1)
498 
499 // Signals that the rest of the string matches no matter what it is.
500 #define FullMatchState reinterpret_cast<State*>(2)
501 
502 #define SpecialStateMax FullMatchState
503 
504 // Debugging printouts
505 
506 // For debugging, returns a string representation of the work queue.
508  std::string s;
509  const char* sep = "";
510  for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
511  if (q->is_mark(*it)) {
512  s += "|";
513  sep = "";
514  } else {
515  s += StringPrintf("%s%d", sep, *it);
516  sep = ",";
517  }
518  }
519  return s;
520 }
521 
522 // For debugging, returns a string representation of the state.
524  if (state == NULL)
525  return "_";
526  if (state == DeadState)
527  return "X";
528  if (state == FullMatchState)
529  return "*";
530  std::string s;
531  const char* sep = "";
532  s += StringPrintf("(%p)", state);
533  for (int i = 0; i < state->ninst_; i++) {
534  if (state->inst_[i] == Mark) {
535  s += "|";
536  sep = "";
537  } else if (state->inst_[i] == MatchSep) {
538  s += "||";
539  sep = "";
540  } else {
541  s += StringPrintf("%s%d", sep, state->inst_[i]);
542  sep = ",";
543  }
544  }
545  s += StringPrintf(" flag=%#x", state->flag_);
546  return s;
547 }
548 
550 //
551 // DFA state graph construction.
552 //
553 // The DFA state graph is a heavily-linked collection of State* structures.
554 // The state_cache_ is a set of all the State structures ever allocated,
555 // so that if the same state is reached by two different paths,
556 // the same State structure can be used. This reduces allocation
557 // requirements and also avoids duplication of effort across the two
558 // identical states.
559 //
560 // A State is defined by an ordered list of instruction ids and a flag word.
561 //
562 // The choice of an ordered list of instructions differs from a typical
563 // textbook DFA implementation, which would use an unordered set.
564 // Textbook descriptions, however, only care about whether
565 // the DFA matches, not where it matches in the text. To decide where the
566 // DFA matches, we need to mimic the behavior of the dominant backtracking
567 // implementations like PCRE, which try one possible regular expression
568 // execution, then another, then another, stopping when one of them succeeds.
569 // The DFA execution tries these many executions in parallel, representing
570 // each by an instruction id. These pointers are ordered in the State.inst_
571 // list in the same order that the executions would happen in a backtracking
572 // search: if a match is found during execution of inst_[2], inst_[i] for i>=3
573 // can be discarded.
574 //
575 // Textbooks also typically do not consider context-aware empty string operators
576 // like ^ or $. These are handled by the flag word, which specifies the set
577 // of empty-string operators that should be matched when executing at the
578 // current text position. These flag bits are defined in prog.h.
579 // The flag word also contains two DFA-specific bits: kFlagMatch if the state
580 // is a matching state (one that reached a kInstMatch in the program)
581 // and kFlagLastWord if the last processed byte was a word character, for the
582 // implementation of \B and \b.
583 //
584 // The flag word also contains, shifted up 16 bits, the bits looked for by
585 // any kInstEmptyWidth instructions in the state. These provide a useful
586 // summary indicating when new flags might be useful.
587 //
588 // The permanent representation of a State's instruction ids is just an array,
589 // but while a state is being analyzed, these instruction ids are represented
590 // as a Workq, which is an array that allows iteration in insertion order.
591 
592 // NOTE(rsc): The choice of State construction determines whether the DFA
593 // mimics backtracking implementations (so-called leftmost first matching) or
594 // traditional DFA implementations (so-called leftmost longest matching as
595 // prescribed by POSIX). This implementation chooses to mimic the
596 // backtracking implementations, because we want to replace PCRE. To get
597 // POSIX behavior, the states would need to be considered not as a simple
598 // ordered list of instruction ids, but as a list of unordered sets of instruction
599 // ids. A match by a state in one set would inhibit the running of sets
600 // farther down the list but not other instruction ids in the same set. Each
601 // set would correspond to matches beginning at a given point in the string.
602 // This is implemented by separating different sets with Mark pointers.
603 
604 // Looks in the State cache for a State matching q, flag.
605 // If one is found, returns it. If one is not found, allocates one,
606 // inserts it in the cache, and returns it.
607 // If mq is not null, MatchSep and the match IDs in mq will be appended
608 // to the State.
610  //mutex_.AssertHeld();
611 
612  // Construct array of instruction ids for the new state.
613  // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
614  // those are the only operators with any effect in
615  // RunWorkqOnEmptyString or RunWorkqOnByte.
616  int* inst = new int[q->size()];
617  int n = 0;
618  uint32_t needflags = 0; // flags needed by kInstEmptyWidth instructions
619  bool sawmatch = false; // whether queue contains guaranteed kInstMatch
620  bool sawmark = false; // whether queue contains a Mark
621  if (ExtraDebug)
622  fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
623  for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
624  int id = *it;
625  if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
626  break;
627  if (q->is_mark(id)) {
628  if (n > 0 && inst[n-1] != Mark) {
629  sawmark = true;
630  inst[n++] = Mark;
631  }
632  continue;
633  }
634  Prog::Inst* ip = prog_->inst(id);
635  switch (ip->opcode()) {
636  case kInstAltMatch:
637  // This state will continue to a match no matter what
638  // the rest of the input is. If it is the highest priority match
639  // being considered, return the special FullMatchState
640  // to indicate that it's all matches from here out.
641  if (kind_ != Prog::kManyMatch &&
642  (kind_ != Prog::kFirstMatch ||
643  (it == q->begin() && ip->greedy(prog_))) &&
644  (kind_ != Prog::kLongestMatch || !sawmark) &&
645  (flag & kFlagMatch)) {
646  delete[] inst;
647  if (ExtraDebug)
648  fprintf(stderr, " -> FullMatchState\n");
649  return FullMatchState;
650  }
652  default:
653  // Record iff id is the head of its list, which must
654  // be the case if id-1 is the last of *its* list. :)
655  if (prog_->inst(id-1)->last())
656  inst[n++] = *it;
657  if (ip->opcode() == kInstEmptyWidth)
658  needflags |= ip->empty();
659  if (ip->opcode() == kInstMatch && !prog_->anchor_end())
660  sawmatch = true;
661  break;
662  }
663  }
664  DCHECK_LE(n, q->size());
665  if (n > 0 && inst[n-1] == Mark)
666  n--;
667 
668  // If there are no empty-width instructions waiting to execute,
669  // then the extra flag bits will not be used, so there is no
670  // point in saving them. (Discarding them reduces the number
671  // of distinct states.)
672  if (needflags == 0)
673  flag &= kFlagMatch;
674 
675  // NOTE(rsc): The code above cannot do flag &= needflags,
676  // because if the right flags were present to pass the current
677  // kInstEmptyWidth instructions, new kInstEmptyWidth instructions
678  // might be reached that in turn need different flags.
679  // The only sure thing is that if there are no kInstEmptyWidth
680  // instructions at all, no flags will be needed.
681  // We could do the extra work to figure out the full set of
682  // possibly needed flags by exploring past the kInstEmptyWidth
683  // instructions, but the check above -- are any flags needed
684  // at all? -- handles the most common case. More fine-grained
685  // analysis can only be justified by measurements showing that
686  // too many redundant states are being allocated.
687 
688  // If there are no Insts in the list, it's a dead state,
689  // which is useful to signal with a special pointer so that
690  // the execution loop can stop early. This is only okay
691  // if the state is *not* a matching state.
692  if (n == 0 && flag == 0) {
693  delete[] inst;
694  if (ExtraDebug)
695  fprintf(stderr, " -> DeadState\n");
696  return DeadState;
697  }
698 
699  // If we're in longest match mode, the state is a sequence of
700  // unordered state sets separated by Marks. Sort each set
701  // to canonicalize, to reduce the number of distinct sets stored.
702  if (kind_ == Prog::kLongestMatch) {
703  int* ip = inst;
704  int* ep = ip + n;
705  while (ip < ep) {
706  int* markp = ip;
707  while (markp < ep && *markp != Mark)
708  markp++;
709  std::sort(ip, markp);
710  if (markp < ep)
711  markp++;
712  ip = markp;
713  }
714  }
715 
716  // If we're in many match mode, canonicalize for similar reasons:
717  // we have an unordered set of states (i.e. we don't have Marks)
718  // and sorting will reduce the number of distinct sets stored.
719  if (kind_ == Prog::kManyMatch) {
720  int* ip = inst;
721  int* ep = ip + n;
722  std::sort(ip, ep);
723  }
724 
725  // Append MatchSep and the match IDs in mq if necessary.
726  if (mq != NULL) {
727  inst[n++] = MatchSep;
728  for (Workq::iterator i = mq->begin(); i != mq->end(); ++i) {
729  int id = *i;
730  Prog::Inst* ip = prog_->inst(id);
731  if (ip->opcode() == kInstMatch)
732  inst[n++] = ip->match_id();
733  }
734  }
735 
736  // Save the needed empty-width flags in the top bits for use later.
737  flag |= needflags << kFlagNeedShift;
738 
739  State* state = CachedState(inst, n, flag);
740  delete[] inst;
741  return state;
742 }
743 
744 // Looks in the State cache for a State matching inst, ninst, flag.
745 // If one is found, returns it. If one is not found, allocates one,
746 // inserts it in the cache, and returns it.
747 DFA::State* DFA::CachedState(int* inst, int ninst, uint32_t flag) {
748  //mutex_.AssertHeld();
749 
750  // Look in the cache for a pre-existing state.
751  // We have to initialise the struct like this because otherwise
752  // MSVC will complain about the flexible array member. :(
753  State state;
754  state.inst_ = inst;
755  state.ninst_ = ninst;
756  state.flag_ = flag;
758  if (it != state_cache_.end()) {
759  if (ExtraDebug)
760  fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
761  return *it;
762  }
763 
764  // Must have enough memory for new state.
765  // In addition to what we're going to allocate,
766  // the state cache hash table seems to incur about 40 bytes per
767  // State*, empirically.
768  const int kStateCacheOverhead = 40;
769  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
770  int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
771  ninst*sizeof(int);
772  if (mem_budget_ < mem + kStateCacheOverhead) {
773  mem_budget_ = -1;
774  return NULL;
775  }
776  mem_budget_ -= mem + kStateCacheOverhead;
777 
778  // Allocate new state along with room for next_ and inst_.
779  char* space = std::allocator<char>().allocate(mem);
780  State* s = new (space) State;
781  (void) new (s->next_) std::atomic<State*>[nnext];
782  // Work around a unfortunate bug in older versions of libstdc++.
783  // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64658)
784  for (int i = 0; i < nnext; i++)
785  (void) new (s->next_ + i) std::atomic<State*>(NULL);
786  s->inst_ = new (s->next_ + nnext) int[ninst];
787  memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
788  s->ninst_ = ninst;
789  s->flag_ = flag;
790  if (ExtraDebug)
791  fprintf(stderr, " -> %s\n", DumpState(s).c_str());
792 
793  // Put state in cache and return it.
794  state_cache_.insert(s);
795  return s;
796 }
797 
798 // Clear the cache. Must hold cache_mutex_.w or be in destructor.
802  while (begin != end) {
804  ++begin;
805  // Deallocate the blob of memory that we allocated in DFA::CachedState().
806  // We recompute mem in order to benefit from sized delete where possible.
807  int ninst = (*tmp)->ninst_;
808  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
809  int mem = sizeof(State) + nnext*sizeof(std::atomic<State*>) +
810  ninst*sizeof(int);
811  std::allocator<char>().deallocate(reinterpret_cast<char*>(*tmp), mem);
812  }
813  state_cache_.clear();
814 }
815 
816 // Copies insts in state s to the work queue q.
818  q->clear();
819  for (int i = 0; i < s->ninst_; i++) {
820  if (s->inst_[i] == Mark) {
821  q->mark();
822  } else if (s->inst_[i] == MatchSep) {
823  // Nothing after this is an instruction!
824  break;
825  } else {
826  // Explore from the head of the list.
827  AddToQueue(q, s->inst_[i], s->flag_ & kFlagEmptyMask);
828  }
829  }
830 }
831 
832 // Adds ip to the work queue, following empty arrows according to flag.
833 void DFA::AddToQueue(Workq* q, int id, uint32_t flag) {
834 
835  // Use stack_ to hold our stack of instructions yet to process.
836  // It was preallocated as follows:
837  // one entry per Capture;
838  // one entry per EmptyWidth; and
839  // one entry per Nop.
840  // This reflects the maximum number of stack pushes that each can
841  // perform. (Each instruction can be processed at most once.)
842  // When using marks, we also added nmark == prog_->size().
843  // (Otherwise, nmark == 0.)
844  int* stk = stack_.data();
845  int nstk = 0;
846 
847  stk[nstk++] = id;
848  while (nstk > 0) {
849  DCHECK_LE(nstk, stack_.size());
850  id = stk[--nstk];
851 
852  Loop:
853  if (id == Mark) {
854  q->mark();
855  continue;
856  }
857 
858  if (id == 0)
859  continue;
860 
861  // If ip is already on the queue, nothing to do.
862  // Otherwise add it. We don't actually keep all the
863  // ones that get added, but adding all of them here
864  // increases the likelihood of q->contains(id),
865  // reducing the amount of duplicated work.
866  if (q->contains(id))
867  continue;
868  q->insert_new(id);
869 
870  // Process instruction.
871  Prog::Inst* ip = prog_->inst(id);
872  switch (ip->opcode()) {
873  default:
874  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
875  break;
876 
877  case kInstByteRange: // just save these on the queue
878  case kInstMatch:
879  if (ip->last())
880  break;
881  id = id+1;
882  goto Loop;
883 
884  case kInstCapture: // DFA treats captures as no-ops.
885  case kInstNop:
886  if (!ip->last())
887  stk[nstk++] = id+1;
888 
889  // If this instruction is the [00-FF]* loop at the beginning of
890  // a leftmost-longest unanchored search, separate with a Mark so
891  // that future threads (which will start farther to the right in
892  // the input string) are lower priority than current threads.
893  if (ip->opcode() == kInstNop && q->maxmark() > 0 &&
894  id == prog_->start_unanchored() && id != prog_->start())
895  stk[nstk++] = Mark;
896  id = ip->out();
897  goto Loop;
898 
899  case kInstAltMatch:
900  DCHECK(!ip->last());
901  id = id+1;
902  goto Loop;
903 
904  case kInstEmptyWidth:
905  if (!ip->last())
906  stk[nstk++] = id+1;
907 
908  // Continue on if we have all the right flag bits.
909  if (ip->empty() & ~flag)
910  break;
911  id = ip->out();
912  goto Loop;
913  }
914  }
915 }
916 
917 // Running of work queues. In the work queue, order matters:
918 // the queue is sorted in priority order. If instruction i comes before j,
919 // then the instructions that i produces during the run must come before
920 // the ones that j produces. In order to keep this invariant, all the
921 // work queue runners have to take an old queue to process and then
922 // also a new queue to fill in. It's not acceptable to add to the end of
923 // an existing queue, because new instructions will not end up in the
924 // correct position.
925 
926 // Runs the work queue, processing the empty strings indicated by flag.
927 // For example, flag == kEmptyBeginLine|kEmptyEndLine means to match
928 // both ^ and $. It is important that callers pass all flags at once:
929 // processing both ^ and $ is not the same as first processing only ^
930 // and then processing only $. Doing the two-step sequence won't match
931 // ^$^$^$ but processing ^ and $ simultaneously will (and is the behavior
932 // exhibited by existing implementations).
934  newq->clear();
935  for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
936  if (oldq->is_mark(*i))
937  AddToQueue(newq, Mark, flag);
938  else
939  AddToQueue(newq, *i, flag);
940  }
941 }
942 
943 // Runs the work queue, processing the single byte c followed by any empty
944 // strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine,
945 // means to match c$. Sets the bool *ismatch to true if the end of the
946 // regular expression program has been reached (the regexp has matched).
947 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
948  int c, uint32_t flag, bool* ismatch) {
949  //mutex_.AssertHeld();
950 
951  newq->clear();
952  for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
953  if (oldq->is_mark(*i)) {
954  if (*ismatch)
955  return;
956  newq->mark();
957  continue;
958  }
959  int id = *i;
960  Prog::Inst* ip = prog_->inst(id);
961  switch (ip->opcode()) {
962  default:
963  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
964  break;
965 
966  case kInstFail: // never succeeds
967  case kInstCapture: // already followed
968  case kInstNop: // already followed
969  case kInstAltMatch: // already followed
970  case kInstEmptyWidth: // already followed
971  break;
972 
973  case kInstByteRange: // can follow if c is in range
974  if (ip->Matches(c))
975  AddToQueue(newq, ip->out(), flag);
976  break;
977 
978  case kInstMatch:
979  if (prog_->anchor_end() && c != kByteEndText &&
981  break;
982  *ismatch = true;
983  if (kind_ == Prog::kFirstMatch) {
984  // Can stop processing work queue since we found a match.
985  return;
986  }
987  break;
988  }
989  }
990 
991  if (ExtraDebug)
992  fprintf(stderr, "%s on %d[%#x] -> %s [%d]\n", DumpWorkq(oldq).c_str(),
993  c, flag, DumpWorkq(newq).c_str(), *ismatch);
994 }
995 
996 // Processes input byte c in state, returning new state.
997 // Caller does not hold mutex.
999  // Keep only one RunStateOnByte going
1000  // even if the DFA is being run by multiple threads.
1001  MutexLock l(&mutex_);
1002  return RunStateOnByte(state, c);
1003 }
1004 
1005 // Processes input byte c in state, returning new state.
1007  //mutex_.AssertHeld();
1008 
1009  if (state <= SpecialStateMax) {
1010  if (state == FullMatchState) {
1011  // It is convenient for routines like PossibleMatchRange
1012  // if we implement RunStateOnByte for FullMatchState:
1013  // once you get into this state you never get out,
1014  // so it's pretty easy.
1015  return FullMatchState;
1016  }
1017  if (state == DeadState) {
1018  LOG(DFATAL) << "DeadState in RunStateOnByte";
1019  return NULL;
1020  }
1021  if (state == NULL) {
1022  LOG(DFATAL) << "NULL state in RunStateOnByte";
1023  return NULL;
1024  }
1025  LOG(DFATAL) << "Unexpected special state in RunStateOnByte";
1026  return NULL;
1027  }
1028 
1029  // If someone else already computed this, return it.
1030  State* ns = state->next_[ByteMap(c)].load(std::memory_order_relaxed);
1031  if (ns != NULL)
1032  return ns;
1033 
1034  // Convert state into Workq.
1036 
1037  // Flags marking the kinds of empty-width things (^ $ etc)
1038  // around this byte. Before the byte we have the flags recorded
1039  // in the State structure itself. After the byte we have
1040  // nothing yet (but that will change: read on).
1041  uint32_t needflag = state->flag_ >> kFlagNeedShift;
1042  uint32_t beforeflag = state->flag_ & kFlagEmptyMask;
1043  uint32_t oldbeforeflag = beforeflag;
1044  uint32_t afterflag = 0;
1045 
1046  if (c == '\n') {
1047  // Insert implicit $ and ^ around \n
1048  beforeflag |= kEmptyEndLine;
1049  afterflag |= kEmptyBeginLine;
1050  }
1051 
1052  if (c == kByteEndText) {
1053  // Insert implicit $ and \z before the fake "end text" byte.
1054  beforeflag |= kEmptyEndLine | kEmptyEndText;
1055  }
1056 
1057  // The state flag kFlagLastWord says whether the last
1058  // byte processed was a word character. Use that info to
1059  // insert empty-width (non-)word boundaries.
1060  bool islastword = (state->flag_ & kFlagLastWord) != 0;
1061  bool isword = c != kByteEndText && Prog::IsWordChar(static_cast<uint8_t>(c));
1062  if (isword == islastword)
1063  beforeflag |= kEmptyNonWordBoundary;
1064  else
1065  beforeflag |= kEmptyWordBoundary;
1066 
1067  // Okay, finally ready to run.
1068  // Only useful to rerun on empty string if there are new, useful flags.
1069  if (beforeflag & ~oldbeforeflag & needflag) {
1070  RunWorkqOnEmptyString(q0_, q1_, beforeflag);
1071  using std::swap;
1072  swap(q0_, q1_);
1073  }
1074  bool ismatch = false;
1075  RunWorkqOnByte(q0_, q1_, c, afterflag, &ismatch);
1076  using std::swap;
1077  swap(q0_, q1_);
1078 
1079  // Save afterflag along with ismatch and isword in new state.
1080  uint32_t flag = afterflag;
1081  if (ismatch)
1082  flag |= kFlagMatch;
1083  if (isword)
1084  flag |= kFlagLastWord;
1085 
1086  if (ismatch && kind_ == Prog::kManyMatch)
1088  else
1089  ns = WorkqToCachedState(q0_, NULL, flag);
1090 
1091  // Flush ns before linking to it.
1092  // Write barrier before updating state->next_ so that the
1093  // main search loop can proceed without any locking, for speed.
1094  // (Otherwise it would need one mutex operation per input byte.)
1095  state->next_[ByteMap(c)].store(ns, std::memory_order_release);
1096  return ns;
1097 }
1098 
1099 
1101 // DFA cache reset.
1102 
1103 // Reader-writer lock helper.
1104 //
1105 // The DFA uses a reader-writer mutex to protect the state graph itself.
1106 // Traversing the state graph requires holding the mutex for reading,
1107 // and discarding the state graph and starting over requires holding the
1108 // lock for writing. If a search needs to expand the graph but is out
1109 // of memory, it will need to drop its read lock and then acquire the
1110 // write lock. Since it cannot then atomically downgrade from write lock
1111 // to read lock, it runs the rest of the search holding the write lock.
1112 // (This probably helps avoid repeated contention, but really the decision
1113 // is forced by the Mutex interface.) It's a bit complicated to keep
1114 // track of whether the lock is held for reading or writing and thread
1115 // that through the search, so instead we encapsulate it in the RWLocker
1116 // and pass that around.
1117 
1119  public:
1120  explicit RWLocker(Mutex* mu);
1121  ~RWLocker();
1122 
1123  // If the lock is only held for reading right now,
1124  // drop the read lock and re-acquire for writing.
1125  // Subsequent calls to LockForWriting are no-ops.
1126  // Notice that the lock is *released* temporarily.
1127  void LockForWriting();
1128 
1129  private:
1131  bool writing_;
1132 
1133  RWLocker(const RWLocker&) = delete;
1134  RWLocker& operator=(const RWLocker&) = delete;
1135 };
1136 
1138  mu_->ReaderLock();
1139 }
1140 
1141 // This function is marked as NO_THREAD_SAFETY_ANALYSIS because
1142 // the annotations don't support lock upgrade.
1144  if (!writing_) {
1145  mu_->ReaderUnlock();
1146  mu_->WriterLock();
1147  writing_ = true;
1148  }
1149 }
1150 
1152  if (!writing_)
1153  mu_->ReaderUnlock();
1154  else
1155  mu_->WriterUnlock();
1156 }
1157 
1158 
1159 // When the DFA's State cache fills, we discard all the states in the
1160 // cache and start over. Many threads can be using and adding to the
1161 // cache at the same time, so we synchronize using the cache_mutex_
1162 // to keep from stepping on other threads. Specifically, all the
1163 // threads using the current cache hold cache_mutex_ for reading.
1164 // When a thread decides to flush the cache, it drops cache_mutex_
1165 // and then re-acquires it for writing. That ensures there are no
1166 // other threads accessing the cache anymore. The rest of the search
1167 // runs holding cache_mutex_ for writing, avoiding any contention
1168 // with or cache pollution caused by other threads.
1169 
1170 void DFA::ResetCache(RWLocker* cache_lock) {
1171  // Re-acquire the cache_mutex_ for writing (exclusive use).
1172  cache_lock->LockForWriting();
1173 
1174  // Clear the cache, reset the memory budget.
1175  for (int i = 0; i < kMaxStart; i++) {
1176  start_[i].start = NULL;
1177  start_[i].first_byte.store(kFbUnknown, std::memory_order_relaxed);
1178  }
1179  ClearCache();
1181 }
1182 
1183 // Typically, a couple States do need to be preserved across a cache
1184 // reset, like the State at the current point in the search.
1185 // The StateSaver class helps keep States across cache resets.
1186 // It makes a copy of the state's guts outside the cache (before the reset)
1187 // and then can be asked, after the reset, to recreate the State
1188 // in the new cache. For example, in a DFA method ("this" is a DFA):
1189 //
1190 // StateSaver saver(this, s);
1191 // ResetCache(cache_lock);
1192 // s = saver.Restore();
1193 //
1194 // The saver should always have room in the cache to re-create the state,
1195 // because resetting the cache locks out all other threads, and the cache
1196 // is known to have room for at least a couple states (otherwise the DFA
1197 // constructor fails).
1198 
1200  public:
1201  explicit StateSaver(DFA* dfa, State* state);
1202  ~StateSaver();
1203 
1204  // Recreates and returns a state equivalent to the
1205  // original state passed to the constructor.
1206  // Returns NULL if the cache has filled, but
1207  // since the DFA guarantees to have room in the cache
1208  // for a couple states, should never return NULL
1209  // if used right after ResetCache.
1210  State* Restore();
1211 
1212  private:
1213  DFA* dfa_; // the DFA to use
1214  int* inst_; // saved info from State
1215  int ninst_;
1217  bool is_special_; // whether original state was special
1218  State* special_; // if is_special_, the original state
1219 
1220  StateSaver(const StateSaver&) = delete;
1221  StateSaver& operator=(const StateSaver&) = delete;
1222 };
1223 
1225  dfa_ = dfa;
1226  if (state <= SpecialStateMax) {
1227  inst_ = NULL;
1228  ninst_ = 0;
1229  flag_ = 0;
1230  is_special_ = true;
1231  special_ = state;
1232  return;
1233  }
1234  is_special_ = false;
1235  special_ = NULL;
1236  flag_ = state->flag_;
1237  ninst_ = state->ninst_;
1238  inst_ = new int[ninst_];
1239  memmove(inst_, state->inst_, ninst_*sizeof inst_[0]);
1240 }
1241 
1243  if (!is_special_)
1244  delete[] inst_;
1245 }
1246 
1248  if (is_special_)
1249  return special_;
1250  MutexLock l(&dfa_->mutex_);
1251  State* s = dfa_->CachedState(inst_, ninst_, flag_);
1252  if (s == NULL)
1253  LOG(DFATAL) << "StateSaver failed to restore state.";
1254  return s;
1255 }
1256 
1257 
1259 //
1260 // DFA execution.
1261 //
1262 // The basic search loop is easy: start in a state s and then for each
1263 // byte c in the input, s = s->next[c].
1264 //
1265 // This simple description omits a few efficiency-driven complications.
1266 //
1267 // First, the State graph is constructed incrementally: it is possible
1268 // that s->next[c] is null, indicating that that state has not been
1269 // fully explored. In this case, RunStateOnByte must be invoked to
1270 // determine the next state, which is cached in s->next[c] to save
1271 // future effort. An alternative reason for s->next[c] to be null is
1272 // that the DFA has reached a so-called "dead state", in which any match
1273 // is no longer possible. In this case RunStateOnByte will return NULL
1274 // and the processing of the string can stop early.
1275 //
1276 // Second, a 256-element pointer array for s->next_ makes each State
1277 // quite large (2kB on 64-bit machines). Instead, dfa->bytemap_[]
1278 // maps from bytes to "byte classes" and then next_ only needs to have
1279 // as many pointers as there are byte classes. A byte class is simply a
1280 // range of bytes that the regexp never distinguishes between.
1281 // A regexp looking for a[abc] would have four byte ranges -- 0 to 'a'-1,
1282 // 'a', 'b' to 'c', and 'c' to 0xFF. The bytemap slows us a little bit
1283 // but in exchange we typically cut the size of a State (and thus our
1284 // memory footprint) by about 5-10x. The comments still refer to
1285 // s->next[c] for simplicity, but code should refer to s->next_[bytemap_[c]].
1286 //
1287 // Third, it is common for a DFA for an unanchored match to begin in a
1288 // state in which only one particular byte value can take the DFA to a
1289 // different state. That is, s->next[c] != s for only one c. In this
1290 // situation, the DFA can do better than executing the simple loop.
1291 // Instead, it can call memchr to search very quickly for the byte c.
1292 // Whether the start state has this property is determined during a
1293 // pre-compilation pass, and if so, the byte b is passed to the search
1294 // loop as the "first_byte" argument, along with a boolean "have_first_byte".
1295 //
1296 // Fourth, the desired behavior is to search for the leftmost-best match
1297 // (approximately, the same one that Perl would find), which is not
1298 // necessarily the match ending earliest in the string. Each time a
1299 // match is found, it must be noted, but the DFA must continue on in
1300 // hope of finding a higher-priority match. In some cases, the caller only
1301 // cares whether there is any match at all, not which one is found.
1302 // The "want_earliest_match" flag causes the search to stop at the first
1303 // match found.
1304 //
1305 // Fifth, one algorithm that uses the DFA needs it to run over the
1306 // input string backward, beginning at the end and ending at the beginning.
1307 // Passing false for the "run_forward" flag causes the DFA to run backward.
1308 //
1309 // The checks for these last three cases, which in a naive implementation
1310 // would be performed once per input byte, slow the general loop enough
1311 // to merit specialized versions of the search loop for each of the
1312 // eight possible settings of the three booleans. Rather than write
1313 // eight different functions, we write one general implementation and then
1314 // inline it to create the specialized ones.
1315 //
1316 // Note that matches are delayed by one byte, to make it easier to
1317 // accomodate match conditions depending on the next input byte (like $ and \b).
1318 // When s->next[c]->IsMatch(), it means that there is a match ending just
1319 // *before* byte c.
1320 
1321 // The generic search loop. Searches text for a match, returning
1322 // the pointer to the end of the chosen match, or NULL if no match.
1323 // The bools are equal to the same-named variables in params, but
1324 // making them function arguments lets the inliner specialize
1325 // this function to each combination (see two paragraphs above).
1327  bool have_first_byte,
1328  bool want_earliest_match,
1329  bool run_forward) {
1330  State* start = params->start;
1331  const uint8_t* bp = BytePtr(params->text.data()); // start of text
1332  const uint8_t* p = bp; // text scanning point
1333  const uint8_t* ep = BytePtr(params->text.data() +
1334  params->text.size()); // end of text
1335  const uint8_t* resetp = NULL; // p at last cache reset
1336  if (!run_forward) {
1337  using std::swap;
1338  swap(p, ep);
1339  }
1340 
1341  const uint8_t* bytemap = prog_->bytemap();
1342  const uint8_t* lastmatch = NULL; // most recent matching position in text
1343  bool matched = false;
1344 
1345  State* s = start;
1346  if (ExtraDebug)
1347  fprintf(stderr, "@stx: %s\n", DumpState(s).c_str());
1348 
1349  if (s->IsMatch()) {
1350  matched = true;
1351  lastmatch = p;
1352  if (ExtraDebug)
1353  fprintf(stderr, "match @stx! [%s]\n", DumpState(s).c_str());
1354  if (params->matches != NULL && kind_ == Prog::kManyMatch) {
1355  for (int i = s->ninst_ - 1; i >= 0; i--) {
1356  int id = s->inst_[i];
1357  if (id == MatchSep)
1358  break;
1359  params->matches->insert(id);
1360  }
1361  }
1362  if (want_earliest_match) {
1363  params->ep = reinterpret_cast<const char*>(lastmatch);
1364  return true;
1365  }
1366  }
1367 
1368  while (p != ep) {
1369  if (ExtraDebug)
1370  fprintf(stderr, "@%td: %s\n",
1371  p - bp, DumpState(s).c_str());
1372 
1373  if (have_first_byte && s == start) {
1374  // In start state, only way out is to find first_byte,
1375  // so use optimized assembly in memchr to skip ahead.
1376  // If first_byte isn't found, we can skip to the end
1377  // of the string.
1378  if (run_forward) {
1379  if ((p = BytePtr(memchr(p, params->first_byte, ep - p))) == NULL) {
1380  p = ep;
1381  break;
1382  }
1383  } else {
1384  if ((p = BytePtr(memrchr(ep, params->first_byte, p - ep))) == NULL) {
1385  p = ep;
1386  break;
1387  }
1388  p++;
1389  }
1390  }
1391 
1392  int c;
1393  if (run_forward)
1394  c = *p++;
1395  else
1396  c = *--p;
1397 
1398  // Note that multiple threads might be consulting
1399  // s->next_[bytemap[c]] simultaneously.
1400  // RunStateOnByte takes care of the appropriate locking,
1401  // including a memory barrier so that the unlocked access
1402  // (sometimes known as "double-checked locking") is safe.
1403  // The alternative would be either one DFA per thread
1404  // or one mutex operation per input byte.
1405  //
1406  // ns == DeadState means the state is known to be dead
1407  // (no more matches are possible).
1408  // ns == NULL means the state has not yet been computed
1409  // (need to call RunStateOnByteUnlocked).
1410  // RunStateOnByte returns ns == NULL if it is out of memory.
1411  // ns == FullMatchState means the rest of the string matches.
1412  //
1413  // Okay to use bytemap[] not ByteMap() here, because
1414  // c is known to be an actual byte and not kByteEndText.
1415 
1416  State* ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
1417  if (ns == NULL) {
1418  ns = RunStateOnByteUnlocked(s, c);
1419  if (ns == NULL) {
1420  // After we reset the cache, we hold cache_mutex exclusively,
1421  // so if resetp != NULL, it means we filled the DFA state
1422  // cache with this search alone (without any other threads).
1423  // Benchmarks show that doing a state computation on every
1424  // byte runs at about 0.2 MB/s, while the NFA (nfa.cc) can do the
1425  // same at about 2 MB/s. Unless we're processing an average
1426  // of 10 bytes per state computation, fail so that RE2 can
1427  // fall back to the NFA. However, RE2::Set cannot fall back,
1428  // so we just have to keep on keeping on in that case.
1429  if (dfa_should_bail_when_slow && resetp != NULL &&
1430  static_cast<size_t>(p - resetp) < 10*state_cache_.size() &&
1431  kind_ != Prog::kManyMatch) {
1432  params->failed = true;
1433  return false;
1434  }
1435  resetp = p;
1436 
1437  // Prepare to save start and s across the reset.
1438  StateSaver save_start(this, start);
1439  StateSaver save_s(this, s);
1440 
1441  // Discard all the States in the cache.
1442  ResetCache(params->cache_lock);
1443 
1444  // Restore start and s so we can continue.
1445  if ((start = save_start.Restore()) == NULL ||
1446  (s = save_s.Restore()) == NULL) {
1447  // Restore already did LOG(DFATAL).
1448  params->failed = true;
1449  return false;
1450  }
1451  ns = RunStateOnByteUnlocked(s, c);
1452  if (ns == NULL) {
1453  LOG(DFATAL) << "RunStateOnByteUnlocked failed after ResetCache";
1454  params->failed = true;
1455  return false;
1456  }
1457  }
1458  }
1459  if (ns <= SpecialStateMax) {
1460  if (ns == DeadState) {
1461  params->ep = reinterpret_cast<const char*>(lastmatch);
1462  return matched;
1463  }
1464  // FullMatchState
1465  params->ep = reinterpret_cast<const char*>(ep);
1466  return true;
1467  }
1468 
1469  s = ns;
1470  if (s->IsMatch()) {
1471  matched = true;
1472  // The DFA notices the match one byte late,
1473  // so adjust p before using it in the match.
1474  if (run_forward)
1475  lastmatch = p - 1;
1476  else
1477  lastmatch = p + 1;
1478  if (ExtraDebug)
1479  fprintf(stderr, "match @%td! [%s]\n",
1480  lastmatch - bp, DumpState(s).c_str());
1481  if (params->matches != NULL && kind_ == Prog::kManyMatch) {
1482  for (int i = s->ninst_ - 1; i >= 0; i--) {
1483  int id = s->inst_[i];
1484  if (id == MatchSep)
1485  break;
1486  params->matches->insert(id);
1487  }
1488  }
1489  if (want_earliest_match) {
1490  params->ep = reinterpret_cast<const char*>(lastmatch);
1491  return true;
1492  }
1493  }
1494  }
1495 
1496  // Process one more byte to see if it triggers a match.
1497  // (Remember, matches are delayed one byte.)
1498  if (ExtraDebug)
1499  fprintf(stderr, "@etx: %s\n", DumpState(s).c_str());
1500 
1501  int lastbyte;
1502  if (run_forward) {
1503  if (params->text.end() == params->context.end())
1504  lastbyte = kByteEndText;
1505  else
1506  lastbyte = params->text.end()[0] & 0xFF;
1507  } else {
1508  if (params->text.begin() == params->context.begin())
1509  lastbyte = kByteEndText;
1510  else
1511  lastbyte = params->text.begin()[-1] & 0xFF;
1512  }
1513 
1514  State* ns = s->next_[ByteMap(lastbyte)].load(std::memory_order_acquire);
1515  if (ns == NULL) {
1516  ns = RunStateOnByteUnlocked(s, lastbyte);
1517  if (ns == NULL) {
1518  StateSaver save_s(this, s);
1519  ResetCache(params->cache_lock);
1520  if ((s = save_s.Restore()) == NULL) {
1521  params->failed = true;
1522  return false;
1523  }
1524  ns = RunStateOnByteUnlocked(s, lastbyte);
1525  if (ns == NULL) {
1526  LOG(DFATAL) << "RunStateOnByteUnlocked failed after Reset";
1527  params->failed = true;
1528  return false;
1529  }
1530  }
1531  }
1532  if (ns <= SpecialStateMax) {
1533  if (ns == DeadState) {
1534  params->ep = reinterpret_cast<const char*>(lastmatch);
1535  return matched;
1536  }
1537  // FullMatchState
1538  params->ep = reinterpret_cast<const char*>(ep);
1539  return true;
1540  }
1541 
1542  s = ns;
1543  if (s->IsMatch()) {
1544  matched = true;
1545  lastmatch = p;
1546  if (ExtraDebug)
1547  fprintf(stderr, "match @etx! [%s]\n", DumpState(s).c_str());
1548  if (params->matches != NULL && kind_ == Prog::kManyMatch) {
1549  for (int i = s->ninst_ - 1; i >= 0; i--) {
1550  int id = s->inst_[i];
1551  if (id == MatchSep)
1552  break;
1553  params->matches->insert(id);
1554  }
1555  }
1556  }
1557 
1558  params->ep = reinterpret_cast<const char*>(lastmatch);
1559  return matched;
1560 }
1561 
1562 // Inline specializations of the general loop.
1564  return InlinedSearchLoop(params, 0, 0, 0);
1565 }
1567  return InlinedSearchLoop(params, 0, 0, 1);
1568 }
1570  return InlinedSearchLoop(params, 0, 1, 0);
1571 }
1573  return InlinedSearchLoop(params, 0, 1, 1);
1574 }
1576  return InlinedSearchLoop(params, 1, 0, 0);
1577 }
1579  return InlinedSearchLoop(params, 1, 0, 1);
1580 }
1582  return InlinedSearchLoop(params, 1, 1, 0);
1583 }
1585  return InlinedSearchLoop(params, 1, 1, 1);
1586 }
1587 
1588 // For debugging, calls the general code directly.
1590  return InlinedSearchLoop(params,
1591  params->first_byte >= 0,
1592  params->want_earliest_match,
1593  params->run_forward);
1594 }
1595 
1596 // For performance, calls the appropriate specialized version
1597 // of InlinedSearchLoop.
1599  // Because the methods are private, the Searches array
1600  // cannot be declared at top level.
1601  static bool (DFA::*Searches[])(SearchParams*) = {
1602  &DFA::SearchFFF,
1603  &DFA::SearchFFT,
1604  &DFA::SearchFTF,
1605  &DFA::SearchFTT,
1606  &DFA::SearchTFF,
1607  &DFA::SearchTFT,
1608  &DFA::SearchTTF,
1609  &DFA::SearchTTT,
1610  };
1611 
1612  bool have_first_byte = params->first_byte >= 0;
1613  int index = 4 * have_first_byte +
1614  2 * params->want_earliest_match +
1615  1 * params->run_forward;
1616  return (this->*Searches[index])(params);
1617 }
1618 
1619 
1620 // The discussion of DFA execution above ignored the question of how
1621 // to determine the initial state for the search loop. There are two
1622 // factors that influence the choice of start state.
1623 //
1624 // The first factor is whether the search is anchored or not.
1625 // The regexp program (Prog*) itself has
1626 // two different entry points: one for anchored searches and one for
1627 // unanchored searches. (The unanchored version starts with a leading ".*?"
1628 // and then jumps to the anchored one.)
1629 //
1630 // The second factor is where text appears in the larger context, which
1631 // determines which empty-string operators can be matched at the beginning
1632 // of execution. If text is at the very beginning of context, \A and ^ match.
1633 // Otherwise if text is at the beginning of a line, then ^ matches.
1634 // Otherwise it matters whether the character before text is a word character
1635 // or a non-word character.
1636 //
1637 // The two cases (unanchored vs not) and four cases (empty-string flags)
1638 // combine to make the eight cases recorded in the DFA's begin_text_[2],
1639 // begin_line_[2], after_wordchar_[2], and after_nonwordchar_[2] cached
1640 // StartInfos. The start state for each is filled in the first time it
1641 // is used for an actual search.
1642 
1643 // Examines text, context, and anchored to determine the right start
1644 // state for the DFA search loop. Fills in params and returns true on success.
1645 // Returns false on failure.
1647  const StringPiece& text = params->text;
1648  const StringPiece& context = params->context;
1649 
1650  // Sanity check: make sure that text lies within context.
1651  if (text.begin() < context.begin() || text.end() > context.end()) {
1652  LOG(DFATAL) << "context does not contain text";
1653  params->start = DeadState;
1654  return true;
1655  }
1656 
1657  // Determine correct search type.
1658  int start;
1659  uint32_t flags;
1660  if (params->run_forward) {
1661  if (text.begin() == context.begin()) {
1664  } else if (text.begin()[-1] == '\n') {
1667  } else if (Prog::IsWordChar(text.begin()[-1] & 0xFF)) {
1669  flags = kFlagLastWord;
1670  } else {
1672  flags = 0;
1673  }
1674  } else {
1675  if (text.end() == context.end()) {
1678  } else if (text.end()[0] == '\n') {
1681  } else if (Prog::IsWordChar(text.end()[0] & 0xFF)) {
1683  flags = kFlagLastWord;
1684  } else {
1686  flags = 0;
1687  }
1688  }
1689  if (params->anchored)
1690  start |= kStartAnchored;
1691  StartInfo* info = &start_[start];
1692 
1693  // Try once without cache_lock for writing.
1694  // Try again after resetting the cache
1695  // (ResetCache will relock cache_lock for writing).
1696  if (!AnalyzeSearchHelper(params, info, flags)) {
1697  ResetCache(params->cache_lock);
1698  if (!AnalyzeSearchHelper(params, info, flags)) {
1699  LOG(DFATAL) << "Failed to analyze start state.";
1700  params->failed = true;
1701  return false;
1702  }
1703  }
1704 
1705  if (ExtraDebug)
1706  fprintf(stderr, "anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
1707  params->anchored, params->run_forward, flags,
1708  DumpState(info->start).c_str(), info->first_byte.load());
1709 
1710  params->start = info->start;
1711  params->first_byte = info->first_byte.load(std::memory_order_acquire);
1712 
1713  return true;
1714 }
1715 
1716 // Fills in info if needed. Returns true on success, false on failure.
1718  uint32_t flags) {
1719  // Quick check.
1720  int fb = info->first_byte.load(std::memory_order_acquire);
1721  if (fb != kFbUnknown)
1722  return true;
1723 
1724  MutexLock l(&mutex_);
1725  fb = info->first_byte.load(std::memory_order_relaxed);
1726  if (fb != kFbUnknown)
1727  return true;
1728 
1729  q0_->clear();
1730  AddToQueue(q0_,
1731  params->anchored ? prog_->start() : prog_->start_unanchored(),
1732  flags);
1733  info->start = WorkqToCachedState(q0_, NULL, flags);
1734  if (info->start == NULL)
1735  return false;
1736 
1737  if (info->start == DeadState) {
1738  // Synchronize with "quick check" above.
1739  info->first_byte.store(kFbNone, std::memory_order_release);
1740  return true;
1741  }
1742 
1743  if (info->start == FullMatchState) {
1744  // Synchronize with "quick check" above.
1745  info->first_byte.store(kFbNone, std::memory_order_release); // will be ignored
1746  return true;
1747  }
1748 
1749  // Even if we have a first_byte, we cannot use it when anchored and,
1750  // less obviously, we cannot use it when we are going to need flags.
1751  // This trick works only when there is a single byte that leads to a
1752  // different state!
1753  int first_byte = prog_->first_byte();
1754  if (first_byte == -1 ||
1755  params->anchored ||
1756  info->start->flag_ >> kFlagNeedShift != 0)
1757  first_byte = kFbNone;
1758 
1759  // Synchronize with "quick check" above.
1760  info->first_byte.store(first_byte, std::memory_order_release);
1761  return true;
1762 }
1763 
1764 // The actual DFA search: calls AnalyzeSearch and then FastSearchLoop.
1766  const StringPiece& context,
1767  bool anchored,
1768  bool want_earliest_match,
1769  bool run_forward,
1770  bool* failed,
1771  const char** epp,
1772  SparseSet* matches) {
1773  *epp = NULL;
1774  if (!ok()) {
1775  *failed = true;
1776  return false;
1777  }
1778  *failed = false;
1779 
1780  if (ExtraDebug) {
1781  fprintf(stderr, "\nprogram:\n%s\n", prog_->DumpUnanchored().c_str());
1782  fprintf(stderr, "text %s anchored=%d earliest=%d fwd=%d kind %d\n",
1783  std::string(text).c_str(), anchored, want_earliest_match,
1784  run_forward, kind_);
1785  }
1786 
1787  RWLocker l(&cache_mutex_);
1788  SearchParams params(text, context, &l);
1789  params.anchored = anchored;
1790  params.want_earliest_match = want_earliest_match;
1791  params.run_forward = run_forward;
1792  params.matches = matches;
1793 
1794  if (!AnalyzeSearch(&params)) {
1795  *failed = true;
1796  return false;
1797  }
1798  if (params.start == DeadState)
1799  return false;
1800  if (params.start == FullMatchState) {
1801  if (run_forward == want_earliest_match)
1802  *epp = text.data();
1803  else
1804  *epp = text.data() + text.size();
1805  return true;
1806  }
1807  if (ExtraDebug)
1808  fprintf(stderr, "start %s\n", DumpState(params.start).c_str());
1809  bool ret = FastSearchLoop(&params);
1810  if (params.failed) {
1811  *failed = true;
1812  return false;
1813  }
1814  *epp = params.ep;
1815  return ret;
1816 }
1817 
1819  // For a forward DFA, half the memory goes to each DFA.
1820  // However, if it is a "many match" DFA, then there is
1821  // no counterpart with which the memory must be shared.
1822  //
1823  // For a reverse DFA, all the memory goes to the
1824  // "longest match" DFA, because RE2 never does reverse
1825  // "first match" searches.
1826  if (kind == kFirstMatch) {
1827  std::call_once(dfa_first_once_, [](Prog* prog) {
1828  prog->dfa_first_ = new DFA(prog, kFirstMatch, prog->dfa_mem_ / 2);
1829  }, this);
1830  return dfa_first_;
1831  } else if (kind == kManyMatch) {
1832  std::call_once(dfa_first_once_, [](Prog* prog) {
1833  prog->dfa_first_ = new DFA(prog, kManyMatch, prog->dfa_mem_);
1834  }, this);
1835  return dfa_first_;
1836  } else {
1837  std::call_once(dfa_longest_once_, [](Prog* prog) {
1838  if (!prog->reversed_)
1839  prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_ / 2);
1840  else
1841  prog->dfa_longest_ = new DFA(prog, kLongestMatch, prog->dfa_mem_);
1842  }, this);
1843  return dfa_longest_;
1844  }
1845 }
1846 
1847 void Prog::DeleteDFA(DFA* dfa) {
1848  delete dfa;
1849 }
1850 
1851 // Executes the regexp program to search in text,
1852 // which itself is inside the larger context. (As a convenience,
1853 // passing a NULL context is equivalent to passing text.)
1854 // Returns true if a match is found, false if not.
1855 // If a match is found, fills in match0->end() to point at the end of the match
1856 // and sets match0->begin() to text.begin(), since the DFA can't track
1857 // where the match actually began.
1858 //
1859 // This is the only external interface (class DFA only exists in this file).
1860 //
1861 bool Prog::SearchDFA(const StringPiece& text, const StringPiece& const_context,
1862  Anchor anchor, MatchKind kind, StringPiece* match0,
1863  bool* failed, SparseSet* matches) {
1864  *failed = false;
1865 
1866  StringPiece context = const_context;
1867  if (context.data() == NULL)
1868  context = text;
1869  bool carat = anchor_start();
1870  bool dollar = anchor_end();
1871  if (reversed_) {
1872  using std::swap;
1873  swap(carat, dollar);
1874  }
1875  if (carat && context.begin() != text.begin())
1876  return false;
1877  if (dollar && context.end() != text.end())
1878  return false;
1879 
1880  // Handle full match by running an anchored longest match
1881  // and then checking if it covers all of text.
1882  bool anchored = anchor == kAnchored || anchor_start() || kind == kFullMatch;
1883  bool endmatch = false;
1884  if (kind == kManyMatch) {
1885  // This is split out in order to avoid clobbering kind.
1886  } else if (kind == kFullMatch || anchor_end()) {
1887  endmatch = true;
1888  kind = kLongestMatch;
1889  }
1890 
1891  // If the caller doesn't care where the match is (just whether one exists),
1892  // then we can stop at the very first match we find, the so-called
1893  // "earliest match".
1894  bool want_earliest_match = false;
1895  if (kind == kManyMatch) {
1896  // This is split out in order to avoid clobbering kind.
1897  if (matches == NULL) {
1898  want_earliest_match = true;
1899  }
1900  } else if (match0 == NULL && !endmatch) {
1901  want_earliest_match = true;
1902  kind = kLongestMatch;
1903  }
1904 
1905  DFA* dfa = GetDFA(kind);
1906  const char* ep;
1907  bool matched = dfa->Search(text, context, anchored,
1908  want_earliest_match, !reversed_,
1909  failed, &ep, matches);
1910  if (*failed)
1911  return false;
1912  if (!matched)
1913  return false;
1914  if (endmatch && ep != (reversed_ ? text.data() : text.data() + text.size()))
1915  return false;
1916 
1917  // If caller cares, record the boundary of the match.
1918  // We only know where it ends, so use the boundary of text
1919  // as the beginning.
1920  if (match0) {
1921  if (reversed_)
1922  *match0 =
1923  StringPiece(ep, static_cast<size_t>(text.data() + text.size() - ep));
1924  else
1925  *match0 =
1926  StringPiece(text.data(), static_cast<size_t>(ep - text.data()));
1927  }
1928  return true;
1929 }
1930 
1931 // Build out all states in DFA. Returns number of states.
1933  if (!ok())
1934  return 0;
1935 
1936  // Pick out start state for unanchored search
1937  // at beginning of text.
1938  RWLocker l(&cache_mutex_);
1939  SearchParams params(StringPiece(), StringPiece(), &l);
1940  params.anchored = false;
1941  if (!AnalyzeSearch(&params) ||
1942  params.start == NULL ||
1943  params.start == DeadState)
1944  return 0;
1945 
1946  // Add start state to work queue.
1947  // Note that any State* that we handle here must point into the cache,
1948  // so we can simply depend on pointer-as-a-number hashing and equality.
1949  std::unordered_map<State*, int> m;
1950  std::deque<State*> q;
1951  m.emplace(params.start, static_cast<int>(m.size()));
1952  q.push_back(params.start);
1953 
1954  // Compute the input bytes needed to cover all of the next pointers.
1955  int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
1956  std::vector<int> input(nnext);
1957  for (int c = 0; c < 256; c++) {
1958  int b = prog_->bytemap()[c];
1959  while (c < 256-1 && prog_->bytemap()[c+1] == b)
1960  c++;
1961  input[b] = c;
1962  }
1964 
1965  // Scratch space for the output.
1966  std::vector<int> output(nnext);
1967 
1968  // Flood to expand every state.
1969  bool oom = false;
1970  while (!q.empty()) {
1971  State* s = q.front();
1972  q.pop_front();
1973  for (int c : input) {
1974  State* ns = RunStateOnByteUnlocked(s, c);
1975  if (ns == NULL) {
1976  oom = true;
1977  break;
1978  }
1979  if (ns == DeadState) {
1980  output[ByteMap(c)] = -1;
1981  continue;
1982  }
1983  if (m.find(ns) == m.end()) {
1984  m.emplace(ns, static_cast<int>(m.size()));
1985  q.push_back(ns);
1986  }
1987  output[ByteMap(c)] = m[ns];
1988  }
1989  if (cb)
1990  cb(oom ? NULL : output.data(),
1991  s == FullMatchState || s->IsMatch());
1992  if (oom)
1993  break;
1994  }
1995 
1996  return static_cast<int>(m.size());
1997 }
1998 
1999 // Build out all states in DFA for kind. Returns number of states.
2001  return GetDFA(kind)->BuildAllStates(cb);
2002 }
2003 
2006 }
2007 
2008 // Computes min and max for matching string.
2009 // Won't return strings bigger than maxlen.
2011  if (!ok())
2012  return false;
2013 
2014  // NOTE: if future users of PossibleMatchRange want more precision when
2015  // presented with infinitely repeated elements, consider making this a
2016  // parameter to PossibleMatchRange.
2017  static int kMaxEltRepetitions = 0;
2018 
2019  // Keep track of the number of times we've visited states previously. We only
2020  // revisit a given state if it's part of a repeated group, so if the value
2021  // portion of the map tuple exceeds kMaxEltRepetitions we bail out and set
2022  // |*max| to |PrefixSuccessor(*max)|.
2023  //
2024  // Also note that previously_visited_states[UnseenStatePtr] will, in the STL
2025  // tradition, implicitly insert a '0' value at first use. We take advantage
2026  // of that property below.
2027  std::unordered_map<State*, int> previously_visited_states;
2028 
2029  // Pick out start state for anchored search at beginning of text.
2030  RWLocker l(&cache_mutex_);
2031  SearchParams params(StringPiece(), StringPiece(), &l);
2032  params.anchored = true;
2033  if (!AnalyzeSearch(&params))
2034  return false;
2035  if (params.start == DeadState) { // No matching strings
2036  *min = "";
2037  *max = "";
2038  return true;
2039  }
2040  if (params.start == FullMatchState) // Every string matches: no max
2041  return false;
2042 
2043  // The DFA is essentially a big graph rooted at params.start,
2044  // and paths in the graph correspond to accepted strings.
2045  // Each node in the graph has potentially 256+1 arrows
2046  // coming out, one for each byte plus the magic end of
2047  // text character kByteEndText.
2048 
2049  // To find the smallest possible prefix of an accepted
2050  // string, we just walk the graph preferring to follow
2051  // arrows with the lowest bytes possible. To find the
2052  // largest possible prefix, we follow the largest bytes
2053  // possible.
2054 
2055  // The test for whether there is an arrow from s on byte j is
2056  // ns = RunStateOnByteUnlocked(s, j);
2057  // if (ns == NULL)
2058  // return false;
2059  // if (ns != DeadState && ns->ninst > 0)
2060  // The RunStateOnByteUnlocked call asks the DFA to build out the graph.
2061  // It returns NULL only if the DFA has run out of memory,
2062  // in which case we can't be sure of anything.
2063  // The second check sees whether there was graph built
2064  // and whether it is interesting graph. Nodes might have
2065  // ns->ninst == 0 if they exist only to represent the fact
2066  // that a match was found on the previous byte.
2067 
2068  // Build minimum prefix.
2069  State* s = params.start;
2070  min->clear();
2071  MutexLock lock(&mutex_);
2072  for (int i = 0; i < maxlen; i++) {
2073  if (previously_visited_states[s] > kMaxEltRepetitions)
2074  break;
2075  previously_visited_states[s]++;
2076 
2077  // Stop if min is a match.
2079  if (ns == NULL) // DFA out of memory
2080  return false;
2081  if (ns != DeadState && (ns == FullMatchState || ns->IsMatch()))
2082  break;
2083 
2084  // Try to extend the string with low bytes.
2085  bool extended = false;
2086  for (int j = 0; j < 256; j++) {
2087  ns = RunStateOnByte(s, j);
2088  if (ns == NULL) // DFA out of memory
2089  return false;
2090  if (ns == FullMatchState ||
2091  (ns > SpecialStateMax && ns->ninst_ > 0)) {
2092  extended = true;
2093  min->append(1, static_cast<char>(j));
2094  s = ns;
2095  break;
2096  }
2097  }
2098  if (!extended)
2099  break;
2100  }
2101 
2102  // Build maximum prefix.
2103  previously_visited_states.clear();
2104  s = params.start;
2105  max->clear();
2106  for (int i = 0; i < maxlen; i++) {
2107  if (previously_visited_states[s] > kMaxEltRepetitions)
2108  break;
2109  previously_visited_states[s] += 1;
2110 
2111  // Try to extend the string with high bytes.
2112  bool extended = false;
2113  for (int j = 255; j >= 0; j--) {
2114  State* ns = RunStateOnByte(s, j);
2115  if (ns == NULL)
2116  return false;
2117  if (ns == FullMatchState ||
2118  (ns > SpecialStateMax && ns->ninst_ > 0)) {
2119  extended = true;
2120  max->append(1, static_cast<char>(j));
2121  s = ns;
2122  break;
2123  }
2124  }
2125  if (!extended) {
2126  // Done, no need for PrefixSuccessor.
2127  return true;
2128  }
2129  }
2130 
2131  // Stopped while still adding to *max - round aaaaaaaaaa... to aaaa...b
2133 
2134  // If there are no bytes left, we have no way to say "there is no maximum
2135  // string". We could make the interface more complicated and be able to
2136  // return "there is no maximum but here is a minimum", but that seems like
2137  // overkill -- the most common no-max case is all possible strings, so not
2138  // telling the caller that the empty string is the minimum match isn't a
2139  // great loss.
2140  if (max->empty())
2141  return false;
2142 
2143  return true;
2144 }
2145 
2146 // PossibleMatchRange for a Prog.
2148  // Have to use dfa_longest_ to get all strings for full matches.
2149  // For example, (a|aa) never matches aa in first-match mode.
2150  return GetDFA(kLongestMatch)->PossibleMatchRange(min, max, maxlen);
2151 }
2152 
2153 } // namespace re2
re2::DFA::RunStateOnByteUnlocked
State * RunStateOnByteUnlocked(State *, int)
Definition: bloaty/third_party/re2/re2/dfa.cc:998
re2::Prog::Inst::Matches
bool Matches(int c)
Definition: bloaty/third_party/re2/re2/prog.h:102
re2::DFA::SearchParams::context
StringPiece context
Definition: bloaty/third_party/re2/re2/dfa.cc:264
re2::Prog::Inst::greedy
bool greedy(Prog *p)
Definition: bloaty/third_party/re2/re2/prog.h:94
re2::DFA::Workq::operator=
Workq & operator=(const Workq &)=delete
flag
uint32_t flag
Definition: ssl_versions.cc:162
NO_THREAD_SAFETY_ANALYSIS
#define NO_THREAD_SAFETY_ANALYSIS
Definition: abseil-cpp/absl/base/internal/thread_annotations.h:220
re2::kInstFail
@ kInstFail
Definition: bloaty/third_party/re2/re2/prog.h:37
re2::DFA::StateSaver
Definition: bloaty/third_party/re2/re2/dfa.cc:1199
re2::DFA::Workq::Workq
Workq(int n, int maxmark)
Definition: bloaty/third_party/re2/re2/dfa.cc:388
re2::DFA::StateEqual
Definition: bloaty/third_party/re2/re2/dfa.cc:160
re2::DFA::DFA
DFA(Prog *prog, Prog::MatchKind kind, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/dfa.cc:437
regen-readme.it
it
Definition: regen-readme.py:15
re2::DFA::SearchFTT
bool SearchFTT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1572
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
re2::DFA::RWLocker::mu_
Mutex * mu_
Definition: bloaty/third_party/re2/re2/dfa.cc:1130
re2::DFA::Workq::clear
void clear()
Definition: bloaty/third_party/re2/re2/dfa.cc:400
re2::Prog::list_count
int list_count()
Definition: bloaty/third_party/re2/re2/prog.h:207
re2::Prog::DumpUnanchored
std::string DumpUnanchored()
Definition: bloaty/third_party/re2/re2/prog.cc:166
re2::DFA::SearchParams
Definition: bloaty/third_party/re2/re2/dfa.cc:249
re2::DFA::mutex_
Mutex mutex_
Definition: bloaty/third_party/re2/re2/dfa.cc:345
re2::DFA::SearchTTT
bool SearchTTT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1584
bool
bool
Definition: setup_once.h:312
re2::DFA::SearchParams::anchored
bool anchored
Definition: bloaty/third_party/re2/re2/dfa.cc:265
re2::DFA::StateSaver::dfa_
DFA * dfa_
Definition: bloaty/third_party/re2/re2/dfa.cc:1213
pod_array.h
re2::Prog::BuildEntireDFA
int BuildEntireDFA(MatchKind kind, const DFAStateCallback &cb)
Definition: bloaty/third_party/re2/re2/dfa.cc:2000
re2::DFA::DumpWorkq
static std::string DumpWorkq(Workq *q)
Definition: bloaty/third_party/re2/re2/dfa.cc:507
re2::Mutex
Definition: bloaty/third_party/re2/util/mutex.h:34
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
re2::HashMix::get
size_t get() const
Definition: bloaty/third_party/re2/util/mix.h:30
re2::DFA::kStartAfterNonWordChar
@ kStartAfterNonWordChar
Definition: bloaty/third_party/re2/re2/dfa.cc:192
re2::MutexLock
Definition: bloaty/third_party/re2/util/mutex.h:90
re2::DFA::SlowSearchLoop
bool SlowSearchLoop(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1589
false
#define false
Definition: setup_once.h:323
re2::DFA::Workq::maxmark_
int maxmark_
Definition: bloaty/third_party/re2/re2/dfa.cc:429
DCHECK_LE
#define DCHECK_LE(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:22
re2::DFA::Workq
Definition: bloaty/third_party/re2/re2/dfa.cc:385
re2::DFA::Workq::n_
int n_
Definition: bloaty/third_party/re2/re2/dfa.cc:428
re2::StringPiece::end
const_iterator end() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:72
re2::DFA::Workq::maxmark
int maxmark()
Definition: bloaty/third_party/re2/re2/dfa.cc:398
re2::Prog::Inst
Definition: bloaty/third_party/re2/re2/prog.h:62
re2::DFA::SearchParams::ep
const char * ep
Definition: bloaty/third_party/re2/re2/dfa.cc:272
re2::Prog::Anchor
Anchor
Definition: bloaty/third_party/re2/re2/prog.h:175
re2::DFA::kStartBeginLine
@ kStartBeginLine
Definition: bloaty/third_party/re2/re2/dfa.cc:190
re2::DFA::StartInfo::start
State * start
Definition: bloaty/third_party/re2/re2/dfa.cc:285
re2::DFA::RWLocker
Definition: bloaty/third_party/re2/re2/dfa.cc:1118
string.h
re2::DFA::Workq::last_was_mark_
bool last_was_mark_
Definition: bloaty/third_party/re2/re2/dfa.cc:431
re2::StringPiece::size
size_type size() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:80
re2::DFA::StateHash
Definition: bloaty/third_party/re2/re2/dfa.cc:149
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::DFA::kFlagMatch
@ kFlagMatch
Definition: bloaty/third_party/re2/re2/dfa.cc:144
re2::DFA::kFbNone
@ kFbNone
Definition: bloaty/third_party/re2/re2/dfa.cc:183
re2::DFA::RWLocker::operator=
RWLocker & operator=(const RWLocker &)=delete
re2::PODArray::data
T * data() const
Definition: bloaty/third_party/re2/util/pod_array.h:24
re2::DFA::kMaxStart
@ kMaxStart
Definition: bloaty/third_party/re2/re2/dfa.cc:193
SpecialStateMax
#define SpecialStateMax
Definition: bloaty/third_party/re2/re2/dfa.cc:502
re2::Prog::Inst::empty
EmptyOp empty()
Definition: bloaty/third_party/re2/re2/prog.h:92
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::DFA::kByteEndText
@ kByteEndText
Definition: bloaty/third_party/re2/re2/dfa.cc:141
re2::DFA::StateSaver::inst_
int * inst_
Definition: bloaty/third_party/re2/re2/dfa.cc:1214
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
re2::DFA::SearchFFT
bool SearchFFT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1566
re2::DFA::StateSaver::ninst_
int ninst_
Definition: bloaty/third_party/re2/re2/dfa.cc:1215
re2::HashMix
Definition: bloaty/third_party/re2/util/mix.h:20
re2::Prog::bytemap_range
int bytemap_range()
Definition: bloaty/third_party/re2/re2/prog.h:218
re2::DFA::Workq::insert
void insert(int id)
Definition: bloaty/third_party/re2/re2/dfa.cc:416
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
iterator
const typedef MCPhysReg * iterator
Definition: MCRegisterInfo.h:27
re2::Prog::Inst::match_id
int match_id()
Definition: bloaty/third_party/re2/re2/prog.h:91
re2::DFA::StateSaver::special_
State * special_
Definition: bloaty/third_party/re2/re2/dfa.cc:1218
re2::kInstEmptyWidth
@ kInstEmptyWidth
Definition: bloaty/third_party/re2/re2/prog.h:34
re2::BytePtr
static const uint8_t * BytePtr(const void *v)
Definition: bloaty/third_party/re2/re2/dfa.cc:366
re2::Prog::IsWordChar
static bool IsWordChar(uint8_t c)
Definition: bloaty/third_party/re2/re2/prog.h:240
re2::DFA::state_cache_
StateSet state_cache_
Definition: bloaty/third_party/re2/re2/dfa.cc:361
re2::Prog::inst_count
int inst_count(InstOp op)
Definition: bloaty/third_party/re2/re2/prog.h:208
re2::DFA::state_budget_
int64_t state_budget_
Definition: bloaty/third_party/re2/re2/dfa.cc:360
re2::DFA::State
Definition: bloaty/third_party/re2/re2/dfa.cc:120
sparse_set.h
true
#define true
Definition: setup_once.h:324
mu_
Mutex mu_
Definition: oob_backend_metric.cc:115
re2::DFA::DumpState
static std::string DumpState(State *state)
Definition: bloaty/third_party/re2/re2/dfa.cc:523
re2::DFA::CachedState
State * CachedState(int *inst, int ninst, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:747
re2::DFA::Workq::nextmark_
int nextmark_
Definition: bloaty/third_party/re2/re2/dfa.cc:430
FullMatchState
#define FullMatchState
Definition: bloaty/third_party/re2/re2/dfa.cc:500
python_utils.port_server.stderr
stderr
Definition: port_server.py:51
re2::DFA::kStartBeginText
@ kStartBeginText
Definition: bloaty/third_party/re2/re2/dfa.cc:189
re2::DFA::State::ninst_
int ninst_
Definition: bloaty/third_party/re2/re2/dfa.cc:124
re2::PODArray::size
int size() const
Definition: bloaty/third_party/re2/util/pod_array.h:28
re2::DFA::WorkqToCachedState
State * WorkqToCachedState(Workq *q, Workq *mq, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:609
re2::Prog::GetDFA
DFA * GetDFA(MatchKind kind)
Definition: bloaty/third_party/re2/re2/dfa.cc:1818
re2::DFA::RWLocker::LockForWriting
void LockForWriting()
Definition: bloaty/third_party/re2/re2/dfa.cc:1143
re2::Prog::MatchKind
MatchKind
Definition: bloaty/third_party/re2/re2/prog.h:192
re2::DFA::SearchParams::failed
bool failed
Definition: bloaty/third_party/re2/re2/dfa.cc:271
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
re2::DFA::Workq::insert_new
void insert_new(int id)
Definition: bloaty/third_party/re2/re2/dfa.cc:422
re2::SparseSetT::clear
void clear()
Definition: bloaty/third_party/re2/util/sparse_set.h:117
re2::DFA::prog_
Prog * prog_
Definition: bloaty/third_party/re2/re2/dfa.cc:341
re2::kEmptyWordBoundary
@ kEmptyWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:47
absl::call_once
void call_once(absl::once_flag &flag, Callable &&fn, Args &&... args)
Definition: abseil-cpp/absl/base/call_once.h:206
re2::SparseSetT::contains
bool contains(int i) const
Definition: bloaty/third_party/re2/util/sparse_set.h:220
re2::SparseSetT
Definition: bloaty/third_party/re2/util/sparse_set.h:69
re2::DFA::kind_
Prog::MatchKind kind_
Definition: bloaty/third_party/re2/re2/dfa.cc:342
re2::kInstCapture
@ kInstCapture
Definition: bloaty/third_party/re2/re2/prog.h:33
re2::DFA::State::flag_
uint32_t flag_
Definition: bloaty/third_party/re2/re2/dfa.cc:125
start
static uint64_t start
Definition: benchmark-pound.c:74
re2::DFA::State::IsMatch
bool IsMatch() const
Definition: bloaty/third_party/re2/re2/dfa.cc:121
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
re2::DFA::SearchParams::start
State * start
Definition: bloaty/third_party/re2/re2/dfa.cc:268
xds_interop_client.int
int
Definition: xds_interop_client.py:113
end
char * end
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1008
Mark
#define Mark
Definition: bloaty/third_party/re2/re2/dfa.cc:374
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
swap
#define swap(a, b)
Definition: qsort.h:111
gen_stats_data.c_str
def c_str(s, encoding='ascii')
Definition: gen_stats_data.py:38
re2::DFA::BuildAllStates
int BuildAllStates(const Prog::DFAStateCallback &cb)
Definition: bloaty/third_party/re2/re2/dfa.cc:1932
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
flag_
int flag_
Definition: transport_stream_receiver_test.cc:166
gmock_output_test.output
output
Definition: bloaty/third_party/googletest/googlemock/test/gmock_output_test.py:175
FALLTHROUGH_INTENDED
#define FALLTHROUGH_INTENDED
Definition: bloaty/third_party/re2/util/util.h:26
re2::DFA::SearchParams::cache_lock
RWLocker * cache_lock
Definition: bloaty/third_party/re2/re2/dfa.cc:270
re2::kEmptyNonWordBoundary
@ kEmptyNonWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:48
mu
Mutex mu
Definition: server_config_selector_filter.cc:74
re2::Prog::anchor_end
bool anchor_end()
Definition: bloaty/third_party/re2/re2/prog.h:216
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::DFA::RunWorkqOnByte
void RunWorkqOnByte(Workq *q, Workq *nq, int c, uint32_t flag, bool *ismatch)
Definition: bloaty/third_party/re2/re2/dfa.cc:947
text_format_test_wrapper.sep
sep
Definition: text_format_test_wrapper.py:34
re2::DFA::SearchFTF
bool SearchFTF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1569
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
re2::DFA::operator=
DFA & operator=(const DFA &)=delete
re2::DFA::SearchTFT
bool SearchTFT(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1578
re2::DFA::kFlagLastWord
@ kFlagLastWord
Definition: bloaty/third_party/re2/re2/dfa.cc:145
re2::memrchr
static void * memrchr(const void *s, int c, size_t n)
Definition: bloaty/third_party/re2/re2/dfa.cc:56
re2::DFA::q0_
Workq * q0_
Definition: bloaty/third_party/re2/re2/dfa.cc:348
re2::DFA::StateSaver::flag_
uint32_t flag_
Definition: bloaty/third_party/re2/re2/dfa.cc:1216
nstack
int nstack
Definition: abseil-cpp/absl/synchronization/internal/graphcycles.cc:287
re2::DFA::InlinedSearchLoop
bool InlinedSearchLoop(SearchParams *params, bool have_first_byte, bool want_earliest_match, bool run_forward)
Definition: bloaty/third_party/re2/re2/dfa.cc:1326
re2::DFA::StateSaver::~StateSaver
~StateSaver()
Definition: bloaty/third_party/re2/re2/dfa.cc:1242
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::DFA::StateSaver::StateSaver
StateSaver(DFA *dfa, State *state)
Definition: bloaty/third_party/re2/re2/dfa.cc:1224
re2::Prog::inst
Inst * inst(int id)
Definition: bloaty/third_party/re2/re2/prog.h:199
re2::DFA::RWLocker::~RWLocker
~RWLocker()
Definition: bloaty/third_party/re2/re2/dfa.cc:1151
re2::DFA::FastSearchLoop
bool FastSearchLoop(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1598
re2::DFA::RunWorkqOnEmptyString
void RunWorkqOnEmptyString(Workq *q, Workq *nq, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:933
re2::DFA::PossibleMatchRange
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
Definition: bloaty/third_party/re2/re2/dfa.cc:2010
re2::DFA::StateSet
std::unordered_set< State *, StateHash, StateEqual > StateSet
Definition: bloaty/third_party/re2/re2/dfa.cc:177
std::swap
void swap(Json::Value &a, Json::Value &b)
Specialize std::swap() for Json::Value.
Definition: third_party/bloaty/third_party/protobuf/conformance/third_party/jsoncpp/json.h:1226
re2::DFA::SearchParams::SearchParams
SearchParams(const StringPiece &text, const StringPiece &context, RWLocker *cache_lock)
Definition: bloaty/third_party/re2/re2/dfa.cc:250
re2::DFA::ByteMap
int ByteMap(int c)
Definition: bloaty/third_party/re2/re2/dfa.cc:334
re2::kEmptyBeginText
@ kEmptyBeginText
Definition: bloaty/third_party/re2/re2/prog.h:45
re2::DFA::Workq::mark
void mark()
Definition: bloaty/third_party/re2/re2/dfa.cc:405
re2::Prog::DeleteDFA
void DeleteDFA(DFA *dfa)
Definition: bloaty/third_party/re2/re2/dfa.cc:1847
re2::Prog::Inst::last
int last()
Definition: bloaty/third_party/re2/re2/prog.h:83
re2::Prog::size
int size()
Definition: bloaty/third_party/re2/re2/prog.h:204
re2::DFA::RunStateOnByte
State * RunStateOnByte(State *, int)
Definition: bloaty/third_party/re2/re2/dfa.cc:1006
re2::HashMix::Mix
void Mix(size_t val)
Definition: bloaty/third_party/re2/util/mix.h:24
min
#define min(a, b)
Definition: qsort.h:83
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::Prog::PossibleMatchRange
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
Definition: bloaty/third_party/re2/re2/dfa.cc:2147
re2::Prog::kManyMatch
@ kManyMatch
Definition: bloaty/third_party/re2/re2/prog.h:196
re2::DFA::ok
bool ok() const
Definition: bloaty/third_party/re2/re2/dfa.cc:81
re2::DFA::kind
Prog::MatchKind kind()
Definition: bloaty/third_party/re2/re2/dfa.cc:82
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
re2::PrefixSuccessor
void PrefixSuccessor(std::string *prefix)
Definition: bloaty/third_party/re2/util/strutil.cc:78
stdint.h
re2::Prog::bytemap
const uint8_t * bytemap()
Definition: bloaty/third_party/re2/re2/prog.h:219
re2::Prog::Inst::opcode
InstOp opcode()
Definition: bloaty/third_party/re2/re2/prog.h:82
re2::DFA::SearchTTF
bool SearchTTF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1581
re2::DFA::SearchFFF
bool SearchFFF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1563
re2::Prog::SearchDFA
bool SearchDFA(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match0, bool *failed, SparseSet *matches)
Definition: bloaty/third_party/re2/re2/dfa.cc:1861
re2::DFA
Definition: bloaty/third_party/re2/re2/dfa.cc:77
mem
void * mem
Definition: libc.cpp:91
re2::kEmptyEndLine
@ kEmptyEndLine
Definition: bloaty/third_party/re2/re2/prog.h:44
grpc_core::Loop
promise_detail::Loop< F > Loop(F f)
Definition: loop.h:130
re2::SparseSetT::begin
iterator begin()
Definition: bloaty/third_party/re2/util/sparse_set.h:89
DeadState
#define DeadState
Definition: bloaty/third_party/re2/re2/dfa.cc:497
re2::DFA::SearchParams::first_byte
int first_byte
Definition: bloaty/third_party/re2/re2/dfa.cc:269
re2::StringPiece::data
const_pointer data() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:85
re2::DFA::Search
bool Search(const StringPiece &text, const StringPiece &context, bool anchored, bool want_earliest_match, bool run_forward, bool *failed, const char **ep, SparseSet *matches)
Definition: bloaty/third_party/re2/re2/dfa.cc:1765
re2::DFA::cache_mutex_
Mutex cache_mutex_
Definition: bloaty/third_party/re2/re2/dfa.cc:358
re2::kInstNop
@ kInstNop
Definition: bloaty/third_party/re2/re2/prog.h:36
re2::DFA::kStartAnchored
@ kStartAnchored
Definition: bloaty/third_party/re2/re2/dfa.cc:195
re2::DFA::start_
StartInfo start_[kMaxStart]
Definition: bloaty/third_party/re2/re2/dfa.cc:362
re2::kInstMatch
@ kInstMatch
Definition: bloaty/third_party/re2/re2/prog.h:35
re2::DFA::Workq::is_mark
bool is_mark(int i)
Definition: bloaty/third_party/re2/re2/dfa.cc:396
re2::kInstAltMatch
@ kInstAltMatch
Definition: bloaty/third_party/re2/re2/prog.h:31
absl::flags_internal
Definition: abseil-cpp/absl/flags/commandlineflag.h:40
re2::DFA::kStartAfterWordChar
@ kStartAfterWordChar
Definition: bloaty/third_party/re2/re2/dfa.cc:191
re2::Prog::first_byte
int first_byte()
Definition: bloaty/third_party/re2/re2/prog.cc:188
re2::Prog
Definition: bloaty/third_party/re2/re2/prog.h:56
MatchSep
#define MatchSep
Definition: bloaty/third_party/re2/re2/dfa.cc:378
re2::DFA::~DFA
~DFA()
Definition: bloaty/third_party/re2/re2/dfa.cc:486
re2::DFA::StartInfo::first_byte
std::atomic< int > first_byte
Definition: bloaty/third_party/re2/re2/dfa.cc:286
index
int index
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/protobuf.h:1184
re2::SparseSetT::iterator
int * iterator
Definition: bloaty/third_party/re2/util/sparse_set.h:75
ret
UniquePtr< SSL_SESSION > ret
Definition: ssl_x509.cc:1029
re2::DFA::stack_
PODArray< int > stack_
Definition: bloaty/third_party/re2/re2/dfa.cc:350
re2::dfa_should_bail_when_slow
static bool dfa_should_bail_when_slow
Definition: bloaty/third_party/re2/re2/dfa.cc:67
re2::DFA::StateSaver::Restore
State * Restore()
Definition: bloaty/third_party/re2/re2/dfa.cc:1247
re2::StringPiece::begin
const_iterator begin() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:71
re2::Prog::Inst::out
int out()
Definition: bloaty/third_party/re2/re2/prog.h:84
re2::Workq
SparseSet Workq
Definition: bloaty/third_party/re2/re2/prog.cc:125
re2::ExtraDebug
static const bool ExtraDebug
Definition: bloaty/third_party/re2/re2/dfa.cc:71
state
Definition: bloaty/third_party/zlib/contrib/blast/blast.c:41
re2::DFA::RWLocker::RWLocker
RWLocker(Mutex *mu)
Definition: bloaty/third_party/re2/re2/dfa.cc:1137
re2::DFA::kFlagEmptyMask
@ kFlagEmptyMask
Definition: bloaty/third_party/re2/re2/dfa.cc:143
re2::Prog::start_unanchored
int start_unanchored()
Definition: bloaty/third_party/re2/re2/prog.h:201
re2::kEmptyEndText
@ kEmptyEndText
Definition: bloaty/third_party/re2/re2/prog.h:46
re2::DFA::SearchParams::run_forward
bool run_forward
Definition: bloaty/third_party/re2/re2/dfa.cc:267
input
std::string input
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/tokenizer_unittest.cc:197
re2::DFA::StateEqual::operator()
bool operator()(const State *a, const State *b) const
Definition: bloaty/third_party/re2/re2/dfa.cc:161
re2::kEmptyBeginLine
@ kEmptyBeginLine
Definition: bloaty/third_party/re2/re2/prog.h:43
re2::Prog::TEST_dfa_should_bail_when_slow
static void TEST_dfa_should_bail_when_slow(bool b)
Definition: bloaty/third_party/re2/re2/dfa.cc:2004
re2::DFA::kFlagNeedShift
@ kFlagNeedShift
Definition: bloaty/third_party/re2/re2/dfa.cc:146
re2::DFA::StartInfo
Definition: bloaty/third_party/re2/re2/dfa.cc:283
re2::DFA::ResetCache
void ResetCache(RWLocker *cache_lock)
Definition: bloaty/third_party/re2/re2/dfa.cc:1170
re2::Prog::kFirstMatch
@ kFirstMatch
Definition: bloaty/third_party/re2/re2/prog.h:193
ns
static int64_t ns
Definition: bloaty/third_party/re2/util/benchmark.cc:43
re2::DFA::StateToWorkq
void StateToWorkq(State *s, Workq *q)
Definition: bloaty/third_party/re2/re2/dfa.cc:817
re2::DFA::q1_
Workq * q1_
Definition: bloaty/third_party/re2/re2/dfa.cc:349
re2::Mutex::ReaderLock
void ReaderLock()
Definition: bloaty/third_party/re2/util/mutex.h:81
context
grpc::ClientContext context
Definition: istio_echo_server_lib.cc:61
re2::DFA::SearchParams::operator=
SearchParams & operator=(const SearchParams &)=delete
re2::DFA::Workq::size
int size()
Definition: bloaty/third_party/re2/re2/dfa.cc:412
re2::DFA::mem_budget_
int64_t mem_budget_
Definition: bloaty/third_party/re2/re2/dfa.cc:359
flags
uint32_t flags
Definition: retry_filter.cc:632
re2::Prog::DFAStateCallback
std::function< void(const int *next, bool match)> DFAStateCallback
Definition: bloaty/third_party/re2/re2/prog.h:283
re2::DFA::StateHash::operator()
size_t operator()(const State *a) const
Definition: bloaty/third_party/re2/re2/dfa.cc:150
autogen_x86imm.tmp
tmp
Definition: autogen_x86imm.py:12
DCHECK
#define DCHECK(condition)
Definition: bloaty/third_party/re2/util/logging.h:19
re2::DFA::State::inst_
int * inst_
Definition: bloaty/third_party/re2/re2/dfa.cc:123
re2::DFA::init_failed_
bool init_failed_
Definition: bloaty/third_party/re2/re2/dfa.cc:343
re2::DFA::AnalyzeSearch
bool AnalyzeSearch(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1646
re2::DFA::StateSaver::is_special_
bool is_special_
Definition: bloaty/third_party/re2/re2/dfa.cc:1217
re2::DFA::RWLocker::writing_
bool writing_
Definition: bloaty/third_party/re2/re2/dfa.cc:1131
re2::DFA::SearchParams::text
StringPiece text
Definition: bloaty/third_party/re2/re2/dfa.cc:263
regress.m
m
Definition: regress/regress.py:25
re2::DFA::AnalyzeSearchHelper
bool AnalyzeSearchHelper(SearchParams *params, StartInfo *info, uint32_t flags)
Definition: bloaty/third_party/re2/re2/dfa.cc:1717
re2::DFA::SearchParams::want_earliest_match
bool want_earliest_match
Definition: bloaty/third_party/re2/re2/dfa.cc:266
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
re2::SparseSetT::end
iterator end()
Definition: bloaty/third_party/re2/util/sparse_set.h:92
re2::PODArray< int >
re2::Prog::kLongestMatch
@ kLongestMatch
Definition: bloaty/third_party/re2/re2/prog.h:194
re2::DFA::AddToQueue
void AddToQueue(Workq *q, int id, uint32_t flag)
Definition: bloaty/third_party/re2/re2/dfa.cc:833
re2::DFA::SearchTFF
bool SearchTFF(SearchParams *params)
Definition: bloaty/third_party/re2/re2/dfa.cc:1575
re2::DFA::kFbUnknown
@ kFbUnknown
Definition: bloaty/third_party/re2/re2/dfa.cc:182
re2::kInstByteRange
@ kInstByteRange
Definition: bloaty/third_party/re2/re2/prog.h:32
re2::DFA::ClearCache
void ClearCache()
Definition: bloaty/third_party/re2/re2/dfa.cc:799
re2::DFA::State::next_
std::atomic< State * > next_[]
Definition: bloaty/third_party/re2/re2/dfa.cc:134
cb
OPENSSL_EXPORT pem_password_cb * cb
Definition: pem.h:351
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
state
static struct rpc_state state
Definition: bad_server_response_test.cc:87
re2::SparseSetT::insert_new
iterator insert_new(int i)
Definition: bloaty/third_party/re2/util/sparse_set.h:138
re2::DFA::StartInfo::StartInfo
StartInfo()
Definition: bloaty/third_party/re2/re2/dfa.cc:284
re2::Prog::start
int start()
Definition: bloaty/third_party/re2/re2/prog.h:200
id
uint32_t id
Definition: flow_control_fuzzer.cc:70
re2::SparseSetT::insert
iterator insert(int i)
Definition: bloaty/third_party/re2/util/sparse_set.h:132
re2::DFA::SearchParams::matches
SparseSet * matches
Definition: bloaty/third_party/re2/re2/dfa.cc:273


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:16