re2/re2/compile.cc
Go to the documentation of this file.
1 // Copyright 2007 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Compile regular expression to Prog.
6 //
7 // Prog and Inst are defined in prog.h.
8 // This file's external interface is just Regexp::CompileToProg.
9 // The Compiler class defined in this file is private.
10 
11 #include <stdint.h>
12 #include <string.h>
13 #include <unordered_map>
14 #include <utility>
15 
16 #include "util/logging.h"
17 #include "util/utf.h"
18 #include "re2/pod_array.h"
19 #include "re2/prog.h"
20 #include "re2/re2.h"
21 #include "re2/regexp.h"
22 #include "re2/walker-inl.h"
23 
24 namespace re2 {
25 
26 // List of pointers to Inst* that need to be filled in (patched).
27 // Because the Inst* haven't been filled in yet,
28 // we can use the Inst* word to hold the list's "next" pointer.
29 // It's kind of sleazy, but it works well in practice.
30 // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
31 //
32 // Because the out and out1 fields in Inst are no longer pointers,
33 // we can't use pointers directly here either. Instead, head refers
34 // to inst_[head>>1].out (head&1 == 0) or inst_[head>>1].out1 (head&1 == 1).
35 // head == 0 represents the NULL list. This is okay because instruction #0
36 // is always the fail instruction, which never appears on a list.
37 struct PatchList {
38  // Returns patch list containing just p.
39  static PatchList Mk(uint32_t p) {
40  return {p, p};
41  }
42 
43  // Patches all the entries on l to have value p.
44  // Caller must not ever use patch list again.
45  static void Patch(Prog::Inst* inst0, PatchList l, uint32_t p) {
46  while (l.head != 0) {
47  Prog::Inst* ip = &inst0[l.head>>1];
48  if (l.head&1) {
49  l.head = ip->out1();
50  ip->out1_ = p;
51  } else {
52  l.head = ip->out();
53  ip->set_out(p);
54  }
55  }
56  }
57 
58  // Appends two patch lists and returns result.
59  static PatchList Append(Prog::Inst* inst0, PatchList l1, PatchList l2) {
60  if (l1.head == 0)
61  return l2;
62  if (l2.head == 0)
63  return l1;
64  Prog::Inst* ip = &inst0[l1.tail>>1];
65  if (l1.tail&1)
66  ip->out1_ = l2.head;
67  else
68  ip->set_out(l2.head);
69  return {l1.head, l2.tail};
70  }
71 
73  uint32_t tail; // for constant-time append
74 };
75 
76 static const PatchList kNullPatchList = {0, 0};
77 
78 // Compiled program fragment.
79 struct Frag {
81  PatchList end;
82  bool nullable;
83 
87 };
88 
89 // Input encodings.
90 enum Encoding {
91  kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
92  kEncodingLatin1, // Latin-1 (0-FF)
93 };
94 
95 class Compiler : public Regexp::Walker<Frag> {
96  public:
97  explicit Compiler();
98  ~Compiler();
99 
100  // Compiles Regexp to a new Prog.
101  // Caller is responsible for deleting Prog when finished with it.
102  // If reversed is true, compiles for walking over the input
103  // string backward (reverses all concatenations).
104  static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem);
105 
106  // Compiles alternation of all the re to a new Prog.
107  // Each re has a match with an id equal to its index in the vector.
108  static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
109 
110  // Interface for Regexp::Walker, which helps traverse the Regexp.
111  // The walk is purely post-recursive: given the machines for the
112  // children, PostVisit combines them to create the machine for
113  // the current node. The child_args are Frags.
114  // The Compiler traverses the Regexp parse tree, visiting
115  // each node in depth-first order. It invokes PreVisit before
116  // visiting the node's children and PostVisit after visiting
117  // the children.
118  Frag PreVisit(Regexp* re, Frag parent_arg, bool* stop);
119  Frag PostVisit(Regexp* re, Frag parent_arg, Frag pre_arg, Frag* child_args,
120  int nchild_args);
121  Frag ShortVisit(Regexp* re, Frag parent_arg);
122  Frag Copy(Frag arg);
123 
124  // Given fragment a, returns a+ or a+?; a* or a*?; a? or a??
125  Frag Plus(Frag a, bool nongreedy);
126  Frag Star(Frag a, bool nongreedy);
127  Frag Quest(Frag a, bool nongreedy);
128 
129  // Given fragment a, returns (a) capturing as \n.
130  Frag Capture(Frag a, int n);
131 
132  // Given fragments a and b, returns ab; a|b
133  Frag Cat(Frag a, Frag b);
134  Frag Alt(Frag a, Frag b);
135 
136  // Returns a fragment that can't match anything.
137  Frag NoMatch();
138 
139  // Returns a fragment that matches the empty string.
140  Frag Match(int32_t id);
141 
142  // Returns a no-op fragment.
143  Frag Nop();
144 
145  // Returns a fragment matching the byte range lo-hi.
146  Frag ByteRange(int lo, int hi, bool foldcase);
147 
148  // Returns a fragment matching an empty-width special op.
149  Frag EmptyWidth(EmptyOp op);
150 
151  // Adds n instructions to the program.
152  // Returns the index of the first one.
153  // Returns -1 if no more instructions are available.
154  int AllocInst(int n);
155 
156  // Rune range compiler.
157 
158  // Begins a new alternation.
159  void BeginRange();
160 
161  // Adds a fragment matching the rune range lo-hi.
162  void AddRuneRange(Rune lo, Rune hi, bool foldcase);
163  void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
164  void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
165  void Add_80_10ffff();
166 
167  // New suffix that matches the byte range lo-hi, then goes to next.
168  int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
169  int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
170 
171  // Returns true iff the suffix is cached.
172  bool IsCachedRuneByteSuffix(int id);
173 
174  // Adds a suffix to alternation.
175  void AddSuffix(int id);
176 
177  // Adds a suffix to the trie starting from the given root node.
178  // Returns zero iff allocating an instruction fails. Otherwise, returns
179  // the current root node, which might be different from what was given.
180  int AddSuffixRecursive(int root, int id);
181 
182  // Finds the trie node for the given suffix. Returns a Frag in order to
183  // distinguish between pointing at the root node directly (end.head == 0)
184  // and pointing at an Alt's out1 or out (end.head&1 == 1 or 0, respectively).
185  Frag FindByteRange(int root, int id);
186 
187  // Compares two ByteRanges and returns true iff they are equal.
188  bool ByteRangeEqual(int id1, int id2);
189 
190  // Returns the alternation of all the added suffixes.
191  Frag EndRange();
192 
193  // Single rune.
194  Frag Literal(Rune r, bool foldcase);
195 
196  void Setup(Regexp::ParseFlags flags, int64_t max_mem, RE2::Anchor anchor);
197  Prog* Finish(Regexp* re);
198 
199  // Returns .* where dot = any byte
200  Frag DotStar();
201 
202  private:
203  Prog* prog_; // Program being built.
204  bool failed_; // Did we give up compiling?
205  Encoding encoding_; // Input encoding
206  bool reversed_; // Should program run backward over text?
207 
208  PODArray<Prog::Inst> inst_;
209  int ninst_; // Number of instructions used.
210  int max_ninst_; // Maximum number of instructions.
211 
212  int64_t max_mem_; // Total memory budget.
213 
214  std::unordered_map<uint64_t, int> rune_cache_;
215  Frag rune_range_;
216 
217  RE2::Anchor anchor_; // anchor mode for RE2::Set
218 
219  Compiler(const Compiler&) = delete;
220  Compiler& operator=(const Compiler&) = delete;
221 };
222 
224  prog_ = new Prog();
225  failed_ = false;
227  reversed_ = false;
228  ninst_ = 0;
229  max_ninst_ = 1; // make AllocInst for fail instruction okay
230  max_mem_ = 0;
231  int fail = AllocInst(1);
232  inst_[fail].InitFail();
233  max_ninst_ = 0; // Caller must change
234 }
235 
237  delete prog_;
238 }
239 
240 int Compiler::AllocInst(int n) {
241  if (failed_ || ninst_ + n > max_ninst_) {
242  failed_ = true;
243  return -1;
244  }
245 
246  if (ninst_ + n > inst_.size()) {
247  int cap = inst_.size();
248  if (cap == 0)
249  cap = 8;
250  while (ninst_ + n > cap)
251  cap *= 2;
252  PODArray<Prog::Inst> inst(cap);
253  if (inst_.data() != NULL)
254  memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]);
255  memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]);
256  inst_ = std::move(inst);
257  }
258  int id = ninst_;
259  ninst_ += n;
260  return id;
261 }
262 
263 // These routines are somewhat hard to visualize in text --
264 // see http://swtch.com/~rsc/regexp/regexp1.html for
265 // pictures explaining what is going on here.
266 
267 // Returns an unmatchable fragment.
268 Frag Compiler::NoMatch() {
269  return Frag();
270 }
271 
272 // Is a an unmatchable fragment?
273 static bool IsNoMatch(Frag a) {
274  return a.begin == 0;
275 }
276 
277 // Given fragments a and b, returns fragment for ab.
278 Frag Compiler::Cat(Frag a, Frag b) {
279  if (IsNoMatch(a) || IsNoMatch(b))
280  return NoMatch();
281 
282  // Elide no-op.
283  Prog::Inst* begin = &inst_[a.begin];
284  if (begin->opcode() == kInstNop &&
285  a.end.head == (a.begin << 1) &&
286  begin->out() == 0) {
287  // in case refs to a somewhere
288  PatchList::Patch(inst_.data(), a.end, b.begin);
289  return b;
290  }
291 
292  // To run backward over string, reverse all concatenations.
293  if (reversed_) {
294  PatchList::Patch(inst_.data(), b.end, a.begin);
295  return Frag(b.begin, a.end, b.nullable && a.nullable);
296  }
297 
298  PatchList::Patch(inst_.data(), a.end, b.begin);
299  return Frag(a.begin, b.end, a.nullable && b.nullable);
300 }
301 
302 // Given fragments for a and b, returns fragment for a|b.
303 Frag Compiler::Alt(Frag a, Frag b) {
304  // Special case for convenience in loops.
305  if (IsNoMatch(a))
306  return b;
307  if (IsNoMatch(b))
308  return a;
309 
310  int id = AllocInst(1);
311  if (id < 0)
312  return NoMatch();
313 
314  inst_[id].InitAlt(a.begin, b.begin);
315  return Frag(id, PatchList::Append(inst_.data(), a.end, b.end),
316  a.nullable || b.nullable);
317 }
318 
319 // When capturing submatches in like-Perl mode, a kOpAlt Inst
320 // treats out_ as the first choice, out1_ as the second.
321 //
322 // For *, +, and ?, if out_ causes another repetition,
323 // then the operator is greedy. If out1_ is the repetition
324 // (and out_ moves forward), then the operator is non-greedy.
325 
326 // Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
327 Frag Compiler::Plus(Frag a, bool nongreedy) {
328  int id = AllocInst(1);
329  if (id < 0)
330  return NoMatch();
331  PatchList pl;
332  if (nongreedy) {
333  inst_[id].InitAlt(0, a.begin);
334  pl = PatchList::Mk(id << 1);
335  } else {
336  inst_[id].InitAlt(a.begin, 0);
337  pl = PatchList::Mk((id << 1) | 1);
338  }
339  PatchList::Patch(inst_.data(), a.end, id);
340  return Frag(a.begin, pl, a.nullable);
341 }
342 
343 // Given a fragment for a, returns a fragment for a* or a*? (if nongreedy)
344 Frag Compiler::Star(Frag a, bool nongreedy) {
345  // When the subexpression is nullable, one Alt isn't enough to guarantee
346  // correct priority ordering within the transitive closure. The simplest
347  // solution is to handle it as (a+)? instead, which adds the second Alt.
348  if (a.nullable)
349  return Quest(Plus(a, nongreedy), nongreedy);
350 
351  int id = AllocInst(1);
352  if (id < 0)
353  return NoMatch();
354  PatchList pl;
355  if (nongreedy) {
356  inst_[id].InitAlt(0, a.begin);
357  pl = PatchList::Mk(id << 1);
358  } else {
359  inst_[id].InitAlt(a.begin, 0);
360  pl = PatchList::Mk((id << 1) | 1);
361  }
362  PatchList::Patch(inst_.data(), a.end, id);
363  return Frag(id, pl, true);
364 }
365 
366 // Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
367 Frag Compiler::Quest(Frag a, bool nongreedy) {
368  if (IsNoMatch(a))
369  return Nop();
370  int id = AllocInst(1);
371  if (id < 0)
372  return NoMatch();
373  PatchList pl;
374  if (nongreedy) {
375  inst_[id].InitAlt(0, a.begin);
376  pl = PatchList::Mk(id << 1);
377  } else {
378  inst_[id].InitAlt(a.begin, 0);
379  pl = PatchList::Mk((id << 1) | 1);
380  }
381  return Frag(id, PatchList::Append(inst_.data(), pl, a.end), true);
382 }
383 
384 // Returns a fragment for the byte range lo-hi.
385 Frag Compiler::ByteRange(int lo, int hi, bool foldcase) {
386  int id = AllocInst(1);
387  if (id < 0)
388  return NoMatch();
389  inst_[id].InitByteRange(lo, hi, foldcase, 0);
390  return Frag(id, PatchList::Mk(id << 1), false);
391 }
392 
393 // Returns a no-op fragment. Sometimes unavoidable.
394 Frag Compiler::Nop() {
395  int id = AllocInst(1);
396  if (id < 0)
397  return NoMatch();
398  inst_[id].InitNop(0);
399  return Frag(id, PatchList::Mk(id << 1), true);
400 }
401 
402 // Returns a fragment that signals a match.
403 Frag Compiler::Match(int32_t match_id) {
404  int id = AllocInst(1);
405  if (id < 0)
406  return NoMatch();
407  inst_[id].InitMatch(match_id);
408  return Frag(id, kNullPatchList, false);
409 }
410 
411 // Returns a fragment matching a particular empty-width op (like ^ or $)
413  int id = AllocInst(1);
414  if (id < 0)
415  return NoMatch();
416  inst_[id].InitEmptyWidth(empty, 0);
417  return Frag(id, PatchList::Mk(id << 1), true);
418 }
419 
420 // Given a fragment a, returns a fragment with capturing parens around a.
421 Frag Compiler::Capture(Frag a, int n) {
422  if (IsNoMatch(a))
423  return NoMatch();
424  int id = AllocInst(2);
425  if (id < 0)
426  return NoMatch();
427  inst_[id].InitCapture(2*n, a.begin);
428  inst_[id+1].InitCapture(2*n+1, 0);
429  PatchList::Patch(inst_.data(), a.end, id+1);
430 
431  return Frag(id, PatchList::Mk((id+1) << 1), a.nullable);
432 }
433 
434 // A Rune is a name for a Unicode code point.
435 // Returns maximum rune encoded by UTF-8 sequence of length len.
436 static int MaxRune(int len) {
437  int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax)
438  if (len == 1)
439  b = 7;
440  else
441  b = 8-(len+1) + 6*(len-1);
442  return (1<<b) - 1; // maximum Rune for b bits.
443 }
444 
445 // The rune range compiler caches common suffix fragments,
446 // which are very common in UTF-8 (e.g., [80-bf]).
447 // The fragment suffixes are identified by their start
448 // instructions. NULL denotes the eventual end match.
449 // The Frag accumulates in rune_range_. Caching common
450 // suffixes reduces the UTF-8 "." from 32 to 24 instructions,
451 // and it reduces the corresponding one-pass NFA from 16 nodes to 8.
452 
453 void Compiler::BeginRange() {
454  rune_cache_.clear();
455  rune_range_.begin = 0;
457 }
458 
459 int Compiler::UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
460  int next) {
461  Frag f = ByteRange(lo, hi, foldcase);
462  if (next != 0) {
463  PatchList::Patch(inst_.data(), f.end, next);
464  } else {
466  }
467  return f.begin;
468 }
469 
470 static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase,
471  int next) {
472  return (uint64_t)next << 17 |
473  (uint64_t)lo << 9 |
474  (uint64_t)hi << 1 |
475  (uint64_t)foldcase;
476 }
477 
478 int Compiler::CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase,
479  int next) {
480  uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
481  std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key);
482  if (it != rune_cache_.end())
483  return it->second;
484  int id = UncachedRuneByteSuffix(lo, hi, foldcase, next);
485  rune_cache_[key] = id;
486  return id;
487 }
488 
490  uint8_t lo = inst_[id].lo_;
491  uint8_t hi = inst_[id].hi_;
492  bool foldcase = inst_[id].foldcase() != 0;
493  int next = inst_[id].out();
494 
495  uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
496  return rune_cache_.find(key) != rune_cache_.end();
497 }
498 
499 void Compiler::AddSuffix(int id) {
500  if (failed_)
501  return;
502 
503  if (rune_range_.begin == 0) {
504  rune_range_.begin = id;
505  return;
506  }
507 
508  if (encoding_ == kEncodingUTF8) {
509  // Build a trie in order to reduce fanout.
511  return;
512  }
513 
514  int alt = AllocInst(1);
515  if (alt < 0) {
516  rune_range_.begin = 0;
517  return;
518  }
519  inst_[alt].InitAlt(rune_range_.begin, id);
520  rune_range_.begin = alt;
521 }
522 
523 int Compiler::AddSuffixRecursive(int root, int id) {
524  DCHECK(inst_[root].opcode() == kInstAlt ||
526 
527  Frag f = FindByteRange(root, id);
528  if (IsNoMatch(f)) {
529  int alt = AllocInst(1);
530  if (alt < 0)
531  return 0;
532  inst_[alt].InitAlt(root, id);
533  return alt;
534  }
535 
536  int br;
537  if (f.end.head == 0)
538  br = root;
539  else if (f.end.head&1)
540  br = inst_[f.begin].out1();
541  else
542  br = inst_[f.begin].out();
543 
544  if (IsCachedRuneByteSuffix(br)) {
545  // We can't fiddle with cached suffixes, so make a clone of the head.
546  int byterange = AllocInst(1);
547  if (byterange < 0)
548  return 0;
549  inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(),
550  inst_[br].foldcase(), inst_[br].out());
551 
552  // Ensure that the parent points to the clone, not to the original.
553  // Note that this could leave the head unreachable except via the cache.
554  br = byterange;
555  if (f.end.head == 0)
556  root = br;
557  else if (f.end.head&1)
558  inst_[f.begin].out1_ = br;
559  else
560  inst_[f.begin].set_out(br);
561  }
562 
563  int out = inst_[id].out();
564  if (!IsCachedRuneByteSuffix(id)) {
565  // The head should be the instruction most recently allocated, so free it
566  // instead of leaving it unreachable.
567  DCHECK_EQ(id, ninst_-1);
568  inst_[id].out_opcode_ = 0;
569  inst_[id].out1_ = 0;
570  ninst_--;
571  }
572 
573  out = AddSuffixRecursive(inst_[br].out(), out);
574  if (out == 0)
575  return 0;
576 
577  inst_[br].set_out(out);
578  return root;
579 }
580 
581 bool Compiler::ByteRangeEqual(int id1, int id2) {
582  return inst_[id1].lo() == inst_[id2].lo() &&
583  inst_[id1].hi() == inst_[id2].hi() &&
584  inst_[id1].foldcase() == inst_[id2].foldcase();
585 }
586 
587 Frag Compiler::FindByteRange(int root, int id) {
588  if (inst_[root].opcode() == kInstByteRange) {
589  if (ByteRangeEqual(root, id))
590  return Frag(root, kNullPatchList, false);
591  else
592  return NoMatch();
593  }
594 
595  while (inst_[root].opcode() == kInstAlt) {
596  int out1 = inst_[root].out1();
597  if (ByteRangeEqual(out1, id))
598  return Frag(root, PatchList::Mk((root << 1) | 1), false);
599 
600  // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
601  // what we're looking for, then we can stop immediately. Unfortunately, we
602  // can't short-circuit the search in reverse mode.
603  if (!reversed_)
604  return NoMatch();
605 
606  int out = inst_[root].out();
607  if (inst_[out].opcode() == kInstAlt)
608  root = out;
609  else if (ByteRangeEqual(out, id))
610  return Frag(root, PatchList::Mk(root << 1), false);
611  else
612  return NoMatch();
613  }
614 
615  LOG(DFATAL) << "should never happen";
616  return NoMatch();
617 }
618 
619 Frag Compiler::EndRange() {
620  return rune_range_;
621 }
622 
623 // Converts rune range lo-hi into a fragment that recognizes
624 // the bytes that would make up those runes in the current
625 // encoding (Latin 1 or UTF-8).
626 // This lets the machine work byte-by-byte even when
627 // using multibyte encodings.
628 
629 void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) {
630  switch (encoding_) {
631  default:
632  case kEncodingUTF8:
633  AddRuneRangeUTF8(lo, hi, foldcase);
634  break;
635  case kEncodingLatin1:
636  AddRuneRangeLatin1(lo, hi, foldcase);
637  break;
638  }
639 }
640 
641 void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
642  // Latin-1 is easy: runes *are* bytes.
643  if (lo > hi || lo > 0xFF)
644  return;
645  if (hi > 0xFF)
646  hi = 0xFF;
647  AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
648  static_cast<uint8_t>(hi), foldcase, 0));
649 }
650 
652  // The 80-10FFFF (Runeself-Runemax) rune range occurs frequently enough
653  // (for example, for /./ and /[^a-z]/) that it is worth simplifying: by
654  // permitting overlong encodings in E0 and F0 sequences and code points
655  // over 10FFFF in F4 sequences, the size of the bytecode and the number
656  // of equivalence classes are reduced significantly.
657  int id;
658  if (reversed_) {
659  // Prefix factoring matters, but we don't have to handle it here
660  // because the rune range trie logic takes care of that already.
661  id = UncachedRuneByteSuffix(0xC2, 0xDF, false, 0);
662  id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
663  AddSuffix(id);
664 
665  id = UncachedRuneByteSuffix(0xE0, 0xEF, false, 0);
666  id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
667  id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
668  AddSuffix(id);
669 
670  id = UncachedRuneByteSuffix(0xF0, 0xF4, false, 0);
671  id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
672  id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
673  id = UncachedRuneByteSuffix(0x80, 0xBF, false, id);
674  AddSuffix(id);
675  } else {
676  // Suffix factoring matters - and we do have to handle it here.
677  int cont1 = UncachedRuneByteSuffix(0x80, 0xBF, false, 0);
678  id = UncachedRuneByteSuffix(0xC2, 0xDF, false, cont1);
679  AddSuffix(id);
680 
681  int cont2 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont1);
682  id = UncachedRuneByteSuffix(0xE0, 0xEF, false, cont2);
683  AddSuffix(id);
684 
685  int cont3 = UncachedRuneByteSuffix(0x80, 0xBF, false, cont2);
686  id = UncachedRuneByteSuffix(0xF0, 0xF4, false, cont3);
687  AddSuffix(id);
688  }
689 }
690 
691 void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
692  if (lo > hi)
693  return;
694 
695  // Pick off 80-10FFFF as a common special case.
696  if (lo == 0x80 && hi == 0x10ffff) {
697  Add_80_10ffff();
698  return;
699  }
700 
701  // Split range into same-length sized ranges.
702  for (int i = 1; i < UTFmax; i++) {
703  Rune max = MaxRune(i);
704  if (lo <= max && max < hi) {
705  AddRuneRangeUTF8(lo, max, foldcase);
706  AddRuneRangeUTF8(max+1, hi, foldcase);
707  return;
708  }
709  }
710 
711  // ASCII range is always a special case.
712  if (hi < Runeself) {
713  AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
714  static_cast<uint8_t>(hi), foldcase, 0));
715  return;
716  }
717 
718  // Split range into sections that agree on leading bytes.
719  for (int i = 1; i < UTFmax; i++) {
720  uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence
721  if ((lo & ~m) != (hi & ~m)) {
722  if ((lo & m) != 0) {
723  AddRuneRangeUTF8(lo, lo|m, foldcase);
724  AddRuneRangeUTF8((lo|m)+1, hi, foldcase);
725  return;
726  }
727  if ((hi & m) != m) {
728  AddRuneRangeUTF8(lo, (hi&~m)-1, foldcase);
729  AddRuneRangeUTF8(hi&~m, hi, foldcase);
730  return;
731  }
732  }
733  }
734 
735  // Finally. Generate byte matching equivalent for lo-hi.
736  uint8_t ulo[UTFmax], uhi[UTFmax];
737  int n = runetochar(reinterpret_cast<char*>(ulo), &lo);
738  int m = runetochar(reinterpret_cast<char*>(uhi), &hi);
739  (void)m; // USED(m)
740  DCHECK_EQ(n, m);
741 
742  // The logic below encodes this thinking:
743  //
744  // 1. When we have built the whole suffix, we know that it cannot
745  // possibly be a suffix of anything longer: in forward mode, nothing
746  // else can occur before the leading byte; in reverse mode, nothing
747  // else can occur after the last continuation byte or else the leading
748  // byte would have to change. Thus, there is no benefit to caching
749  // the first byte of the suffix whereas there is a cost involved in
750  // cloning it if it begins a common prefix, which is fairly likely.
751  //
752  // 2. Conversely, the last byte of the suffix cannot possibly be a
753  // prefix of anything because next == 0, so we will never want to
754  // clone it, but it is fairly likely to be a common suffix. Perhaps
755  // more so in reverse mode than in forward mode because the former is
756  // "converging" towards lower entropy, but caching is still worthwhile
757  // for the latter in cases such as 80-BF.
758  //
759  // 3. Handling the bytes between the first and the last is less
760  // straightforward and, again, the approach depends on whether we are
761  // "converging" towards lower entropy: in forward mode, a single byte
762  // is unlikely to be part of a common suffix whereas a byte range
763  // is more likely so; in reverse mode, a byte range is unlikely to
764  // be part of a common suffix whereas a single byte is more likely
765  // so. The same benefit versus cost argument applies here.
766  int id = 0;
767  if (reversed_) {
768  for (int i = 0; i < n; i++) {
769  // In reverse UTF-8 mode: cache the leading byte; don't cache the last
770  // continuation byte; cache anything else iff it's a single byte (XX-XX).
771  if (i == 0 || (ulo[i] == uhi[i] && i != n-1))
772  id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
773  else
774  id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
775  }
776  } else {
777  for (int i = n-1; i >= 0; i--) {
778  // In forward UTF-8 mode: don't cache the leading byte; cache the last
779  // continuation byte; cache anything else iff it's a byte range (XX-YY).
780  if (i == n-1 || (ulo[i] < uhi[i] && i != 0))
781  id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
782  else
783  id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
784  }
785  }
786  AddSuffix(id);
787 }
788 
789 // Should not be called.
790 Frag Compiler::Copy(Frag arg) {
791  // We're using WalkExponential; there should be no copying.
792  LOG(DFATAL) << "Compiler::Copy called!";
793  failed_ = true;
794  return NoMatch();
795 }
796 
797 // Visits a node quickly; called once WalkExponential has
798 // decided to cut this walk short.
799 Frag Compiler::ShortVisit(Regexp* re, Frag) {
800  failed_ = true;
801  return NoMatch();
802 }
803 
804 // Called before traversing a node's children during the walk.
805 Frag Compiler::PreVisit(Regexp* re, Frag, bool* stop) {
806  // Cut off walk if we've already failed.
807  if (failed_)
808  *stop = true;
809 
810  return Frag(); // not used by caller
811 }
812 
813 Frag Compiler::Literal(Rune r, bool foldcase) {
814  switch (encoding_) {
815  default:
816  return Frag();
817 
818  case kEncodingLatin1:
819  return ByteRange(r, r, foldcase);
820 
821  case kEncodingUTF8: {
822  if (r < Runeself) // Make common case fast.
823  return ByteRange(r, r, foldcase);
824  uint8_t buf[UTFmax];
825  int n = runetochar(reinterpret_cast<char*>(buf), &r);
826  Frag f = ByteRange((uint8_t)buf[0], buf[0], false);
827  for (int i = 1; i < n; i++)
828  f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false));
829  return f;
830  }
831  }
832 }
833 
834 // Called after traversing the node's children during the walk.
835 // Given their frags, build and return the frag for this re.
836 Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
837  int nchild_frags) {
838  // If a child failed, don't bother going forward, especially
839  // since the child_frags might contain Frags with NULLs in them.
840  if (failed_)
841  return NoMatch();
842 
843  // Given the child fragments, return the fragment for this node.
844  switch (re->op()) {
845  case kRegexpRepeat:
846  // Should not see; code at bottom of function will print error
847  break;
848 
849  case kRegexpNoMatch:
850  return NoMatch();
851 
852  case kRegexpEmptyMatch:
853  return Nop();
854 
855  case kRegexpHaveMatch: {
856  Frag f = Match(re->match_id());
857  if (anchor_ == RE2::ANCHOR_BOTH) {
858  // Append \z or else the subexpression will effectively be unanchored.
859  // Complemented by the UNANCHORED case in CompileSet().
860  f = Cat(EmptyWidth(kEmptyEndText), f);
861  }
862  return f;
863  }
864 
865  case kRegexpConcat: {
866  Frag f = child_frags[0];
867  for (int i = 1; i < nchild_frags; i++)
868  f = Cat(f, child_frags[i]);
869  return f;
870  }
871 
872  case kRegexpAlternate: {
873  Frag f = child_frags[0];
874  for (int i = 1; i < nchild_frags; i++)
875  f = Alt(f, child_frags[i]);
876  return f;
877  }
878 
879  case kRegexpStar:
880  return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
881 
882  case kRegexpPlus:
883  return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
884 
885  case kRegexpQuest:
886  return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
887 
888  case kRegexpLiteral:
889  return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0);
890 
891  case kRegexpLiteralString: {
892  // Concatenation of literals.
893  if (re->nrunes() == 0)
894  return Nop();
895  Frag f;
896  for (int i = 0; i < re->nrunes(); i++) {
897  Frag f1 = Literal(re->runes()[i],
898  (re->parse_flags()&Regexp::FoldCase) != 0);
899  if (i == 0)
900  f = f1;
901  else
902  f = Cat(f, f1);
903  }
904  return f;
905  }
906 
907  case kRegexpAnyChar:
908  BeginRange();
909  AddRuneRange(0, Runemax, false);
910  return EndRange();
911 
912  case kRegexpAnyByte:
913  return ByteRange(0x00, 0xFF, false);
914 
915  case kRegexpCharClass: {
916  CharClass* cc = re->cc();
917  if (cc->empty()) {
918  // This can't happen.
919  LOG(DFATAL) << "No ranges in char class";
920  failed_ = true;
921  return NoMatch();
922  }
923 
924  // ASCII case-folding optimization: if the char class
925  // behaves the same on A-Z as it does on a-z,
926  // discard any ranges wholly contained in A-Z
927  // and mark the other ranges as foldascii.
928  // This reduces the size of a program for
929  // (?i)abc from 3 insts per letter to 1 per letter.
930  bool foldascii = cc->FoldsASCII();
931 
932  // Character class is just a big OR of the different
933  // character ranges in the class.
934  BeginRange();
935  for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) {
936  // ASCII case-folding optimization (see above).
937  if (foldascii && 'A' <= i->lo && i->hi <= 'Z')
938  continue;
939 
940  // If this range contains all of A-Za-z or none of it,
941  // the fold flag is unnecessary; don't bother.
942  bool fold = foldascii;
943  if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo ||
944  ('Z' < i->lo && i->hi < 'a'))
945  fold = false;
946 
947  AddRuneRange(i->lo, i->hi, fold);
948  }
949  return EndRange();
950  }
951 
952  case kRegexpCapture:
953  // If this is a non-capturing parenthesis -- (?:foo) --
954  // just use the inner expression.
955  if (re->cap() < 0)
956  return child_frags[0];
957  return Capture(child_frags[0], re->cap());
958 
959  case kRegexpBeginLine:
961 
962  case kRegexpEndLine:
964 
965  case kRegexpBeginText:
967 
968  case kRegexpEndText:
970 
971  case kRegexpWordBoundary:
973 
976  }
977  LOG(DFATAL) << "Missing case in Compiler: " << re->op();
978  failed_ = true;
979  return NoMatch();
980 }
981 
982 // Is this regexp required to start at the beginning of the text?
983 // Only approximate; can return false for complicated regexps like (\Aa|\Ab),
984 // but handles (\A(a|b)). Could use the Walker to write a more exact one.
985 static bool IsAnchorStart(Regexp** pre, int depth) {
986  Regexp* re = *pre;
987  Regexp* sub;
988  // The depth limit makes sure that we don't overflow
989  // the stack on a deeply nested regexp. As the comment
990  // above says, IsAnchorStart is conservative, so returning
991  // a false negative is okay. The exact limit is somewhat arbitrary.
992  if (re == NULL || depth >= 4)
993  return false;
994  switch (re->op()) {
995  default:
996  break;
997  case kRegexpConcat:
998  if (re->nsub() > 0) {
999  sub = re->sub()[0]->Incref();
1000  if (IsAnchorStart(&sub, depth+1)) {
1001  PODArray<Regexp*> subcopy(re->nsub());
1002  subcopy[0] = sub; // already have reference
1003  for (int i = 1; i < re->nsub(); i++)
1004  subcopy[i] = re->sub()[i]->Incref();
1005  *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
1006  re->Decref();
1007  return true;
1008  }
1009  sub->Decref();
1010  }
1011  break;
1012  case kRegexpCapture:
1013  sub = re->sub()[0]->Incref();
1014  if (IsAnchorStart(&sub, depth+1)) {
1015  *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
1016  re->Decref();
1017  return true;
1018  }
1019  sub->Decref();
1020  break;
1021  case kRegexpBeginText:
1022  *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
1023  re->Decref();
1024  return true;
1025  }
1026  return false;
1027 }
1028 
1029 // Is this regexp required to start at the end of the text?
1030 // Only approximate; can return false for complicated regexps like (a\z|b\z),
1031 // but handles ((a|b)\z). Could use the Walker to write a more exact one.
1032 static bool IsAnchorEnd(Regexp** pre, int depth) {
1033  Regexp* re = *pre;
1034  Regexp* sub;
1035  // The depth limit makes sure that we don't overflow
1036  // the stack on a deeply nested regexp. As the comment
1037  // above says, IsAnchorEnd is conservative, so returning
1038  // a false negative is okay. The exact limit is somewhat arbitrary.
1039  if (re == NULL || depth >= 4)
1040  return false;
1041  switch (re->op()) {
1042  default:
1043  break;
1044  case kRegexpConcat:
1045  if (re->nsub() > 0) {
1046  sub = re->sub()[re->nsub() - 1]->Incref();
1047  if (IsAnchorEnd(&sub, depth+1)) {
1048  PODArray<Regexp*> subcopy(re->nsub());
1049  subcopy[re->nsub() - 1] = sub; // already have reference
1050  for (int i = 0; i < re->nsub() - 1; i++)
1051  subcopy[i] = re->sub()[i]->Incref();
1052  *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
1053  re->Decref();
1054  return true;
1055  }
1056  sub->Decref();
1057  }
1058  break;
1059  case kRegexpCapture:
1060  sub = re->sub()[0]->Incref();
1061  if (IsAnchorEnd(&sub, depth+1)) {
1062  *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
1063  re->Decref();
1064  return true;
1065  }
1066  sub->Decref();
1067  break;
1068  case kRegexpEndText:
1069  *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
1070  re->Decref();
1071  return true;
1072  }
1073  return false;
1074 }
1075 
1077  RE2::Anchor anchor) {
1078  if (flags & Regexp::Latin1)
1080  max_mem_ = max_mem;
1081  if (max_mem <= 0) {
1082  max_ninst_ = 100000; // more than enough
1083  } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
1084  // No room for anything.
1085  max_ninst_ = 0;
1086  } else {
1087  int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
1088  // Limit instruction count so that inst->id() fits nicely in an int.
1089  // SparseArray also assumes that the indices (inst->id()) are ints.
1090  // The call to WalkExponential uses 2*max_ninst_ below,
1091  // and other places in the code use 2 or 3 * prog->size().
1092  // Limiting to 2^24 should avoid overflow in those places.
1093  // (The point of allowing more than 32 bits of memory is to
1094  // have plenty of room for the DFA states, not to use it up
1095  // on the program.)
1096  if (m >= 1<<24)
1097  m = 1<<24;
1098  // Inst imposes its own limit (currently bigger than 2^24 but be safe).
1099  if (m > Prog::Inst::kMaxInst)
1101  max_ninst_ = static_cast<int>(m);
1102  }
1103  anchor_ = anchor;
1104 }
1105 
1106 // Compiles re, returning program.
1107 // Caller is responsible for deleting prog_.
1108 // If reversed is true, compiles a program that expects
1109 // to run over the input string backward (reverses all concatenations).
1110 // The reversed flag is also recorded in the returned program.
1111 Prog* Compiler::Compile(Regexp* re, bool reversed, int64_t max_mem) {
1112  Compiler c;
1113  c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */);
1114  c.reversed_ = reversed;
1115 
1116  // Simplify to remove things like counted repetitions
1117  // and character classes like \d.
1118  Regexp* sre = re->Simplify();
1119  if (sre == NULL)
1120  return NULL;
1121 
1122  // Record whether prog is anchored, removing the anchors.
1123  // (They get in the way of other optimizations.)
1124  bool is_anchor_start = IsAnchorStart(&sre, 0);
1125  bool is_anchor_end = IsAnchorEnd(&sre, 0);
1126 
1127  // Generate fragment for entire regexp.
1128  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
1129  sre->Decref();
1130  if (c.failed_)
1131  return NULL;
1132 
1133  // Success! Finish by putting Match node at end, and record start.
1134  // Turn off c.reversed_ (if it is set) to force the remaining concatenations
1135  // to behave normally.
1136  c.reversed_ = false;
1137  all = c.Cat(all, c.Match(0));
1138 
1139  c.prog_->set_reversed(reversed);
1140  if (c.prog_->reversed()) {
1141  c.prog_->set_anchor_start(is_anchor_end);
1142  c.prog_->set_anchor_end(is_anchor_start);
1143  } else {
1144  c.prog_->set_anchor_start(is_anchor_start);
1145  c.prog_->set_anchor_end(is_anchor_end);
1146  }
1147 
1148  c.prog_->set_start(all.begin);
1149  if (!c.prog_->anchor_start()) {
1150  // Also create unanchored version, which starts with a .*? loop.
1151  all = c.Cat(c.DotStar(), all);
1152  }
1153  c.prog_->set_start_unanchored(all.begin);
1154 
1155  // Hand ownership of prog_ to caller.
1156  return c.Finish(re);
1157 }
1158 
1160  if (failed_)
1161  return NULL;
1162 
1163  if (prog_->start() == 0 && prog_->start_unanchored() == 0) {
1164  // No possible matches; keep Fail instruction only.
1165  ninst_ = 1;
1166  }
1167 
1168  // Hand off the array to Prog.
1169  prog_->inst_ = std::move(inst_);
1170  prog_->size_ = ninst_;
1171 
1172  prog_->Optimize();
1173  prog_->Flatten();
1174  prog_->ComputeByteMap();
1175 
1176  if (!prog_->reversed()) {
1178  bool prefix_foldcase;
1179  if (re->RequiredPrefixForAccel(&prefix, &prefix_foldcase))
1180  prog_->ConfigurePrefixAccel(prefix, prefix_foldcase);
1181  }
1182 
1183  // Record remaining memory for DFA.
1184  if (max_mem_ <= 0) {
1185  prog_->set_dfa_mem(1<<20);
1186  } else {
1187  int64_t m = max_mem_ - sizeof(Prog);
1188  m -= prog_->size_*sizeof(Prog::Inst); // account for inst_
1189  if (prog_->CanBitState())
1190  m -= prog_->size_*sizeof(uint16_t); // account for list_heads_
1191  if (m < 0)
1192  m = 0;
1193  prog_->set_dfa_mem(m);
1194  }
1195 
1196  Prog* p = prog_;
1197  prog_ = NULL;
1198  return p;
1199 }
1200 
1201 // Converts Regexp to Prog.
1203  return Compiler::Compile(this, false, max_mem);
1204 }
1205 
1206 Prog* Regexp::CompileToReverseProg(int64_t max_mem) {
1207  return Compiler::Compile(this, true, max_mem);
1208 }
1209 
1210 Frag Compiler::DotStar() {
1211  return Star(ByteRange(0x00, 0xff, false), true);
1212 }
1213 
1214 // Compiles RE set to Prog.
1215 Prog* Compiler::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
1216  Compiler c;
1217  c.Setup(re->parse_flags(), max_mem, anchor);
1218 
1219  Regexp* sre = re->Simplify();
1220  if (sre == NULL)
1221  return NULL;
1222 
1223  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
1224  sre->Decref();
1225  if (c.failed_)
1226  return NULL;
1227 
1228  c.prog_->set_anchor_start(true);
1229  c.prog_->set_anchor_end(true);
1230 
1231  if (anchor == RE2::UNANCHORED) {
1232  // Prepend .* or else the expression will effectively be anchored.
1233  // Complemented by the ANCHOR_BOTH case in PostVisit().
1234  all = c.Cat(c.DotStar(), all);
1235  }
1236  c.prog_->set_start(all.begin);
1237  c.prog_->set_start_unanchored(all.begin);
1238 
1239  Prog* prog = c.Finish(re);
1240  if (prog == NULL)
1241  return NULL;
1242 
1243  // Make sure DFA has enough memory to operate,
1244  // since we're not going to fall back to the NFA.
1245  bool dfa_failed = false;
1246  StringPiece sp = "hello, world";
1247  prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch,
1248  NULL, &dfa_failed, NULL);
1249  if (dfa_failed) {
1250  delete prog;
1251  return NULL;
1252  }
1253 
1254  return prog;
1255 }
1256 
1257 Prog* Prog::CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem) {
1258  return Compiler::CompileSet(re, anchor, max_mem);
1259 }
1260 
1261 } // namespace re2
re2::Compiler::DotStar
Frag DotStar()
Definition: bloaty/third_party/re2/re2/compile.cc:1228
re2::Compiler::Match
Frag Match(int32_t id)
Definition: bloaty/third_party/re2/re2/compile.cc:417
test_group_name.all
all
Definition: test_group_name.py:241
re2::PatchList::Patch
static void Patch(Prog::Inst *inst0, PatchList l, uint32_t p)
Definition: re2/re2/compile.cc:45
re2::kRegexpEmptyMatch
@ kRegexpEmptyMatch
Definition: bloaty/third_party/re2/re2/regexp.h:107
re2::Prog::Inst::out1_
uint32_t out1_
Definition: bloaty/third_party/re2/re2/prog.h:135
re2::kRegexpBeginLine
@ kRegexpBeginLine
Definition: bloaty/third_party/re2/re2/regexp.h:142
re2::PatchList::Append
static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2)
Definition: re2/re2/compile.cc:59
re2::PatchList::head
uint32_t head
Definition: re2/re2/compile.cc:72
re2::kRegexpNoWordBoundary
@ kRegexpNoWordBoundary
Definition: bloaty/third_party/re2/re2/regexp.h:149
gen_build_yaml.out
dictionary out
Definition: src/benchmark/gen_build_yaml.py:24
regen-readme.it
it
Definition: regen-readme.py:15
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
re2::Compiler::rune_cache_
std::unordered_map< uint64_t, int > rune_cache_
Definition: bloaty/third_party/re2/re2/compile.cc:246
re2::Regexp::Decref
void Decref()
Definition: bloaty/third_party/re2/re2/regexp.cc:115
re2::Compiler::Compiler
Compiler()
Definition: bloaty/third_party/re2/re2/compile.cc:255
re2::Prog::Inst::out1
int out1()
Definition: bloaty/third_party/re2/re2/prog.h:85
re2::PatchList::Append
static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2)
Definition: bloaty/third_party/re2/re2/compile.cc:89
re2::Prog::reversed
bool reversed()
Definition: bloaty/third_party/re2/re2/prog.h:205
memset
return memset(p, 0, total)
fix_build_deps.c
list c
Definition: fix_build_deps.py:490
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
re2::Compiler::ShortVisit
Frag ShortVisit(Regexp *re, Frag parent_arg)
Definition: bloaty/third_party/re2/re2/compile.cc:819
re2::Compiler::Plus
Frag Plus(Frag a, bool nongreedy)
Definition: bloaty/third_party/re2/re2/compile.cc:374
false
#define false
Definition: setup_once.h:323
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
xds_manager.f1
f1
Definition: xds_manager.py:42
re2::Prog::Inst
Definition: bloaty/third_party/re2/re2/prog.h:62
re2::kRegexpLiteralString
@ kRegexpLiteralString
Definition: bloaty/third_party/re2/re2/regexp.h:113
re2::Compiler::reversed_
bool reversed_
Definition: bloaty/third_party/re2/re2/compile.cc:238
re2::Regexp::Capture
static Regexp * Capture(Regexp *sub, ParseFlags flags, int cap)
Definition: bloaty/third_party/re2/re2/regexp.cc:298
re2::Compiler::NoMatch
Frag NoMatch()
Definition: bloaty/third_party/re2/re2/compile.cc:300
string.h
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
re2::Regexp::nsub
int nsub()
Definition: bloaty/third_party/re2/re2/regexp.h:322
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::RE2::ANCHOR_BOTH
@ ANCHOR_BOTH
Definition: bloaty/third_party/re2/re2/re2.h:475
re2::PODArray::data
T * data() const
Definition: bloaty/third_party/re2/util/pod_array.h:24
re2::Prog::size_
int size_
Definition: bloaty/third_party/re2/re2/prog.h:403
re2::RE2::UNANCHORED
@ UNANCHORED
Definition: bloaty/third_party/re2/re2/re2.h:473
re2::kRegexpLiteral
@ kRegexpLiteral
Definition: bloaty/third_party/re2/re2/regexp.h:110
re2::Prog::ComputeByteMap
void ComputeByteMap()
Definition: bloaty/third_party/re2/re2/prog.cc:446
re2::Compiler::max_mem_
int64_t max_mem_
Definition: bloaty/third_party/re2/re2/compile.cc:244
pod_array.h
re2::PatchList::Patch
static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v)
Definition: bloaty/third_party/re2/re2/compile.cc:75
re2::Prog::Inst::set_out
void set_out(int out)
Definition: bloaty/third_party/re2/re2/prog.h:125
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
cpp.ast.reversed
def reversed(seq)
Definition: bloaty/third_party/googletest/googlemock/scripts/generator/cpp/ast.py:52
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
re2::Regexp::Concat
static Regexp * Concat(Regexp **subs, int nsubs, ParseFlags flags)
Definition: bloaty/third_party/re2/re2/regexp.cc:286
re2::Regexp::CompileToProg
Prog * CompileToProg(int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1220
re2::kNullPatchList
static const PatchList kNullPatchList
Definition: re2/re2/compile.cc:76
re2::kRegexpHaveMatch
@ kRegexpHaveMatch
Definition: bloaty/third_party/re2/re2/regexp.h:161
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
re2::Compiler::Setup
void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor)
Definition: bloaty/third_party/re2/re2/compile.cc:1096
re2::kRegexpWordBoundary
@ kRegexpWordBoundary
Definition: bloaty/third_party/re2/re2/regexp.h:147
re2::Compiler::PostVisit
Frag PostVisit(Regexp *re, Frag parent_arg, Frag pre_arg, Frag *child_args, int nchild_args)
Definition: bloaty/third_party/re2/re2/compile.cc:856
re2::Regexp::Latin1
@ Latin1
Definition: bloaty/third_party/re2/re2/regexp.h:289
re2::Regexp::LiteralString
static Regexp * LiteralString(Rune *runes, int nrunes, ParseFlags flags)
Definition: bloaty/third_party/re2/re2/regexp.cc:321
re2::kRegexpEndText
@ kRegexpEndText
Definition: bloaty/third_party/re2/re2/regexp.h:154
re2::Prog::CanBitState
bool CanBitState()
Definition: bloaty/third_party/re2/re2/prog.h:317
re2::kEncodingUTF8
@ kEncodingUTF8
Definition: bloaty/third_party/re2/re2/compile.cc:123
re2::Compiler::anchor_
RE2::Anchor anchor_
Definition: bloaty/third_party/re2/re2/compile.cc:249
re2::IsNoMatch
static bool IsNoMatch(Frag a)
Definition: bloaty/third_party/re2/re2/compile.cc:305
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
re2::Compiler::Copy
Frag Copy(Frag arg)
Definition: bloaty/third_party/re2/re2/compile.cc:810
re2::kEmptyWordBoundary
@ kEmptyWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:47
re2::Frag::end
PatchList end
Definition: bloaty/third_party/re2/re2/compile.cc:115
re2::Compiler::AddRuneRange
void AddRuneRange(Rune lo, Rune hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:643
re2::Encoding
Encoding
Definition: bloaty/third_party/re2/re2/compile.cc:122
re2::Compiler::BeginRange
void BeginRange()
Definition: bloaty/third_party/re2/re2/compile.cc:467
re2::Compiler::failed_
bool failed_
Definition: bloaty/third_party/re2/re2/compile.cc:236
re2::kEncodingLatin1
@ kEncodingLatin1
Definition: bloaty/third_party/re2/re2/compile.cc:124
re2::PatchList
Definition: bloaty/third_party/re2/re2/compile.cc:38
re2::IsAnchorStart
static bool IsAnchorStart(Regexp **pre, int depth)
Definition: bloaty/third_party/re2/re2/compile.cc:1005
autogen_x86imm.f
f
Definition: autogen_x86imm.py:9
re2::Frag::Frag
Frag(uint32_t begin, PatchList end, bool nullable)
Definition: re2/re2/compile.cc:85
absl::move
constexpr absl::remove_reference_t< T > && move(T &&t) noexcept
Definition: abseil-cpp/absl/utility/utility.h:221
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
re2::kRegexpConcat
@ kRegexpConcat
Definition: bloaty/third_party/re2/re2/regexp.h:116
re2::Compiler::ByteRange
Frag ByteRange(int lo, int hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:399
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
root
RefCountedPtr< grpc_tls_certificate_provider > root
Definition: xds_server_config_fetcher.cc:223
re2::kEmptyNonWordBoundary
@ kEmptyNonWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:48
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::Frag::nullable
bool nullable
Definition: re2/re2/compile.cc:82
re2::EmptyOp
EmptyOp
Definition: bloaty/third_party/re2/re2/prog.h:42
re2::MakeRuneCacheKey
static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, int next)
Definition: bloaty/third_party/re2/re2/compile.cc:484
re2::Prog::inst_
PODArray< Inst > inst_
Definition: bloaty/third_party/re2/re2/prog.h:413
re2::Compiler::AllocInst
int AllocInst(int n)
Definition: bloaty/third_party/re2/re2/compile.cc:272
re2::Runeself
@ Runeself
Definition: bloaty/third_party/re2/util/utf.h:31
re2::kRegexpAnyChar
@ kRegexpAnyChar
Definition: bloaty/third_party/re2/re2/regexp.h:136
re2::Regexp::RequiredPrefixForAccel
bool RequiredPrefixForAccel(std::string *prefix, bool *foldcase)
Definition: re2/re2/regexp.cc:720
re2::runetochar
int runetochar(char *str, const Rune *rune)
Definition: bloaty/third_party/re2/util/rune.cc:127
grpc_core::fail
Poll< absl::StatusOr< std::tuple< T... > > > fail()
Definition: try_join_test.cc:45
uint64_t
unsigned __int64 uint64_t
Definition: stdint-msvc2008.h:90
re2::kRegexpCharClass
@ kRegexpCharClass
Definition: bloaty/third_party/re2/re2/regexp.h:157
re2::Prog::Flatten
void Flatten()
Definition: bloaty/third_party/re2/re2/prog.cc:557
re2::Regexp::sub
Regexp ** sub()
Definition: bloaty/third_party/re2/re2/regexp.h:327
arg
Definition: cmdline.cc:40
re2::Prog::kAnchored
@ kAnchored
Definition: bloaty/third_party/re2/re2/prog.h:177
re2::Compiler::Literal
Frag Literal(Rune r, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:833
re2::Prog::set_dfa_mem
void set_dfa_mem(int64_t dfa_mem)
Definition: bloaty/third_party/re2/re2/prog.h:210
re2::kEmptyBeginText
@ kEmptyBeginText
Definition: bloaty/third_party/re2/re2/prog.h:45
re2::CharClass::iterator
RuneRange * iterator
Definition: bloaty/third_party/re2/re2/regexp.h:246
re2::Compiler::prog_
Prog * prog_
Definition: bloaty/third_party/re2/re2/compile.cc:235
re2::kRegexpAlternate
@ kRegexpAlternate
Definition: bloaty/third_party/re2/re2/regexp.h:118
re2::Prog::Inst::kMaxInst
static const int kMaxInst
Definition: bloaty/third_party/re2/re2/prog.h:114
re2::Compiler::Star
Frag Star(Frag a, bool nongreedy)
Definition: bloaty/third_party/re2/re2/compile.cc:358
re2::Compiler::Cat
Frag Cat(Frag a, Frag b)
Definition: bloaty/third_party/re2/re2/compile.cc:310
re2::Frag
Definition: bloaty/third_party/re2/re2/compile.cc:113
re2::PatchList::p
uint32_t p
Definition: bloaty/third_party/re2/re2/compile.cc:39
re2::Compiler::Alt
Frag Alt(Frag a, Frag b)
Definition: bloaty/third_party/re2/re2/compile.cc:335
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::IsAnchorEnd
static bool IsAnchorEnd(Regexp **pre, int depth)
Definition: bloaty/third_party/re2/re2/compile.cc:1052
re2::Compiler::UncachedRuneByteSuffix
int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next)
Definition: bloaty/third_party/re2/re2/compile.cc:473
re2::Compiler::IsCachedRuneByteSuffix
bool IsCachedRuneByteSuffix(int id)
Definition: bloaty/third_party/re2/re2/compile.cc:503
re2::Prog::kManyMatch
@ kManyMatch
Definition: bloaty/third_party/re2/re2/prog.h:196
re2::kInstAlt
@ kInstAlt
Definition: bloaty/third_party/re2/re2/prog.h:30
re2::Regexp::ParseFlags
ParseFlags
Definition: bloaty/third_party/re2/re2/regexp.h:278
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
stdint.h
re2::Compiler::Compile
static Prog * Compile(Regexp *re, bool reversed, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1136
re2::kRegexpStar
@ kRegexpStar
Definition: bloaty/third_party/re2/re2/regexp.h:121
google_benchmark.example.empty
def empty(state)
Definition: example.py:31
re2::Regexp::parse_flags
ParseFlags parse_flags()
Definition: bloaty/third_party/re2/re2/regexp.h:324
re2::Compiler::EmptyWidth
Frag EmptyWidth(EmptyOp op)
Definition: bloaty/third_party/re2/re2/compile.cc:426
re2::Regexp::CompileToReverseProg
Prog * CompileToReverseProg(int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1224
re2::Compiler::ninst_
int ninst_
Definition: bloaty/third_party/re2/re2/compile.cc:241
re2::Prog::ConfigurePrefixAccel
void ConfigurePrefixAccel(const std::string &prefix, bool prefix_foldcase)
Definition: re2/re2/prog.cc:1005
re2::kEmptyEndLine
@ kEmptyEndLine
Definition: bloaty/third_party/re2/re2/prog.h:44
re2::Regexp::op
RegexpOp op()
Definition: bloaty/third_party/re2/re2/regexp.h:321
re2::Compiler::inst_
PODArray< Prog::Inst > inst_
Definition: bloaty/third_party/re2/re2/compile.cc:240
re2::kInstNop
@ kInstNop
Definition: bloaty/third_party/re2/re2/prog.h:36
re2::kRegexpAnyByte
@ kRegexpAnyByte
Definition: bloaty/third_party/re2/re2/regexp.h:139
key
const char * key
Definition: hpack_parser_table.cc:164
re2::Regexp::NonGreedy
@ NonGreedy
Definition: bloaty/third_party/re2/re2/regexp.h:290
re2::Compiler::encoding_
Encoding encoding_
Definition: bloaty/third_party/re2/re2/compile.cc:237
absl::flags_internal
Definition: abseil-cpp/absl/flags/commandlineflag.h:40
re2::Compiler::ByteRangeEqual
bool ByteRangeEqual(int id1, int id2)
Definition: bloaty/third_party/re2/re2/compile.cc:595
re2::Prog
Definition: bloaty/third_party/re2/re2/prog.h:56
re2::Compiler::Finish
Prog * Finish()
Definition: bloaty/third_party/re2/re2/compile.cc:1184
re2::Compiler::rune_range_
Frag rune_range_
Definition: bloaty/third_party/re2/re2/compile.cc:247
DCHECK_EQ
#define DCHECK_EQ(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:20
re2::Regexp::Incref
Regexp * Incref()
Definition: bloaty/third_party/re2/re2/regexp.cc:89
re2::Rune
signed int Rune
Definition: bloaty/third_party/re2/util/utf.h:25
re2::Compiler::AddRuneRangeLatin1
void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:655
re2::PatchList::Mk
static PatchList Mk(uint32_t p)
Definition: re2/re2/compile.cc:39
next
AllocList * next[kMaxLevel]
Definition: abseil-cpp/absl/base/internal/low_level_alloc.cc:100
fix_build_deps.r
r
Definition: fix_build_deps.py:491
re2::Prog::Inst::out
int out()
Definition: bloaty/third_party/re2/re2/prog.h:84
re2::Frag::Frag
Frag()
Definition: re2/re2/compile.cc:84
re2::Prog::CompileSet
static Prog * CompileSet(Regexp *re, RE2::Anchor anchor, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1275
re2::Frag::begin
uint32_t begin
Definition: bloaty/third_party/re2/re2/compile.cc:114
prefix
static const char prefix[]
Definition: head_of_line_blocking.cc:28
re2::Compiler::AddSuffixRecursive
int AddSuffixRecursive(int root, int id)
Definition: bloaty/third_party/re2/re2/compile.cc:537
re2::Compiler::Quest
Frag Quest(Frag a, bool nongreedy)
Definition: bloaty/third_party/re2/re2/compile.cc:381
re2::kRegexpRepeat
@ kRegexpRepeat
Definition: bloaty/third_party/re2/re2/regexp.h:129
re2::Compiler::Nop
Frag Nop()
Definition: bloaty/third_party/re2/re2/compile.cc:408
re2::Prog::start_unanchored
int start_unanchored()
Definition: bloaty/third_party/re2/re2/prog.h:201
re2::Compiler::AddRuneRangeUTF8
void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:710
re2::MaxRune
static int MaxRune(int len)
Definition: bloaty/third_party/re2/re2/compile.cc:450
re2::kRegexpBeginText
@ kRegexpBeginText
Definition: bloaty/third_party/re2/re2/regexp.h:152
re2::kEmptyEndText
@ kEmptyEndText
Definition: bloaty/third_party/re2/re2/prog.h:46
re2::kEmptyBeginLine
@ kEmptyBeginLine
Definition: bloaty/third_party/re2/re2/prog.h:43
re2::kRegexpEndLine
@ kRegexpEndLine
Definition: bloaty/third_party/re2/re2/regexp.h:144
re2::Compiler::Add_80_10ffff
void Add_80_10ffff()
Definition: bloaty/third_party/re2/re2/compile.cc:696
stop
static const char stop[]
Definition: benchmark-async-pummel.c:35
re2::Compiler::max_ninst_
int max_ninst_
Definition: bloaty/third_party/re2/re2/compile.cc:242
re2::kRegexpCapture
@ kRegexpCapture
Definition: bloaty/third_party/re2/re2/regexp.h:133
re2::kRegexpPlus
@ kRegexpPlus
Definition: bloaty/third_party/re2/re2/regexp.h:123
re2::PatchList::Mk
static PatchList Mk(uint32_t p)
Definition: bloaty/third_party/re2/re2/compile.cc:58
re2::kRegexpNoMatch
@ kRegexpNoMatch
Definition: bloaty/third_party/re2/re2/regexp.h:104
re2::RE2::Anchor
Anchor
Definition: bloaty/third_party/re2/re2/re2.h:472
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
DCHECK
#define DCHECK(condition)
Definition: bloaty/third_party/re2/util/logging.h:19
re2::Compiler::operator=
Compiler & operator=(const Compiler &)=delete
re2::Compiler::PreVisit
Frag PreVisit(Regexp *re, Frag parent_arg, bool *stop)
Definition: bloaty/third_party/re2/re2/compile.cc:825
mkowners.depth
depth
Definition: mkowners.py:114
regress.m
m
Definition: regress/regress.py:25
int32_t
signed int int32_t
Definition: stdint-msvc2008.h:77
re2::Compiler::~Compiler
~Compiler()
Definition: bloaty/third_party/re2/re2/compile.cc:268
re2::Compiler::CachedRuneByteSuffix
int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next)
Definition: bloaty/third_party/re2/re2/compile.cc:492
re2::Compiler::EndRange
Frag EndRange()
Definition: bloaty/third_party/re2/re2/compile.cc:633
opcode
opcode
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/upb.h:6290
re2::Compiler::AddSuffix
void AddSuffix(int id)
Definition: bloaty/third_party/re2/re2/compile.cc:513
re2::PODArray
Definition: bloaty/third_party/re2/util/pod_array.h:14
op
static grpc_op * op
Definition: test/core/fling/client.cc:47
re2::kRegexpQuest
@ kRegexpQuest
Definition: bloaty/third_party/re2/re2/regexp.h:125
re2::Runemax
@ Runemax
Definition: bloaty/third_party/re2/util/utf.h:33
re2::kInstByteRange
@ kInstByteRange
Definition: bloaty/third_party/re2/re2/prog.h:32
re2::Regexp::FoldCase
@ FoldCase
Definition: bloaty/third_party/re2/re2/regexp.h:280
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Regexp::cap
int cap()
Definition: bloaty/third_party/re2/re2/regexp.h:338
re2::PatchList::tail
uint32_t tail
Definition: re2/re2/compile.cc:73
re2::Prog::start
int start()
Definition: bloaty/third_party/re2/re2/prog.h:200
re2::UTFmax
@ UTFmax
Definition: bloaty/third_party/re2/util/utf.h:29
re2::Compiler::Capture
Frag Capture(Frag a, int n)
Definition: bloaty/third_party/re2/re2/compile.cc:435
id
uint32_t id
Definition: flow_control_fuzzer.cc:70
re2::Compiler::FindByteRange
Frag FindByteRange(int root, int id)
Definition: bloaty/third_party/re2/re2/compile.cc:601
re2::Compiler::CompileSet
static Prog * CompileSet(Regexp *re, RE2::Anchor anchor, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1233
re2::Prog::Optimize
void Optimize()
Definition: bloaty/third_party/re2/re2/prog.cc:198


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:00