bloaty/third_party/re2/re2/compile.cc
Go to the documentation of this file.
1 // Copyright 2007 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Compile regular expression to Prog.
6 //
7 // Prog and Inst are defined in prog.h.
8 // This file's external interface is just Regexp::CompileToProg.
9 // The Compiler class defined in this file is private.
10 
11 #include <stdint.h>
12 #include <string.h>
13 #include <unordered_map>
14 #include <utility>
15 
16 #include "util/logging.h"
17 #include "util/pod_array.h"
18 #include "util/utf.h"
19 #include "re2/prog.h"
20 #include "re2/re2.h"
21 #include "re2/regexp.h"
22 #include "re2/walker-inl.h"
23 
24 namespace re2 {
25 
26 // List of pointers to Inst* that need to be filled in (patched).
27 // Because the Inst* haven't been filled in yet,
28 // we can use the Inst* word to hold the list's "next" pointer.
29 // It's kind of sleazy, but it works well in practice.
30 // See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
31 //
32 // Because the out and out1 fields in Inst are no longer pointers,
33 // we can't use pointers directly here either. Instead, p refers
34 // to inst_[p>>1].out (p&1 == 0) or inst_[p>>1].out1 (p&1 == 1).
35 // p == 0 represents the NULL list. This is okay because instruction #0
36 // is always the fail instruction, which never appears on a list.
37 
38 struct PatchList {
40 
41  // Returns patch list containing just p.
42  static PatchList Mk(uint32_t p);
43 
44  // Patches all the entries on l to have value v.
45  // Caller must not ever use patch list again.
46  static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v);
47 
48  // Deref returns the next pointer pointed at by p.
49  static PatchList Deref(Prog::Inst *inst0, PatchList l);
50 
51  // Appends two patch lists and returns result.
52  static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2);
53 };
54 
55 static PatchList nullPatchList = { 0 };
56 
57 // Returns patch list containing just p.
59  PatchList l;
60  l.p = p;
61  return l;
62 }
63 
64 // Returns the next pointer pointed at by l.
66  Prog::Inst* ip = &inst0[l.p>>1];
67  if (l.p&1)
68  l.p = ip->out1();
69  else
70  l.p = ip->out();
71  return l;
72 }
73 
74 // Patches all the entries on l to have value v.
76  while (l.p != 0) {
77  Prog::Inst* ip = &inst0[l.p>>1];
78  if (l.p&1) {
79  l.p = ip->out1();
80  ip->out1_ = val;
81  } else {
82  l.p = ip->out();
83  ip->set_out(val);
84  }
85  }
86 }
87 
88 // Appends two patch lists and returns result.
90  if (l1.p == 0)
91  return l2;
92  if (l2.p == 0)
93  return l1;
94 
95  PatchList l = l1;
96  for (;;) {
97  PatchList next = PatchList::Deref(inst0, l);
98  if (next.p == 0)
99  break;
100  l = next;
101  }
102 
103  Prog::Inst* ip = &inst0[l.p>>1];
104  if (l.p&1)
105  ip->out1_ = l2.p;
106  else
107  ip->set_out(l2.p);
108 
109  return l1;
110 }
111 
112 // Compiled program fragment.
113 struct Frag {
116 
117  Frag() : begin(0) { end.p = 0; } // needed so Frag can go in vector
119 };
120 
121 // Input encodings.
122 enum Encoding {
123  kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)
124  kEncodingLatin1, // Latin-1 (0-FF)
125 };
126 
127 class Compiler : public Regexp::Walker<Frag> {
128  public:
129  explicit Compiler();
130  ~Compiler();
131 
132  // Compiles Regexp to a new Prog.
133  // Caller is responsible for deleting Prog when finished with it.
134  // If reversed is true, compiles for walking over the input
135  // string backward (reverses all concatenations).
136  static Prog *Compile(Regexp* re, bool reversed, int64_t max_mem);
137 
138  // Compiles alternation of all the re to a new Prog.
139  // Each re has a match with an id equal to its index in the vector.
140  static Prog* CompileSet(Regexp* re, RE2::Anchor anchor, int64_t max_mem);
141 
142  // Interface for Regexp::Walker, which helps traverse the Regexp.
143  // The walk is purely post-recursive: given the machines for the
144  // children, PostVisit combines them to create the machine for
145  // the current node. The child_args are Frags.
146  // The Compiler traverses the Regexp parse tree, visiting
147  // each node in depth-first order. It invokes PreVisit before
148  // visiting the node's children and PostVisit after visiting
149  // the children.
150  Frag PreVisit(Regexp* re, Frag parent_arg, bool* stop);
151  Frag PostVisit(Regexp* re, Frag parent_arg, Frag pre_arg, Frag* child_args,
152  int nchild_args);
153  Frag ShortVisit(Regexp* re, Frag parent_arg);
154  Frag Copy(Frag arg);
155 
156  // Given fragment a, returns a+ or a+?; a* or a*?; a? or a??
157  Frag Plus(Frag a, bool nongreedy);
158  Frag Star(Frag a, bool nongreedy);
159  Frag Quest(Frag a, bool nongreedy);
160 
161  // Given fragment a, returns (a) capturing as \n.
162  Frag Capture(Frag a, int n);
163 
164  // Given fragments a and b, returns ab; a|b
165  Frag Cat(Frag a, Frag b);
166  Frag Alt(Frag a, Frag b);
167 
168  // Returns a fragment that can't match anything.
169  Frag NoMatch();
170 
171  // Returns a fragment that matches the empty string.
172  Frag Match(int32_t id);
173 
174  // Returns a no-op fragment.
175  Frag Nop();
176 
177  // Returns a fragment matching the byte range lo-hi.
178  Frag ByteRange(int lo, int hi, bool foldcase);
179 
180  // Returns a fragment matching an empty-width special op.
182 
183  // Adds n instructions to the program.
184  // Returns the index of the first one.
185  // Returns -1 if no more instructions are available.
186  int AllocInst(int n);
187 
188  // Rune range compiler.
189 
190  // Begins a new alternation.
191  void BeginRange();
192 
193  // Adds a fragment matching the rune range lo-hi.
194  void AddRuneRange(Rune lo, Rune hi, bool foldcase);
195  void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
196  void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
197  void Add_80_10ffff();
198 
199  // New suffix that matches the byte range lo-hi, then goes to next.
200  int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
201  int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next);
202 
203  // Returns true iff the suffix is cached.
204  bool IsCachedRuneByteSuffix(int id);
205 
206  // Adds a suffix to alternation.
207  void AddSuffix(int id);
208 
209  // Adds a suffix to the trie starting from the given root node.
210  // Returns zero iff allocating an instruction fails. Otherwise, returns
211  // the current root node, which might be different from what was given.
212  int AddSuffixRecursive(int root, int id);
213 
214  // Finds the trie node for the given suffix. Returns a Frag in order to
215  // distinguish between pointing at the root node directly (end.p == 0)
216  // and pointing at an Alt's out1 or out (end.p&1 == 1 or 0, respectively).
217  Frag FindByteRange(int root, int id);
218 
219  // Compares two ByteRanges and returns true iff they are equal.
220  bool ByteRangeEqual(int id1, int id2);
221 
222  // Returns the alternation of all the added suffixes.
223  Frag EndRange();
224 
225  // Single rune.
226  Frag Literal(Rune r, bool foldcase);
227 
229  Prog* Finish();
230 
231  // Returns .* where dot = any byte
232  Frag DotStar();
233 
234  private:
235  Prog* prog_; // Program being built.
236  bool failed_; // Did we give up compiling?
237  Encoding encoding_; // Input encoding
238  bool reversed_; // Should program run backward over text?
239 
241  int ninst_; // Number of instructions used.
242  int max_ninst_; // Maximum number of instructions.
243 
244  int64_t max_mem_; // Total memory budget.
245 
246  std::unordered_map<uint64_t, int> rune_cache_;
248 
249  RE2::Anchor anchor_; // anchor mode for RE2::Set
250 
251  Compiler(const Compiler&) = delete;
252  Compiler& operator=(const Compiler&) = delete;
253 };
254 
256  prog_ = new Prog();
257  failed_ = false;
259  reversed_ = false;
260  ninst_ = 0;
261  max_ninst_ = 1; // make AllocInst for fail instruction okay
262  max_mem_ = 0;
263  int fail = AllocInst(1);
264  inst_[fail].InitFail();
265  max_ninst_ = 0; // Caller must change
266 }
267 
269  delete prog_;
270 }
271 
273  if (failed_ || ninst_ + n > max_ninst_) {
274  failed_ = true;
275  return -1;
276  }
277 
278  if (ninst_ + n > inst_.size()) {
279  int cap = inst_.size();
280  if (cap == 0)
281  cap = 8;
282  while (ninst_ + n > cap)
283  cap *= 2;
284  PODArray<Prog::Inst> inst(cap);
285  if (inst_.data() != NULL)
286  memmove(inst.data(), inst_.data(), ninst_*sizeof inst_[0]);
287  memset(inst.data() + ninst_, 0, (cap - ninst_)*sizeof inst_[0]);
288  inst_ = std::move(inst);
289  }
290  int id = ninst_;
291  ninst_ += n;
292  return id;
293 }
294 
295 // These routines are somewhat hard to visualize in text --
296 // see http://swtch.com/~rsc/regexp/regexp1.html for
297 // pictures explaining what is going on here.
298 
299 // Returns an unmatchable fragment.
301  return Frag(0, nullPatchList);
302 }
303 
304 // Is a an unmatchable fragment?
305 static bool IsNoMatch(Frag a) {
306  return a.begin == 0;
307 }
308 
309 // Given fragments a and b, returns fragment for ab.
311  if (IsNoMatch(a) || IsNoMatch(b))
312  return NoMatch();
313 
314  // Elide no-op.
315  Prog::Inst* begin = &inst_[a.begin];
316  if (begin->opcode() == kInstNop &&
317  a.end.p == (a.begin << 1) &&
318  begin->out() == 0) {
319  // in case refs to a somewhere
320  PatchList::Patch(inst_.data(), a.end, b.begin);
321  return b;
322  }
323 
324  // To run backward over string, reverse all concatenations.
325  if (reversed_) {
326  PatchList::Patch(inst_.data(), b.end, a.begin);
327  return Frag(b.begin, a.end);
328  }
329 
330  PatchList::Patch(inst_.data(), a.end, b.begin);
331  return Frag(a.begin, b.end);
332 }
333 
334 // Given fragments for a and b, returns fragment for a|b.
336  // Special case for convenience in loops.
337  if (IsNoMatch(a))
338  return b;
339  if (IsNoMatch(b))
340  return a;
341 
342  int id = AllocInst(1);
343  if (id < 0)
344  return NoMatch();
345 
346  inst_[id].InitAlt(a.begin, b.begin);
347  return Frag(id, PatchList::Append(inst_.data(), a.end, b.end));
348 }
349 
350 // When capturing submatches in like-Perl mode, a kOpAlt Inst
351 // treats out_ as the first choice, out1_ as the second.
352 //
353 // For *, +, and ?, if out_ causes another repetition,
354 // then the operator is greedy. If out1_ is the repetition
355 // (and out_ moves forward), then the operator is non-greedy.
356 
357 // Given a fragment a, returns a fragment for a* or a*? (if nongreedy)
358 Frag Compiler::Star(Frag a, bool nongreedy) {
359  int id = AllocInst(1);
360  if (id < 0)
361  return NoMatch();
362  inst_[id].InitAlt(0, 0);
363  PatchList::Patch(inst_.data(), a.end, id);
364  if (nongreedy) {
365  inst_[id].out1_ = a.begin;
366  return Frag(id, PatchList::Mk(id << 1));
367  } else {
368  inst_[id].set_out(a.begin);
369  return Frag(id, PatchList::Mk((id << 1) | 1));
370  }
371 }
372 
373 // Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
374 Frag Compiler::Plus(Frag a, bool nongreedy) {
375  // a+ is just a* with a different entry point.
376  Frag f = Star(a, nongreedy);
377  return Frag(a.begin, f.end);
378 }
379 
380 // Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
381 Frag Compiler::Quest(Frag a, bool nongreedy) {
382  if (IsNoMatch(a))
383  return Nop();
384  int id = AllocInst(1);
385  if (id < 0)
386  return NoMatch();
387  PatchList pl;
388  if (nongreedy) {
389  inst_[id].InitAlt(0, a.begin);
390  pl = PatchList::Mk(id << 1);
391  } else {
392  inst_[id].InitAlt(a.begin, 0);
393  pl = PatchList::Mk((id << 1) | 1);
394  }
395  return Frag(id, PatchList::Append(inst_.data(), pl, a.end));
396 }
397 
398 // Returns a fragment for the byte range lo-hi.
399 Frag Compiler::ByteRange(int lo, int hi, bool foldcase) {
400  int id = AllocInst(1);
401  if (id < 0)
402  return NoMatch();
403  inst_[id].InitByteRange(lo, hi, foldcase, 0);
404  return Frag(id, PatchList::Mk(id << 1));
405 }
406 
407 // Returns a no-op fragment. Sometimes unavoidable.
409  int id = AllocInst(1);
410  if (id < 0)
411  return NoMatch();
412  inst_[id].InitNop(0);
413  return Frag(id, PatchList::Mk(id << 1));
414 }
415 
416 // Returns a fragment that signals a match.
418  int id = AllocInst(1);
419  if (id < 0)
420  return NoMatch();
421  inst_[id].InitMatch(match_id);
422  return Frag(id, nullPatchList);
423 }
424 
425 // Returns a fragment matching a particular empty-width op (like ^ or $)
427  int id = AllocInst(1);
428  if (id < 0)
429  return NoMatch();
430  inst_[id].InitEmptyWidth(empty, 0);
431  return Frag(id, PatchList::Mk(id << 1));
432 }
433 
434 // Given a fragment a, returns a fragment with capturing parens around a.
436  if (IsNoMatch(a))
437  return NoMatch();
438  int id = AllocInst(2);
439  if (id < 0)
440  return NoMatch();
441  inst_[id].InitCapture(2*n, a.begin);
442  inst_[id+1].InitCapture(2*n+1, 0);
443  PatchList::Patch(inst_.data(), a.end, id+1);
444 
445  return Frag(id, PatchList::Mk((id+1) << 1));
446 }
447 
448 // A Rune is a name for a Unicode code point.
449 // Returns maximum rune encoded by UTF-8 sequence of length len.
450 static int MaxRune(int len) {
451  int b; // number of Rune bits in len-byte UTF-8 sequence (len < UTFmax)
452  if (len == 1)
453  b = 7;
454  else
455  b = 8-(len+1) + 6*(len-1);
456  return (1<<b) - 1; // maximum Rune for b bits.
457 }
458 
459 // The rune range compiler caches common suffix fragments,
460 // which are very common in UTF-8 (e.g., [80-bf]).
461 // The fragment suffixes are identified by their start
462 // instructions. NULL denotes the eventual end match.
463 // The Frag accumulates in rune_range_. Caching common
464 // suffixes reduces the UTF-8 "." from 32 to 24 instructions,
465 // and it reduces the corresponding one-pass NFA from 16 nodes to 8.
466 
468  rune_cache_.clear();
469  rune_range_.begin = 0;
471 }
472 
474  int next) {
475  Frag f = ByteRange(lo, hi, foldcase);
476  if (next != 0) {
477  PatchList::Patch(inst_.data(), f.end, next);
478  } else {
480  }
481  return f.begin;
482 }
483 
484 static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase,
485  int next) {
486  return (uint64_t)next << 17 |
487  (uint64_t)lo << 9 |
488  (uint64_t)hi << 1 |
489  (uint64_t)foldcase;
490 }
491 
493  int next) {
494  uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
495  std::unordered_map<uint64_t, int>::const_iterator it = rune_cache_.find(key);
496  if (it != rune_cache_.end())
497  return it->second;
498  int id = UncachedRuneByteSuffix(lo, hi, foldcase, next);
499  rune_cache_[key] = id;
500  return id;
501 }
502 
504  uint8_t lo = inst_[id].lo_;
505  uint8_t hi = inst_[id].hi_;
506  bool foldcase = inst_[id].foldcase() != 0;
507  int next = inst_[id].out();
508 
509  uint64_t key = MakeRuneCacheKey(lo, hi, foldcase, next);
510  return rune_cache_.find(key) != rune_cache_.end();
511 }
512 
513 void Compiler::AddSuffix(int id) {
514  if (failed_)
515  return;
516 
517  if (rune_range_.begin == 0) {
518  rune_range_.begin = id;
519  return;
520  }
521 
522  if (encoding_ == kEncodingUTF8) {
523  // Build a trie in order to reduce fanout.
525  return;
526  }
527 
528  int alt = AllocInst(1);
529  if (alt < 0) {
530  rune_range_.begin = 0;
531  return;
532  }
533  inst_[alt].InitAlt(rune_range_.begin, id);
534  rune_range_.begin = alt;
535 }
536 
538  DCHECK(inst_[root].opcode() == kInstAlt ||
540 
541  Frag f = FindByteRange(root, id);
542  if (IsNoMatch(f)) {
543  int alt = AllocInst(1);
544  if (alt < 0)
545  return 0;
546  inst_[alt].InitAlt(root, id);
547  return alt;
548  }
549 
550  int br;
551  if (f.end.p == 0)
552  br = root;
553  else if (f.end.p&1)
554  br = inst_[f.begin].out1();
555  else
556  br = inst_[f.begin].out();
557 
558  if (IsCachedRuneByteSuffix(br)) {
559  // We can't fiddle with cached suffixes, so make a clone of the head.
560  int byterange = AllocInst(1);
561  if (byterange < 0)
562  return 0;
563  inst_[byterange].InitByteRange(inst_[br].lo(), inst_[br].hi(),
564  inst_[br].foldcase(), inst_[br].out());
565 
566  // Ensure that the parent points to the clone, not to the original.
567  // Note that this could leave the head unreachable except via the cache.
568  br = byterange;
569  if (f.end.p == 0)
570  root = br;
571  else if (f.end.p&1)
572  inst_[f.begin].out1_ = br;
573  else
574  inst_[f.begin].set_out(br);
575  }
576 
577  int out = inst_[id].out();
578  if (!IsCachedRuneByteSuffix(id)) {
579  // The head should be the instruction most recently allocated, so free it
580  // instead of leaving it unreachable.
581  DCHECK_EQ(id, ninst_-1);
582  inst_[id].out_opcode_ = 0;
583  inst_[id].out1_ = 0;
584  ninst_--;
585  }
586 
587  out = AddSuffixRecursive(inst_[br].out(), out);
588  if (out == 0)
589  return 0;
590 
591  inst_[br].set_out(out);
592  return root;
593 }
594 
595 bool Compiler::ByteRangeEqual(int id1, int id2) {
596  return inst_[id1].lo() == inst_[id2].lo() &&
597  inst_[id1].hi() == inst_[id2].hi() &&
598  inst_[id1].foldcase() == inst_[id2].foldcase();
599 }
600 
602  if (inst_[root].opcode() == kInstByteRange) {
603  if (ByteRangeEqual(root, id))
604  return Frag(root, nullPatchList);
605  else
606  return NoMatch();
607  }
608 
609  while (inst_[root].opcode() == kInstAlt) {
610  int out1 = inst_[root].out1();
611  if (ByteRangeEqual(out1, id))
612  return Frag(root, PatchList::Mk((root << 1) | 1));
613 
614  // CharClass is a sorted list of ranges, so if out1 of the root Alt wasn't
615  // what we're looking for, then we can stop immediately. Unfortunately, we
616  // can't short-circuit the search in reverse mode.
617  if (!reversed_)
618  return NoMatch();
619 
620  int out = inst_[root].out();
621  if (inst_[out].opcode() == kInstAlt)
622  root = out;
623  else if (ByteRangeEqual(out, id))
624  return Frag(root, PatchList::Mk(root << 1));
625  else
626  return NoMatch();
627  }
628 
629  LOG(DFATAL) << "should never happen";
630  return NoMatch();
631 }
632 
634  return rune_range_;
635 }
636 
637 // Converts rune range lo-hi into a fragment that recognizes
638 // the bytes that would make up those runes in the current
639 // encoding (Latin 1 or UTF-8).
640 // This lets the machine work byte-by-byte even when
641 // using multibyte encodings.
642 
643 void Compiler::AddRuneRange(Rune lo, Rune hi, bool foldcase) {
644  switch (encoding_) {
645  default:
646  case kEncodingUTF8:
647  AddRuneRangeUTF8(lo, hi, foldcase);
648  break;
649  case kEncodingLatin1:
650  AddRuneRangeLatin1(lo, hi, foldcase);
651  break;
652  }
653 }
654 
655 void Compiler::AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase) {
656  // Latin-1 is easy: runes *are* bytes.
657  if (lo > hi || lo > 0xFF)
658  return;
659  if (hi > 0xFF)
660  hi = 0xFF;
661  AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
662  static_cast<uint8_t>(hi), foldcase, 0));
663 }
664 
665 // Table describing how to make a UTF-8 matching machine
666 // for the rune range 80-10FFFF (Runeself-Runemax).
667 // This range happens frequently enough (for example /./ and /[^a-z]/)
668 // and the rune_cache_ map is slow enough that this is worth
669 // special handling. Makes compilation of a small expression
670 // with a dot in it about 10% faster.
671 // The * in the comments below mark whole sequences.
672 static struct ByteRangeProg {
673  int next;
674  int lo;
675  int hi;
676 } prog_80_10ffff[] = {
677  // Two-byte
678  { -1, 0x80, 0xBF, }, // 0: 80-BF
679  { 0, 0xC2, 0xDF, }, // 1: C2-DF 80-BF*
680 
681  // Three-byte
682  { 0, 0xA0, 0xBF, }, // 2: A0-BF 80-BF
683  { 2, 0xE0, 0xE0, }, // 3: E0 A0-BF 80-BF*
684  { 0, 0x80, 0xBF, }, // 4: 80-BF 80-BF
685  { 4, 0xE1, 0xEF, }, // 5: E1-EF 80-BF 80-BF*
686 
687  // Four-byte
688  { 4, 0x90, 0xBF, }, // 6: 90-BF 80-BF 80-BF
689  { 6, 0xF0, 0xF0, }, // 7: F0 90-BF 80-BF 80-BF*
690  { 4, 0x80, 0xBF, }, // 8: 80-BF 80-BF 80-BF
691  { 8, 0xF1, 0xF3, }, // 9: F1-F3 80-BF 80-BF 80-BF*
692  { 4, 0x80, 0x8F, }, // 10: 80-8F 80-BF 80-BF
693  { 10, 0xF4, 0xF4, }, // 11: F4 80-8F 80-BF 80-BF*
694 };
695 
697  int inst[arraysize(prog_80_10ffff)] = { 0 }; // does not need to be initialized; silences gcc warning
698  for (size_t i = 0; i < arraysize(prog_80_10ffff); i++) {
699  const ByteRangeProg& p = prog_80_10ffff[i];
700  int next = 0;
701  if (p.next >= 0)
702  next = inst[p.next];
703  inst[i] = UncachedRuneByteSuffix(static_cast<uint8_t>(p.lo),
704  static_cast<uint8_t>(p.hi), false, next);
705  if ((p.lo & 0xC0) != 0x80)
706  AddSuffix(inst[i]);
707  }
708 }
709 
710 void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
711  if (lo > hi)
712  return;
713 
714  // Pick off 80-10FFFF as a common special case
715  // that can bypass the slow rune_cache_.
716  if (lo == 0x80 && hi == 0x10ffff && !reversed_) {
717  Add_80_10ffff();
718  return;
719  }
720 
721  // Split range into same-length sized ranges.
722  for (int i = 1; i < UTFmax; i++) {
723  Rune max = MaxRune(i);
724  if (lo <= max && max < hi) {
725  AddRuneRangeUTF8(lo, max, foldcase);
726  AddRuneRangeUTF8(max+1, hi, foldcase);
727  return;
728  }
729  }
730 
731  // ASCII range is always a special case.
732  if (hi < Runeself) {
733  AddSuffix(UncachedRuneByteSuffix(static_cast<uint8_t>(lo),
734  static_cast<uint8_t>(hi), foldcase, 0));
735  return;
736  }
737 
738  // Split range into sections that agree on leading bytes.
739  for (int i = 1; i < UTFmax; i++) {
740  uint32_t m = (1<<(6*i)) - 1; // last i bytes of a UTF-8 sequence
741  if ((lo & ~m) != (hi & ~m)) {
742  if ((lo & m) != 0) {
743  AddRuneRangeUTF8(lo, lo|m, foldcase);
744  AddRuneRangeUTF8((lo|m)+1, hi, foldcase);
745  return;
746  }
747  if ((hi & m) != m) {
748  AddRuneRangeUTF8(lo, (hi&~m)-1, foldcase);
749  AddRuneRangeUTF8(hi&~m, hi, foldcase);
750  return;
751  }
752  }
753  }
754 
755  // Finally. Generate byte matching equivalent for lo-hi.
756  uint8_t ulo[UTFmax], uhi[UTFmax];
757  int n = runetochar(reinterpret_cast<char*>(ulo), &lo);
758  int m = runetochar(reinterpret_cast<char*>(uhi), &hi);
759  (void)m; // USED(m)
760  DCHECK_EQ(n, m);
761 
762  // The logic below encodes this thinking:
763  //
764  // 1. When we have built the whole suffix, we know that it cannot
765  // possibly be a suffix of anything longer: in forward mode, nothing
766  // else can occur before the leading byte; in reverse mode, nothing
767  // else can occur after the last continuation byte or else the leading
768  // byte would have to change. Thus, there is no benefit to caching
769  // the first byte of the suffix whereas there is a cost involved in
770  // cloning it if it begins a common prefix, which is fairly likely.
771  //
772  // 2. Conversely, the last byte of the suffix cannot possibly be a
773  // prefix of anything because next == 0, so we will never want to
774  // clone it, but it is fairly likely to be a common suffix. Perhaps
775  // more so in reverse mode than in forward mode because the former is
776  // "converging" towards lower entropy, but caching is still worthwhile
777  // for the latter in cases such as 80-BF.
778  //
779  // 3. Handling the bytes between the first and the last is less
780  // straightforward and, again, the approach depends on whether we are
781  // "converging" towards lower entropy: in forward mode, a single byte
782  // is unlikely to be part of a common suffix whereas a byte range
783  // is more likely so; in reverse mode, a byte range is unlikely to
784  // be part of a common suffix whereas a single byte is more likely
785  // so. The same benefit versus cost argument applies here.
786  int id = 0;
787  if (reversed_) {
788  for (int i = 0; i < n; i++) {
789  // In reverse UTF-8 mode: cache the leading byte; don't cache the last
790  // continuation byte; cache anything else iff it's a single byte (XX-XX).
791  if (i == 0 || (ulo[i] == uhi[i] && i != n-1))
792  id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
793  else
794  id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
795  }
796  } else {
797  for (int i = n-1; i >= 0; i--) {
798  // In forward UTF-8 mode: don't cache the leading byte; cache the last
799  // continuation byte; cache anything else iff it's a byte range (XX-YY).
800  if (i == n-1 || (ulo[i] < uhi[i] && i != 0))
801  id = CachedRuneByteSuffix(ulo[i], uhi[i], false, id);
802  else
803  id = UncachedRuneByteSuffix(ulo[i], uhi[i], false, id);
804  }
805  }
806  AddSuffix(id);
807 }
808 
809 // Should not be called.
811  // We're using WalkExponential; there should be no copying.
812  LOG(DFATAL) << "Compiler::Copy called!";
813  failed_ = true;
814  return NoMatch();
815 }
816 
817 // Visits a node quickly; called once WalkExponential has
818 // decided to cut this walk short.
820  failed_ = true;
821  return NoMatch();
822 }
823 
824 // Called before traversing a node's children during the walk.
826  // Cut off walk if we've already failed.
827  if (failed_)
828  *stop = true;
829 
830  return Frag(); // not used by caller
831 }
832 
833 Frag Compiler::Literal(Rune r, bool foldcase) {
834  switch (encoding_) {
835  default:
836  return Frag();
837 
838  case kEncodingLatin1:
839  return ByteRange(r, r, foldcase);
840 
841  case kEncodingUTF8: {
842  if (r < Runeself) // Make common case fast.
843  return ByteRange(r, r, foldcase);
844  uint8_t buf[UTFmax];
845  int n = runetochar(reinterpret_cast<char*>(buf), &r);
846  Frag f = ByteRange((uint8_t)buf[0], buf[0], false);
847  for (int i = 1; i < n; i++)
848  f = Cat(f, ByteRange((uint8_t)buf[i], buf[i], false));
849  return f;
850  }
851  }
852 }
853 
854 // Called after traversing the node's children during the walk.
855 // Given their frags, build and return the frag for this re.
857  int nchild_frags) {
858  // If a child failed, don't bother going forward, especially
859  // since the child_frags might contain Frags with NULLs in them.
860  if (failed_)
861  return NoMatch();
862 
863  // Given the child fragments, return the fragment for this node.
864  switch (re->op()) {
865  case kRegexpRepeat:
866  // Should not see; code at bottom of function will print error
867  break;
868 
869  case kRegexpNoMatch:
870  return NoMatch();
871 
872  case kRegexpEmptyMatch:
873  return Nop();
874 
875  case kRegexpHaveMatch: {
876  Frag f = Match(re->match_id());
877  if (anchor_ == RE2::ANCHOR_BOTH) {
878  // Append \z or else the subexpression will effectively be unanchored.
879  // Complemented by the UNANCHORED case in CompileSet().
880  f = Cat(EmptyWidth(kEmptyEndText), f);
881  }
882  return f;
883  }
884 
885  case kRegexpConcat: {
886  Frag f = child_frags[0];
887  for (int i = 1; i < nchild_frags; i++)
888  f = Cat(f, child_frags[i]);
889  return f;
890  }
891 
892  case kRegexpAlternate: {
893  Frag f = child_frags[0];
894  for (int i = 1; i < nchild_frags; i++)
895  f = Alt(f, child_frags[i]);
896  return f;
897  }
898 
899  case kRegexpStar:
900  return Star(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
901 
902  case kRegexpPlus:
903  return Plus(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
904 
905  case kRegexpQuest:
906  return Quest(child_frags[0], (re->parse_flags()&Regexp::NonGreedy) != 0);
907 
908  case kRegexpLiteral:
909  return Literal(re->rune(), (re->parse_flags()&Regexp::FoldCase) != 0);
910 
911  case kRegexpLiteralString: {
912  // Concatenation of literals.
913  if (re->nrunes() == 0)
914  return Nop();
915  Frag f;
916  for (int i = 0; i < re->nrunes(); i++) {
917  Frag f1 = Literal(re->runes()[i],
918  (re->parse_flags()&Regexp::FoldCase) != 0);
919  if (i == 0)
920  f = f1;
921  else
922  f = Cat(f, f1);
923  }
924  return f;
925  }
926 
927  case kRegexpAnyChar:
928  BeginRange();
929  AddRuneRange(0, Runemax, false);
930  return EndRange();
931 
932  case kRegexpAnyByte:
933  return ByteRange(0x00, 0xFF, false);
934 
935  case kRegexpCharClass: {
936  CharClass* cc = re->cc();
937  if (cc->empty()) {
938  // This can't happen.
939  LOG(DFATAL) << "No ranges in char class";
940  failed_ = true;
941  return NoMatch();
942  }
943 
944  // ASCII case-folding optimization: if the char class
945  // behaves the same on A-Z as it does on a-z,
946  // discard any ranges wholly contained in A-Z
947  // and mark the other ranges as foldascii.
948  // This reduces the size of a program for
949  // (?i)abc from 3 insts per letter to 1 per letter.
950  bool foldascii = cc->FoldsASCII();
951 
952  // Character class is just a big OR of the different
953  // character ranges in the class.
954  BeginRange();
955  for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i) {
956  // ASCII case-folding optimization (see above).
957  if (foldascii && 'A' <= i->lo && i->hi <= 'Z')
958  continue;
959 
960  // If this range contains all of A-Za-z or none of it,
961  // the fold flag is unnecessary; don't bother.
962  bool fold = foldascii;
963  if ((i->lo <= 'A' && 'z' <= i->hi) || i->hi < 'A' || 'z' < i->lo ||
964  ('Z' < i->lo && i->hi < 'a'))
965  fold = false;
966 
967  AddRuneRange(i->lo, i->hi, fold);
968  }
969  return EndRange();
970  }
971 
972  case kRegexpCapture:
973  // If this is a non-capturing parenthesis -- (?:foo) --
974  // just use the inner expression.
975  if (re->cap() < 0)
976  return child_frags[0];
977  return Capture(child_frags[0], re->cap());
978 
979  case kRegexpBeginLine:
981 
982  case kRegexpEndLine:
984 
985  case kRegexpBeginText:
987 
988  case kRegexpEndText:
990 
991  case kRegexpWordBoundary:
993 
996  }
997  LOG(DFATAL) << "Missing case in Compiler: " << re->op();
998  failed_ = true;
999  return NoMatch();
1000 }
1001 
1002 // Is this regexp required to start at the beginning of the text?
1003 // Only approximate; can return false for complicated regexps like (\Aa|\Ab),
1004 // but handles (\A(a|b)). Could use the Walker to write a more exact one.
1005 static bool IsAnchorStart(Regexp** pre, int depth) {
1006  Regexp* re = *pre;
1007  Regexp* sub;
1008  // The depth limit makes sure that we don't overflow
1009  // the stack on a deeply nested regexp. As the comment
1010  // above says, IsAnchorStart is conservative, so returning
1011  // a false negative is okay. The exact limit is somewhat arbitrary.
1012  if (re == NULL || depth >= 4)
1013  return false;
1014  switch (re->op()) {
1015  default:
1016  break;
1017  case kRegexpConcat:
1018  if (re->nsub() > 0) {
1019  sub = re->sub()[0]->Incref();
1020  if (IsAnchorStart(&sub, depth+1)) {
1021  PODArray<Regexp*> subcopy(re->nsub());
1022  subcopy[0] = sub; // already have reference
1023  for (int i = 1; i < re->nsub(); i++)
1024  subcopy[i] = re->sub()[i]->Incref();
1025  *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
1026  re->Decref();
1027  return true;
1028  }
1029  sub->Decref();
1030  }
1031  break;
1032  case kRegexpCapture:
1033  sub = re->sub()[0]->Incref();
1034  if (IsAnchorStart(&sub, depth+1)) {
1035  *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
1036  re->Decref();
1037  return true;
1038  }
1039  sub->Decref();
1040  break;
1041  case kRegexpBeginText:
1042  *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
1043  re->Decref();
1044  return true;
1045  }
1046  return false;
1047 }
1048 
1049 // Is this regexp required to start at the end of the text?
1050 // Only approximate; can return false for complicated regexps like (a\z|b\z),
1051 // but handles ((a|b)\z). Could use the Walker to write a more exact one.
1052 static bool IsAnchorEnd(Regexp** pre, int depth) {
1053  Regexp* re = *pre;
1054  Regexp* sub;
1055  // The depth limit makes sure that we don't overflow
1056  // the stack on a deeply nested regexp. As the comment
1057  // above says, IsAnchorEnd is conservative, so returning
1058  // a false negative is okay. The exact limit is somewhat arbitrary.
1059  if (re == NULL || depth >= 4)
1060  return false;
1061  switch (re->op()) {
1062  default:
1063  break;
1064  case kRegexpConcat:
1065  if (re->nsub() > 0) {
1066  sub = re->sub()[re->nsub() - 1]->Incref();
1067  if (IsAnchorEnd(&sub, depth+1)) {
1068  PODArray<Regexp*> subcopy(re->nsub());
1069  subcopy[re->nsub() - 1] = sub; // already have reference
1070  for (int i = 0; i < re->nsub() - 1; i++)
1071  subcopy[i] = re->sub()[i]->Incref();
1072  *pre = Regexp::Concat(subcopy.data(), re->nsub(), re->parse_flags());
1073  re->Decref();
1074  return true;
1075  }
1076  sub->Decref();
1077  }
1078  break;
1079  case kRegexpCapture:
1080  sub = re->sub()[0]->Incref();
1081  if (IsAnchorEnd(&sub, depth+1)) {
1082  *pre = Regexp::Capture(sub, re->parse_flags(), re->cap());
1083  re->Decref();
1084  return true;
1085  }
1086  sub->Decref();
1087  break;
1088  case kRegexpEndText:
1089  *pre = Regexp::LiteralString(NULL, 0, re->parse_flags());
1090  re->Decref();
1091  return true;
1092  }
1093  return false;
1094 }
1095 
1097  RE2::Anchor anchor) {
1098  prog_->set_flags(flags);
1099 
1100  if (flags & Regexp::Latin1)
1102  max_mem_ = max_mem;
1103  if (max_mem <= 0) {
1104  max_ninst_ = 100000; // more than enough
1105  } else if (static_cast<size_t>(max_mem) <= sizeof(Prog)) {
1106  // No room for anything.
1107  max_ninst_ = 0;
1108  } else {
1109  int64_t m = (max_mem - sizeof(Prog)) / sizeof(Prog::Inst);
1110  // Limit instruction count so that inst->id() fits nicely in an int.
1111  // SparseArray also assumes that the indices (inst->id()) are ints.
1112  // The call to WalkExponential uses 2*max_ninst_ below,
1113  // and other places in the code use 2 or 3 * prog->size().
1114  // Limiting to 2^24 should avoid overflow in those places.
1115  // (The point of allowing more than 32 bits of memory is to
1116  // have plenty of room for the DFA states, not to use it up
1117  // on the program.)
1118  if (m >= 1<<24)
1119  m = 1<<24;
1120 
1121  // Inst imposes its own limit (currently bigger than 2^24 but be safe).
1122  if (m > Prog::Inst::kMaxInst)
1124 
1125  max_ninst_ = static_cast<int>(m);
1126  }
1127 
1128  anchor_ = anchor;
1129 }
1130 
1131 // Compiles re, returning program.
1132 // Caller is responsible for deleting prog_.
1133 // If reversed is true, compiles a program that expects
1134 // to run over the input string backward (reverses all concatenations).
1135 // The reversed flag is also recorded in the returned program.
1137  Compiler c;
1138  c.Setup(re->parse_flags(), max_mem, RE2::UNANCHORED /* unused */);
1139  c.reversed_ = reversed;
1140 
1141  // Simplify to remove things like counted repetitions
1142  // and character classes like \d.
1143  Regexp* sre = re->Simplify();
1144  if (sre == NULL)
1145  return NULL;
1146 
1147  // Record whether prog is anchored, removing the anchors.
1148  // (They get in the way of other optimizations.)
1149  bool is_anchor_start = IsAnchorStart(&sre, 0);
1150  bool is_anchor_end = IsAnchorEnd(&sre, 0);
1151 
1152  // Generate fragment for entire regexp.
1153  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
1154  sre->Decref();
1155  if (c.failed_)
1156  return NULL;
1157 
1158  // Success! Finish by putting Match node at end, and record start.
1159  // Turn off c.reversed_ (if it is set) to force the remaining concatenations
1160  // to behave normally.
1161  c.reversed_ = false;
1162  all = c.Cat(all, c.Match(0));
1163 
1164  c.prog_->set_reversed(reversed);
1165  if (c.prog_->reversed()) {
1166  c.prog_->set_anchor_start(is_anchor_end);
1167  c.prog_->set_anchor_end(is_anchor_start);
1168  } else {
1169  c.prog_->set_anchor_start(is_anchor_start);
1170  c.prog_->set_anchor_end(is_anchor_end);
1171  }
1172 
1173  c.prog_->set_start(all.begin);
1174  if (!c.prog_->anchor_start()) {
1175  // Also create unanchored version, which starts with a .*? loop.
1176  all = c.Cat(c.DotStar(), all);
1177  }
1178  c.prog_->set_start_unanchored(all.begin);
1179 
1180  // Hand ownership of prog_ to caller.
1181  return c.Finish();
1182 }
1183 
1185  if (failed_)
1186  return NULL;
1187 
1188  if (prog_->start() == 0 && prog_->start_unanchored() == 0) {
1189  // No possible matches; keep Fail instruction only.
1190  ninst_ = 1;
1191  }
1192 
1193  // Hand off the array to Prog.
1194  prog_->inst_ = std::move(inst_);
1195  prog_->size_ = ninst_;
1196 
1197  prog_->Optimize();
1198  prog_->Flatten();
1199  prog_->ComputeByteMap();
1200 
1201  // Record remaining memory for DFA.
1202  if (max_mem_ <= 0) {
1203  prog_->set_dfa_mem(1<<20);
1204  } else {
1205  int64_t m = max_mem_ - sizeof(Prog);
1206  m -= prog_->size_*sizeof(Prog::Inst); // account for inst_
1207  if (prog_->CanBitState())
1208  m -= prog_->size_*sizeof(uint16_t); // account for list_heads_
1209  if (m < 0)
1210  m = 0;
1211  prog_->set_dfa_mem(m);
1212  }
1213 
1214  Prog* p = prog_;
1215  prog_ = NULL;
1216  return p;
1217 }
1218 
1219 // Converts Regexp to Prog.
1221  return Compiler::Compile(this, false, max_mem);
1222 }
1223 
1225  return Compiler::Compile(this, true, max_mem);
1226 }
1227 
1229  return Star(ByteRange(0x00, 0xff, false), true);
1230 }
1231 
1232 // Compiles RE set to Prog.
1234  Compiler c;
1235  c.Setup(re->parse_flags(), max_mem, anchor);
1236 
1237  Regexp* sre = re->Simplify();
1238  if (sre == NULL)
1239  return NULL;
1240 
1241  Frag all = c.WalkExponential(sre, Frag(), 2*c.max_ninst_);
1242  sre->Decref();
1243  if (c.failed_)
1244  return NULL;
1245 
1246  c.prog_->set_anchor_start(true);
1247  c.prog_->set_anchor_end(true);
1248 
1249  if (anchor == RE2::UNANCHORED) {
1250  // Prepend .* or else the expression will effectively be anchored.
1251  // Complemented by the ANCHOR_BOTH case in PostVisit().
1252  all = c.Cat(c.DotStar(), all);
1253  }
1254  c.prog_->set_start(all.begin);
1255  c.prog_->set_start_unanchored(all.begin);
1256 
1257  Prog* prog = c.Finish();
1258  if (prog == NULL)
1259  return NULL;
1260 
1261  // Make sure DFA has enough memory to operate,
1262  // since we're not going to fall back to the NFA.
1263  bool dfa_failed = false;
1264  StringPiece sp = "hello, world";
1265  prog->SearchDFA(sp, sp, Prog::kAnchored, Prog::kManyMatch,
1266  NULL, &dfa_failed, NULL);
1267  if (dfa_failed) {
1268  delete prog;
1269  return NULL;
1270  }
1271 
1272  return prog;
1273 }
1274 
1276  return Compiler::CompileSet(re, anchor, max_mem);
1277 }
1278 
1279 } // namespace re2
re2::Compiler::DotStar
Frag DotStar()
Definition: bloaty/third_party/re2/re2/compile.cc:1228
re2::Compiler::Match
Frag Match(int32_t id)
Definition: bloaty/third_party/re2/re2/compile.cc:417
re2::CharClass::empty
bool empty()
Definition: bloaty/third_party/re2/re2/regexp.h:251
test_group_name.all
all
Definition: test_group_name.py:241
re2::kRegexpEmptyMatch
@ kRegexpEmptyMatch
Definition: bloaty/third_party/re2/re2/regexp.h:107
re2::Prog::Inst::out1_
uint32_t out1_
Definition: bloaty/third_party/re2/re2/prog.h:135
re2::kRegexpBeginLine
@ kRegexpBeginLine
Definition: bloaty/third_party/re2/re2/regexp.h:142
re2::kRegexpNoWordBoundary
@ kRegexpNoWordBoundary
Definition: bloaty/third_party/re2/re2/regexp.h:149
gen_build_yaml.out
dictionary out
Definition: src/benchmark/gen_build_yaml.py:24
regen-readme.it
it
Definition: regen-readme.py:15
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
re2::Compiler::rune_cache_
std::unordered_map< uint64_t, int > rune_cache_
Definition: bloaty/third_party/re2/re2/compile.cc:246
re2::Compiler
Definition: bloaty/third_party/re2/re2/compile.cc:127
re2::Regexp::Decref
void Decref()
Definition: bloaty/third_party/re2/re2/regexp.cc:115
re2::Compiler::Compiler
Compiler()
Definition: bloaty/third_party/re2/re2/compile.cc:255
re2::Prog::Inst::out1
int out1()
Definition: bloaty/third_party/re2/re2/prog.h:85
pod_array.h
re2::PatchList::Append
static PatchList Append(Prog::Inst *inst0, PatchList l1, PatchList l2)
Definition: bloaty/third_party/re2/re2/compile.cc:89
memset
return memset(p, 0, total)
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
re2::Compiler::ShortVisit
Frag ShortVisit(Regexp *re, Frag parent_arg)
Definition: bloaty/third_party/re2/re2/compile.cc:819
re2::Compiler::Plus
Frag Plus(Frag a, bool nongreedy)
Definition: bloaty/third_party/re2/re2/compile.cc:374
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
re2::Regexp::runes
Rune * runes()
Definition: bloaty/third_party/re2/re2/regexp.h:340
xds_manager.f1
f1
Definition: xds_manager.py:42
re2::Prog::Inst
Definition: bloaty/third_party/re2/re2/prog.h:62
re2::kRegexpLiteralString
@ kRegexpLiteralString
Definition: bloaty/third_party/re2/re2/regexp.h:113
re2::Compiler::reversed_
bool reversed_
Definition: bloaty/third_party/re2/re2/compile.cc:238
re2::Regexp::Capture
static Regexp * Capture(Regexp *sub, ParseFlags flags, int cap)
Definition: bloaty/third_party/re2/re2/regexp.cc:298
re2::Compiler::NoMatch
Frag NoMatch()
Definition: bloaty/third_party/re2/re2/compile.cc:300
string.h
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
re2::Regexp::nsub
int nsub()
Definition: bloaty/third_party/re2/re2/regexp.h:322
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
re2::RE2::ANCHOR_BOTH
@ ANCHOR_BOTH
Definition: bloaty/third_party/re2/re2/re2.h:475
re2::PODArray::data
T * data() const
Definition: bloaty/third_party/re2/util/pod_array.h:24
re2::Prog::size_
int size_
Definition: bloaty/third_party/re2/re2/prog.h:403
re2::RE2::UNANCHORED
@ UNANCHORED
Definition: bloaty/third_party/re2/re2/re2.h:473
re2::kRegexpLiteral
@ kRegexpLiteral
Definition: bloaty/third_party/re2/re2/regexp.h:110
re2::Prog::ComputeByteMap
void ComputeByteMap()
Definition: bloaty/third_party/re2/re2/prog.cc:446
re2::Compiler::max_mem_
int64_t max_mem_
Definition: bloaty/third_party/re2/re2/compile.cc:244
re2::Regexp::nrunes
int nrunes()
Definition: bloaty/third_party/re2/re2/regexp.h:341
re2::PatchList::Patch
static void Patch(Prog::Inst *inst0, PatchList l, uint32_t v)
Definition: bloaty/third_party/re2/re2/compile.cc:75
re2::Prog::Inst::set_out
void set_out(int out)
Definition: bloaty/third_party/re2/re2/prog.h:125
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::PatchList::Deref
static PatchList Deref(Prog::Inst *inst0, PatchList l)
Definition: bloaty/third_party/re2/re2/compile.cc:65
cpp.ast.reversed
def reversed(seq)
Definition: bloaty/third_party/googletest/googlemock/scripts/generator/cpp/ast.py:52
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
re2::Regexp::Concat
static Regexp * Concat(Regexp **subs, int nsubs, ParseFlags flags)
Definition: bloaty/third_party/re2/re2/regexp.cc:286
re2::Regexp::CompileToProg
Prog * CompileToProg(int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1220
re2::Regexp::cc
CharClass * cc()
Definition: bloaty/third_party/re2/re2/regexp.h:337
re2::kRegexpHaveMatch
@ kRegexpHaveMatch
Definition: bloaty/third_party/re2/re2/regexp.h:161
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
re2::Compiler::Setup
void Setup(Regexp::ParseFlags, int64_t, RE2::Anchor)
Definition: bloaty/third_party/re2/re2/compile.cc:1096
re2::kRegexpWordBoundary
@ kRegexpWordBoundary
Definition: bloaty/third_party/re2/re2/regexp.h:147
re2::Compiler::PostVisit
Frag PostVisit(Regexp *re, Frag parent_arg, Frag pre_arg, Frag *child_args, int nchild_args)
Definition: bloaty/third_party/re2/re2/compile.cc:856
re2::Regexp::Latin1
@ Latin1
Definition: bloaty/third_party/re2/re2/regexp.h:289
re2::Regexp::LiteralString
static Regexp * LiteralString(Rune *runes, int nrunes, ParseFlags flags)
Definition: bloaty/third_party/re2/re2/regexp.cc:321
re2::kRegexpEndText
@ kRegexpEndText
Definition: bloaty/third_party/re2/re2/regexp.h:154
re2::Prog::CanBitState
bool CanBitState()
Definition: bloaty/third_party/re2/re2/prog.h:317
re2::kEncodingUTF8
@ kEncodingUTF8
Definition: bloaty/third_party/re2/re2/compile.cc:123
re2::Compiler::anchor_
RE2::Anchor anchor_
Definition: bloaty/third_party/re2/re2/compile.cc:249
re2::IsNoMatch
static bool IsNoMatch(Frag a)
Definition: bloaty/third_party/re2/re2/compile.cc:305
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
re2::Compiler::Copy
Frag Copy(Frag arg)
Definition: bloaty/third_party/re2/re2/compile.cc:810
re2::kEmptyWordBoundary
@ kEmptyWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:47
re2::prog_80_10ffff
static struct re2::ByteRangeProg prog_80_10ffff[]
re2::Frag::end
PatchList end
Definition: bloaty/third_party/re2/re2/compile.cc:115
re2::Compiler::AddRuneRange
void AddRuneRange(Rune lo, Rune hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:643
re2::CharClass
Definition: bloaty/third_party/re2/re2/regexp.h:242
re2::Encoding
Encoding
Definition: bloaty/third_party/re2/re2/compile.cc:122
re2::Compiler::BeginRange
void BeginRange()
Definition: bloaty/third_party/re2/re2/compile.cc:467
re2::Compiler::failed_
bool failed_
Definition: bloaty/third_party/re2/re2/compile.cc:236
re2::ByteRangeProg::lo
int lo
Definition: bloaty/third_party/re2/re2/compile.cc:674
re2::kEncodingLatin1
@ kEncodingLatin1
Definition: bloaty/third_party/re2/re2/compile.cc:124
re2::Regexp::Walker
Definition: bloaty/third_party/re2/re2/regexp.h:414
re2::Regexp::rune
Rune rune()
Definition: bloaty/third_party/re2/re2/regexp.h:336
re2::PatchList
Definition: bloaty/third_party/re2/re2/compile.cc:38
re2::IsAnchorStart
static bool IsAnchorStart(Regexp **pre, int depth)
Definition: bloaty/third_party/re2/re2/compile.cc:1005
absl::move
constexpr absl::remove_reference_t< T > && move(T &&t) noexcept
Definition: abseil-cpp/absl/utility/utility.h:221
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
re2::kRegexpConcat
@ kRegexpConcat
Definition: bloaty/third_party/re2/re2/regexp.h:116
re2::Compiler::ByteRange
Frag ByteRange(int lo, int hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:399
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
root
RefCountedPtr< grpc_tls_certificate_provider > root
Definition: xds_server_config_fetcher.cc:223
re2::kEmptyNonWordBoundary
@ kEmptyNonWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:48
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::EmptyOp
EmptyOp
Definition: bloaty/third_party/re2/re2/prog.h:42
re2::MakeRuneCacheKey
static uint64_t MakeRuneCacheKey(uint8_t lo, uint8_t hi, bool foldcase, int next)
Definition: bloaty/third_party/re2/re2/compile.cc:484
re2::Prog::inst_
PODArray< Inst > inst_
Definition: bloaty/third_party/re2/re2/prog.h:413
re2::Compiler::AllocInst
int AllocInst(int n)
Definition: bloaty/third_party/re2/re2/compile.cc:272
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
re2::Runeself
@ Runeself
Definition: bloaty/third_party/re2/util/utf.h:31
re2::kRegexpAnyChar
@ kRegexpAnyChar
Definition: bloaty/third_party/re2/re2/regexp.h:136
re2::runetochar
int runetochar(char *str, const Rune *rune)
Definition: bloaty/third_party/re2/util/rune.cc:127
grpc_core::fail
Poll< absl::StatusOr< std::tuple< T... > > > fail()
Definition: try_join_test.cc:45
uint64_t
unsigned __int64 uint64_t
Definition: stdint-msvc2008.h:90
re2::kRegexpCharClass
@ kRegexpCharClass
Definition: bloaty/third_party/re2/re2/regexp.h:157
re2::Prog::Flatten
void Flatten()
Definition: bloaty/third_party/re2/re2/prog.cc:557
re2::Regexp::sub
Regexp ** sub()
Definition: bloaty/third_party/re2/re2/regexp.h:327
arg
Definition: cmdline.cc:40
re2::Prog::kAnchored
@ kAnchored
Definition: bloaty/third_party/re2/re2/prog.h:177
re2::Compiler::Literal
Frag Literal(Rune r, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:833
re2::Prog::set_dfa_mem
void set_dfa_mem(int64_t dfa_mem)
Definition: bloaty/third_party/re2/re2/prog.h:210
re2::kEmptyBeginText
@ kEmptyBeginText
Definition: bloaty/third_party/re2/re2/prog.h:45
re2::nullPatchList
static PatchList nullPatchList
Definition: bloaty/third_party/re2/re2/compile.cc:55
re2::Regexp::match_id
int match_id()
Definition: bloaty/third_party/re2/re2/regexp.h:342
re2::Compiler::prog_
Prog * prog_
Definition: bloaty/third_party/re2/re2/compile.cc:235
re2::kRegexpAlternate
@ kRegexpAlternate
Definition: bloaty/third_party/re2/re2/regexp.h:118
re2::CharClass::end
iterator end()
Definition: bloaty/third_party/re2/re2/regexp.h:248
re2::Prog::Inst::kMaxInst
static const int kMaxInst
Definition: bloaty/third_party/re2/re2/prog.h:114
re2::Compiler::Star
Frag Star(Frag a, bool nongreedy)
Definition: bloaty/third_party/re2/re2/compile.cc:358
re2::Compiler::Cat
Frag Cat(Frag a, Frag b)
Definition: bloaty/third_party/re2/re2/compile.cc:310
re2::Frag
Definition: bloaty/third_party/re2/re2/compile.cc:113
re2::PatchList::p
uint32_t p
Definition: bloaty/third_party/re2/re2/compile.cc:39
re2::Compiler::Alt
Frag Alt(Frag a, Frag b)
Definition: bloaty/third_party/re2/re2/compile.cc:335
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::IsAnchorEnd
static bool IsAnchorEnd(Regexp **pre, int depth)
Definition: bloaty/third_party/re2/re2/compile.cc:1052
re2::Compiler::UncachedRuneByteSuffix
int UncachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next)
Definition: bloaty/third_party/re2/re2/compile.cc:473
re2::Compiler::IsCachedRuneByteSuffix
bool IsCachedRuneByteSuffix(int id)
Definition: bloaty/third_party/re2/re2/compile.cc:503
re2::Prog::kManyMatch
@ kManyMatch
Definition: bloaty/third_party/re2/re2/prog.h:196
re2::kInstAlt
@ kInstAlt
Definition: bloaty/third_party/re2/re2/prog.h:30
re2::Regexp::ParseFlags
ParseFlags
Definition: bloaty/third_party/re2/re2/regexp.h:278
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
stdint.h
re2::Compiler::Compile
static Prog * Compile(Regexp *re, bool reversed, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1136
re2::kRegexpStar
@ kRegexpStar
Definition: bloaty/third_party/re2/re2/regexp.h:121
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
re2::CharClass::FoldsASCII
bool FoldsASCII()
Definition: bloaty/third_party/re2/re2/regexp.h:253
google_benchmark.example.empty
def empty(state)
Definition: example.py:31
re2::Regexp::parse_flags
ParseFlags parse_flags()
Definition: bloaty/third_party/re2/re2/regexp.h:324
re2::Compiler::EmptyWidth
Frag EmptyWidth(EmptyOp op)
Definition: bloaty/third_party/re2/re2/compile.cc:426
re2::Regexp::CompileToReverseProg
Prog * CompileToReverseProg(int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1224
re2::Prog::set_flags
void set_flags(int flags)
Definition: bloaty/third_party/re2/re2/prog.h:213
re2::Compiler::ninst_
int ninst_
Definition: bloaty/third_party/re2/re2/compile.cc:241
re2::kEmptyEndLine
@ kEmptyEndLine
Definition: bloaty/third_party/re2/re2/prog.h:44
re2::Regexp::op
RegexpOp op()
Definition: bloaty/third_party/re2/re2/regexp.h:321
re2::Compiler::inst_
PODArray< Prog::Inst > inst_
Definition: bloaty/third_party/re2/re2/compile.cc:240
re2::kInstNop
@ kInstNop
Definition: bloaty/third_party/re2/re2/prog.h:36
re2::kRegexpAnyByte
@ kRegexpAnyByte
Definition: bloaty/third_party/re2/re2/regexp.h:139
key
const char * key
Definition: hpack_parser_table.cc:164
re2::CharClass::begin
iterator begin()
Definition: bloaty/third_party/re2/re2/regexp.h:247
re2::Regexp::NonGreedy
@ NonGreedy
Definition: bloaty/third_party/re2/re2/regexp.h:290
re2::Compiler::encoding_
Encoding encoding_
Definition: bloaty/third_party/re2/re2/compile.cc:237
absl::flags_internal
Definition: abseil-cpp/absl/flags/commandlineflag.h:40
re2::Compiler::ByteRangeEqual
bool ByteRangeEqual(int id1, int id2)
Definition: bloaty/third_party/re2/re2/compile.cc:595
re2::Prog
Definition: bloaty/third_party/re2/re2/prog.h:56
re2::Regexp::Simplify
Regexp * Simplify()
Definition: bloaty/third_party/re2/re2/simplify.cc:179
re2::RuneRange
Definition: bloaty/third_party/re2/re2/regexp.h:225
re2::Compiler::Finish
Prog * Finish()
Definition: bloaty/third_party/re2/re2/compile.cc:1184
re2::Frag::Frag
Frag(uint32_t begin, PatchList end)
Definition: bloaty/third_party/re2/re2/compile.cc:118
re2::Compiler::rune_range_
Frag rune_range_
Definition: bloaty/third_party/re2/re2/compile.cc:247
DCHECK_EQ
#define DCHECK_EQ(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:20
re2::Regexp::Incref
Regexp * Incref()
Definition: bloaty/third_party/re2/re2/regexp.cc:89
re2::Rune
signed int Rune
Definition: bloaty/third_party/re2/util/utf.h:25
re2::Compiler::AddRuneRangeLatin1
void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:655
next
AllocList * next[kMaxLevel]
Definition: abseil-cpp/absl/base/internal/low_level_alloc.cc:100
fix_build_deps.r
r
Definition: fix_build_deps.py:491
re2::Prog::Inst::out
int out()
Definition: bloaty/third_party/re2/re2/prog.h:84
re2::Frag::Frag
Frag()
Definition: bloaty/third_party/re2/re2/compile.cc:117
re2::Prog::CompileSet
static Prog * CompileSet(Regexp *re, RE2::Anchor anchor, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1275
re2::Frag::begin
uint32_t begin
Definition: bloaty/third_party/re2/re2/compile.cc:114
re2::ByteRangeProg::next
int next
Definition: bloaty/third_party/re2/re2/compile.cc:673
re2::Compiler::AddSuffixRecursive
int AddSuffixRecursive(int root, int id)
Definition: bloaty/third_party/re2/re2/compile.cc:537
re2::Compiler::Quest
Frag Quest(Frag a, bool nongreedy)
Definition: bloaty/third_party/re2/re2/compile.cc:381
re2::kRegexpRepeat
@ kRegexpRepeat
Definition: bloaty/third_party/re2/re2/regexp.h:129
re2::Compiler::Nop
Frag Nop()
Definition: bloaty/third_party/re2/re2/compile.cc:408
re2::Prog::start_unanchored
int start_unanchored()
Definition: bloaty/third_party/re2/re2/prog.h:201
re2::Compiler::AddRuneRangeUTF8
void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase)
Definition: bloaty/third_party/re2/re2/compile.cc:710
re2::MaxRune
static int MaxRune(int len)
Definition: bloaty/third_party/re2/re2/compile.cc:450
re2::kRegexpBeginText
@ kRegexpBeginText
Definition: bloaty/third_party/re2/re2/regexp.h:152
re2::kEmptyEndText
@ kEmptyEndText
Definition: bloaty/third_party/re2/re2/prog.h:46
re2::kEmptyBeginLine
@ kEmptyBeginLine
Definition: bloaty/third_party/re2/re2/prog.h:43
re2::kRegexpEndLine
@ kRegexpEndLine
Definition: bloaty/third_party/re2/re2/regexp.h:144
re2::Compiler::Add_80_10ffff
void Add_80_10ffff()
Definition: bloaty/third_party/re2/re2/compile.cc:696
stop
static const char stop[]
Definition: benchmark-async-pummel.c:35
re2::Compiler::max_ninst_
int max_ninst_
Definition: bloaty/third_party/re2/re2/compile.cc:242
re2::kRegexpCapture
@ kRegexpCapture
Definition: bloaty/third_party/re2/re2/regexp.h:133
re2::kRegexpPlus
@ kRegexpPlus
Definition: bloaty/third_party/re2/re2/regexp.h:123
re2::PatchList::Mk
static PatchList Mk(uint32_t p)
Definition: bloaty/third_party/re2/re2/compile.cc:58
re2::kRegexpNoMatch
@ kRegexpNoMatch
Definition: bloaty/third_party/re2/re2/regexp.h:104
re2::RE2::Anchor
Anchor
Definition: bloaty/third_party/re2/re2/re2.h:472
len
int len
Definition: abseil-cpp/absl/base/internal/low_level_alloc_test.cc:46
DCHECK
#define DCHECK(condition)
Definition: bloaty/third_party/re2/util/logging.h:19
re2::Compiler::operator=
Compiler & operator=(const Compiler &)=delete
re2::ByteRangeProg::hi
int hi
Definition: bloaty/third_party/re2/re2/compile.cc:675
re2::Compiler::PreVisit
Frag PreVisit(Regexp *re, Frag parent_arg, bool *stop)
Definition: bloaty/third_party/re2/re2/compile.cc:825
mkowners.depth
depth
Definition: mkowners.py:114
regress.m
m
Definition: regress/regress.py:25
int32_t
signed int int32_t
Definition: stdint-msvc2008.h:77
re2::Compiler::~Compiler
~Compiler()
Definition: bloaty/third_party/re2/re2/compile.cc:268
re2::Compiler::CachedRuneByteSuffix
int CachedRuneByteSuffix(uint8_t lo, uint8_t hi, bool foldcase, int next)
Definition: bloaty/third_party/re2/re2/compile.cc:492
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
re2::Compiler::EndRange
Frag EndRange()
Definition: bloaty/third_party/re2/re2/compile.cc:633
opcode
opcode
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/upb.h:6290
re2::Compiler::AddSuffix
void AddSuffix(int id)
Definition: bloaty/third_party/re2/re2/compile.cc:513
re2::PODArray
Definition: bloaty/third_party/re2/util/pod_array.h:14
op
static grpc_op * op
Definition: test/core/fling/client.cc:47
re2::kRegexpQuest
@ kRegexpQuest
Definition: bloaty/third_party/re2/re2/regexp.h:125
re2::Runemax
@ Runemax
Definition: bloaty/third_party/re2/util/utf.h:33
re2::kInstByteRange
@ kInstByteRange
Definition: bloaty/third_party/re2/re2/prog.h:32
re2::ByteRangeProg
Definition: bloaty/third_party/re2/re2/compile.cc:672
re2::Regexp::FoldCase
@ FoldCase
Definition: bloaty/third_party/re2/re2/regexp.h:280
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Regexp::cap
int cap()
Definition: bloaty/third_party/re2/re2/regexp.h:338
re2::Prog::start
int start()
Definition: bloaty/third_party/re2/re2/prog.h:200
re2::UTFmax
@ UTFmax
Definition: bloaty/third_party/re2/util/utf.h:29
re2::Compiler::Capture
Frag Capture(Frag a, int n)
Definition: bloaty/third_party/re2/re2/compile.cc:435
id
uint32_t id
Definition: flow_control_fuzzer.cc:70
re2::Compiler::FindByteRange
Frag FindByteRange(int root, int id)
Definition: bloaty/third_party/re2/re2/compile.cc:601
re2::Compiler::CompileSet
static Prog * CompileSet(Regexp *re, RE2::Anchor anchor, int64_t max_mem)
Definition: bloaty/third_party/re2/re2/compile.cc:1233
re2::Prog::Optimize
void Optimize()
Definition: bloaty/third_party/re2/re2/prog.cc:198


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:00