re2/re2/prog.cc
Go to the documentation of this file.
1 // Copyright 2007 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Compiled regular expression representation.
6 // Tested by compile_test.cc
7 
8 #include "re2/prog.h"
9 
10 #if defined(__AVX2__)
11 #include <immintrin.h>
12 #ifdef _MSC_VER
13 #include <intrin.h>
14 #endif
15 #endif
16 #include <stdint.h>
17 #include <string.h>
18 #include <algorithm>
19 #include <memory>
20 #include <utility>
21 
22 #include "util/util.h"
23 #include "util/logging.h"
24 #include "util/strutil.h"
25 #include "re2/bitmap256.h"
26 #include "re2/stringpiece.h"
27 
28 namespace re2 {
29 
30 // Constructors per Inst opcode
31 
35  out1_ = out1;
36 }
37 
38 void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
39  DCHECK_EQ(out_opcode_, 0);
40  set_out_opcode(out, kInstByteRange);
41  lo_ = lo & 0xFF;
42  hi_ = hi & 0xFF;
43  hint_foldcase_ = foldcase&1;
44 }
45 
46 void Prog::Inst::InitCapture(int cap, uint32_t out) {
47  DCHECK_EQ(out_opcode_, 0);
48  set_out_opcode(out, kInstCapture);
49  cap_ = cap;
50 }
51 
53  DCHECK_EQ(out_opcode_, 0);
54  set_out_opcode(out, kInstEmptyWidth);
55  empty_ = empty;
56 }
57 
59  DCHECK_EQ(out_opcode_, 0);
60  set_opcode(kInstMatch);
61  match_id_ = id;
62 }
63 
65  DCHECK_EQ(out_opcode_, 0);
66  set_opcode(kInstNop);
67 }
68 
69 void Prog::Inst::InitFail() {
70  DCHECK_EQ(out_opcode_, 0);
71  set_opcode(kInstFail);
72 }
73 
75  switch (opcode()) {
76  default:
77  return StringPrintf("opcode %d", static_cast<int>(opcode()));
78 
79  case kInstAlt:
80  return StringPrintf("alt -> %d | %d", out(), out1_);
81 
82  case kInstAltMatch:
83  return StringPrintf("altmatch -> %d | %d", out(), out1_);
84 
85  case kInstByteRange:
86  return StringPrintf("byte%s [%02x-%02x] %d -> %d",
87  foldcase() ? "/i" : "",
88  lo_, hi_, hint(), out());
89 
90  case kInstCapture:
91  return StringPrintf("capture %d -> %d", cap_, out());
92 
93  case kInstEmptyWidth:
94  return StringPrintf("emptywidth %#x -> %d",
95  static_cast<int>(empty_), out());
96 
97  case kInstMatch:
98  return StringPrintf("match! %d", match_id());
99 
100  case kInstNop:
101  return StringPrintf("nop -> %d", out());
102 
103  case kInstFail:
104  return StringPrintf("fail");
105  }
106 }
107 
108 Prog::Prog()
109  : anchor_start_(false),
111  reversed_(false),
114  start_(0),
116  size_(0),
117  bytemap_range_(0),
119  prefix_size_(0),
120  list_count_(0),
121  dfa_mem_(0),
122  dfa_first_(NULL),
123  dfa_longest_(NULL) {
124 }
125 
126 Prog::~Prog() {
129  if (prefix_foldcase_)
130  delete[] prefix_dfa_;
131 }
132 
133 typedef SparseSet Workq;
134 
135 static inline void AddToQueue(Workq* q, int id) {
136  if (id != 0)
137  q->insert(id);
138 }
139 
141  std::string s;
142  for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
143  int id = *i;
144  Prog::Inst* ip = prog->inst(id);
145  s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
146  AddToQueue(q, ip->out());
147  if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
148  AddToQueue(q, ip->out1());
149  }
150  return s;
151 }
152 
154  std::string s;
155  for (int id = start; id < prog->size(); id++) {
156  Prog::Inst* ip = prog->inst(id);
157  if (ip->last())
158  s += StringPrintf("%d. %s\n", id, ip->Dump().c_str());
159  else
160  s += StringPrintf("%d+ %s\n", id, ip->Dump().c_str());
161  }
162  return s;
163 }
164 
166  if (did_flatten_)
167  return FlattenedProgToString(this, start_);
168 
169  Workq q(size_);
170  AddToQueue(&q, start_);
171  return ProgToString(this, &q);
172 }
173 
175  if (did_flatten_)
177 
178  Workq q(size_);
180  return ProgToString(this, &q);
181 }
182 
185  for (int c = 0; c < 256; c++) {
186  int b = bytemap_[c];
187  int lo = c;
188  while (c < 256-1 && bytemap_[c+1] == b)
189  c++;
190  int hi = c;
191  map += StringPrintf("[%02x-%02x] -> %d\n", lo, hi, b);
192  }
193  return map;
194 }
195 
196 // Is ip a guaranteed match at end of text, perhaps after some capturing?
197 static bool IsMatch(Prog* prog, Prog::Inst* ip) {
198  for (;;) {
199  switch (ip->opcode()) {
200  default:
201  LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
202  return false;
203 
204  case kInstAlt:
205  case kInstAltMatch:
206  case kInstByteRange:
207  case kInstFail:
208  case kInstEmptyWidth:
209  return false;
210 
211  case kInstCapture:
212  case kInstNop:
213  ip = prog->inst(ip->out());
214  break;
215 
216  case kInstMatch:
217  return true;
218  }
219  }
220 }
221 
222 // Peep-hole optimizer.
223 void Prog::Optimize() {
224  Workq q(size_);
225 
226  // Eliminate nops. Most are taken out during compilation
227  // but a few are hard to avoid.
228  q.clear();
229  AddToQueue(&q, start_);
230  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
231  int id = *i;
232 
233  Inst* ip = inst(id);
234  int j = ip->out();
235  Inst* jp;
236  while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
237  j = jp->out();
238  }
239  ip->set_out(j);
240  AddToQueue(&q, ip->out());
241 
242  if (ip->opcode() == kInstAlt) {
243  j = ip->out1();
244  while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
245  j = jp->out();
246  }
247  ip->out1_ = j;
248  AddToQueue(&q, ip->out1());
249  }
250  }
251 
252  // Insert kInstAltMatch instructions
253  // Look for
254  // ip: Alt -> j | k
255  // j: ByteRange [00-FF] -> ip
256  // k: Match
257  // or the reverse (the above is the greedy one).
258  // Rewrite Alt to AltMatch.
259  q.clear();
260  AddToQueue(&q, start_);
261  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
262  int id = *i;
263  Inst* ip = inst(id);
264  AddToQueue(&q, ip->out());
265  if (ip->opcode() == kInstAlt)
266  AddToQueue(&q, ip->out1());
267 
268  if (ip->opcode() == kInstAlt) {
269  Inst* j = inst(ip->out());
270  Inst* k = inst(ip->out1());
271  if (j->opcode() == kInstByteRange && j->out() == id &&
272  j->lo() == 0x00 && j->hi() == 0xFF &&
273  IsMatch(this, k)) {
274  ip->set_opcode(kInstAltMatch);
275  continue;
276  }
277  if (IsMatch(this, j) &&
278  k->opcode() == kInstByteRange && k->out() == id &&
279  k->lo() == 0x00 && k->hi() == 0xFF) {
280  ip->set_opcode(kInstAltMatch);
281  }
282  }
283  }
284 }
285 
286 uint32_t Prog::EmptyFlags(const StringPiece& text, const char* p) {
287  int flags = 0;
288 
289  // ^ and \A
290  if (p == text.data())
292  else if (p[-1] == '\n')
294 
295  // $ and \z
296  if (p == text.data() + text.size())
298  else if (p < text.data() + text.size() && p[0] == '\n')
299  flags |= kEmptyEndLine;
300 
301  // \b and \B
302  if (p == text.data() && p == text.data() + text.size()) {
303  // no word boundary here
304  } else if (p == text.data()) {
305  if (IsWordChar(p[0]))
307  } else if (p == text.data() + text.size()) {
308  if (IsWordChar(p[-1]))
310  } else {
311  if (IsWordChar(p[-1]) != IsWordChar(p[0]))
313  }
314  if (!(flags & kEmptyWordBoundary))
316 
317  return flags;
318 }
319 
320 // ByteMapBuilder implements a coloring algorithm.
321 //
322 // The first phase is a series of "mark and merge" batches: we mark one or more
323 // [lo-hi] ranges, then merge them into our internal state. Batching is not for
324 // performance; rather, it means that the ranges are treated indistinguishably.
325 //
326 // Internally, the ranges are represented using a bitmap that stores the splits
327 // and a vector that stores the colors; both of them are indexed by the ranges'
328 // last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
329 // hi (if not already split), then recolor each range in between. The color map
330 // (i.e. from the old color to the new color) is maintained for the lifetime of
331 // the batch and so underpins this somewhat obscure approach to set operations.
332 //
333 // The second phase builds the bytemap from our internal state: we recolor each
334 // range, then store the new color (which is now the byte class) in each of the
335 // corresponding array elements. Finally, we output the number of byte classes.
336 class ByteMapBuilder {
337  public:
339  // Initial state: the [0-255] range has color 256.
340  // This will avoid problems during the second phase,
341  // in which we assign byte classes numbered from 0.
342  splits_.Set(255);
343  colors_[255] = 256;
344  nextcolor_ = 257;
345  }
346 
347  void Mark(int lo, int hi);
348  void Merge();
349  void Build(uint8_t* bytemap, int* bytemap_range);
350 
351  private:
352  int Recolor(int oldcolor);
353 
355  int colors_[256];
356  int nextcolor_;
357  std::vector<std::pair<int, int>> colormap_;
358  std::vector<std::pair<int, int>> ranges_;
359 
360  ByteMapBuilder(const ByteMapBuilder&) = delete;
361  ByteMapBuilder& operator=(const ByteMapBuilder&) = delete;
362 };
363 
364 void ByteMapBuilder::Mark(int lo, int hi) {
365  DCHECK_GE(lo, 0);
366  DCHECK_GE(hi, 0);
367  DCHECK_LE(lo, 255);
368  DCHECK_LE(hi, 255);
369  DCHECK_LE(lo, hi);
370 
371  // Ignore any [0-255] ranges. They cause us to recolor every range, which
372  // has no effect on the eventual result and is therefore a waste of time.
373  if (lo == 0 && hi == 255)
374  return;
375 
376  ranges_.emplace_back(lo, hi);
377 }
378 
379 void ByteMapBuilder::Merge() {
380  for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin();
381  it != ranges_.end();
382  ++it) {
383  int lo = it->first-1;
384  int hi = it->second;
385 
386  if (0 <= lo && !splits_.Test(lo)) {
387  splits_.Set(lo);
388  int next = splits_.FindNextSetBit(lo+1);
389  colors_[lo] = colors_[next];
390  }
391  if (!splits_.Test(hi)) {
392  splits_.Set(hi);
393  int next = splits_.FindNextSetBit(hi+1);
394  colors_[hi] = colors_[next];
395  }
396 
397  int c = lo+1;
398  while (c < 256) {
399  int next = splits_.FindNextSetBit(c);
401  if (next == hi)
402  break;
403  c = next+1;
404  }
405  }
406  colormap_.clear();
407  ranges_.clear();
408 }
409 
410 void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) {
411  // Assign byte classes numbered from 0.
412  nextcolor_ = 0;
413 
414  int c = 0;
415  while (c < 256) {
416  int next = splits_.FindNextSetBit(c);
417  uint8_t b = static_cast<uint8_t>(Recolor(colors_[next]));
418  while (c <= next) {
419  bytemap[c] = b;
420  c++;
421  }
422  }
423 
424  *bytemap_range = nextcolor_;
425 }
426 
427 int ByteMapBuilder::Recolor(int oldcolor) {
428  // Yes, this is a linear search. There can be at most 256
429  // colors and there will typically be far fewer than that.
430  // Also, we need to consider keys *and* values in order to
431  // avoid recoloring a given range more than once per batch.
432  std::vector<std::pair<int, int>>::const_iterator it =
433  std::find_if(colormap_.begin(), colormap_.end(),
434  [=](const std::pair<int, int>& kv) -> bool {
435  return kv.first == oldcolor || kv.second == oldcolor;
436  });
437  if (it != colormap_.end())
438  return it->second;
439  int newcolor = nextcolor_;
440  nextcolor_++;
441  colormap_.emplace_back(oldcolor, newcolor);
442  return newcolor;
443 }
444 
445 void Prog::ComputeByteMap() {
446  // Fill in bytemap with byte classes for the program.
447  // Ranges of bytes that are treated indistinguishably
448  // will be mapped to a single byte class.
449  ByteMapBuilder builder;
450 
451  // Don't repeat the work for ^ and $.
452  bool marked_line_boundaries = false;
453  // Don't repeat the work for \b and \B.
454  bool marked_word_boundaries = false;
455 
456  for (int id = 0; id < size(); id++) {
457  Inst* ip = inst(id);
458  if (ip->opcode() == kInstByteRange) {
459  int lo = ip->lo();
460  int hi = ip->hi();
461  builder.Mark(lo, hi);
462  if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
463  int foldlo = lo;
464  int foldhi = hi;
465  if (foldlo < 'a')
466  foldlo = 'a';
467  if (foldhi > 'z')
468  foldhi = 'z';
469  if (foldlo <= foldhi) {
470  foldlo += 'A' - 'a';
471  foldhi += 'A' - 'a';
472  builder.Mark(foldlo, foldhi);
473  }
474  }
475  // If this Inst is not the last Inst in its list AND the next Inst is
476  // also a ByteRange AND the Insts have the same out, defer the merge.
477  if (!ip->last() &&
478  inst(id+1)->opcode() == kInstByteRange &&
479  ip->out() == inst(id+1)->out())
480  continue;
481  builder.Merge();
482  } else if (ip->opcode() == kInstEmptyWidth) {
483  if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
484  !marked_line_boundaries) {
485  builder.Mark('\n', '\n');
486  builder.Merge();
487  marked_line_boundaries = true;
488  }
489  if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) &&
490  !marked_word_boundaries) {
491  // We require two batches here: the first for ranges that are word
492  // characters, the second for ranges that are not word characters.
493  for (bool isword : {true, false}) {
494  int j;
495  for (int i = 0; i < 256; i = j) {
496  for (j = i + 1; j < 256 &&
497  Prog::IsWordChar(static_cast<uint8_t>(i)) ==
498  Prog::IsWordChar(static_cast<uint8_t>(j));
499  j++)
500  ;
501  if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword)
502  builder.Mark(i, j - 1);
503  }
504  builder.Merge();
505  }
506  marked_word_boundaries = true;
507  }
508  }
509  }
510 
512 
513  if (0) { // For debugging, use trivial bytemap.
514  LOG(ERROR) << "Using trivial bytemap.";
515  for (int i = 0; i < 256; i++)
516  bytemap_[i] = static_cast<uint8_t>(i);
517  bytemap_range_ = 256;
518  }
519 }
520 
521 // Prog::Flatten() implements a graph rewriting algorithm.
522 //
523 // The overall process is similar to epsilon removal, but retains some epsilon
524 // transitions: those from Capture and EmptyWidth instructions; and those from
525 // nullable subexpressions. (The latter avoids quadratic blowup in transitions
526 // in the worst case.) It might be best thought of as Alt instruction elision.
527 //
528 // In conceptual terms, it divides the Prog into "trees" of instructions, then
529 // traverses the "trees" in order to produce "lists" of instructions. A "tree"
530 // is one or more instructions that grow from one "root" instruction to one or
531 // more "leaf" instructions; if a "tree" has exactly one instruction, then the
532 // "root" is also the "leaf". In most cases, a "root" is the successor of some
533 // "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
534 // and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
535 // EmptyWidth or Match instruction. However, this is insufficient for handling
536 // nested nullable subexpressions correctly, so in some cases, a "root" is the
537 // dominator of the instructions reachable from some "successor root" (i.e. it
538 // has an unreachable predecessor) and is considered a "dominator root". Since
539 // only Alt instructions can be "dominator roots" (other instructions would be
540 // "leaves"), only Alt instructions are required to be marked as predecessors.
541 //
542 // Dividing the Prog into "trees" comprises two passes: marking the "successor
543 // roots" and the predecessors; and marking the "dominator roots". Sorting the
544 // "successor roots" by their bytecode offsets enables iteration in order from
545 // greatest to least during the second pass; by working backwards in this case
546 // and flooding the graph no further than "leaves" and already marked "roots",
547 // it becomes possible to mark "dominator roots" without doing excessive work.
548 //
549 // Traversing the "trees" is just iterating over the "roots" in order of their
550 // marking and flooding the graph no further than "leaves" and "roots". When a
551 // "leaf" is reached, the instruction is copied with its successor remapped to
552 // its "root" number. When a "root" is reached, a Nop instruction is generated
553 // with its successor remapped similarly. As each "list" is produced, its last
554 // instruction is marked as such. After all of the "lists" have been produced,
555 // a pass over their instructions remaps their successors to bytecode offsets.
556 void Prog::Flatten() {
557  if (did_flatten_)
558  return;
559  did_flatten_ = true;
560 
561  // Scratch structures. It's important that these are reused by functions
562  // that we call in loops because they would thrash the heap otherwise.
563  SparseSet reachable(size());
564  std::vector<int> stk;
565  stk.reserve(size());
566 
567  // First pass: Marks "successor roots" and predecessors.
568  // Builds the mapping from inst-ids to root-ids.
569  SparseArray<int> rootmap(size());
570  SparseArray<int> predmap(size());
571  std::vector<std::vector<int>> predvec;
572  MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk);
573 
574  // Second pass: Marks "dominator roots".
575  SparseArray<int> sorted(rootmap);
576  std::sort(sorted.begin(), sorted.end(), sorted.less);
577  for (SparseArray<int>::const_iterator i = sorted.end() - 1;
578  i != sorted.begin();
579  --i) {
580  if (i->index() != start_unanchored() && i->index() != start())
581  MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
582  }
583 
584  // Third pass: Emits "lists". Remaps outs to root-ids.
585  // Builds the mapping from root-ids to flat-ids.
586  std::vector<int> flatmap(rootmap.size());
587  std::vector<Inst> flat;
588  flat.reserve(size());
589  for (SparseArray<int>::const_iterator i = rootmap.begin();
590  i != rootmap.end();
591  ++i) {
592  flatmap[i->value()] = static_cast<int>(flat.size());
593  EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
594  flat.back().set_last();
595  // We have the bounds of the "list", so this is the
596  // most convenient point at which to compute hints.
597  ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
598  }
599 
600  list_count_ = static_cast<int>(flatmap.size());
601  for (int i = 0; i < kNumInst; i++)
602  inst_count_[i] = 0;
603 
604  // Fourth pass: Remaps outs to flat-ids.
605  // Counts instructions by opcode.
606  for (int id = 0; id < static_cast<int>(flat.size()); id++) {
607  Inst* ip = &flat[id];
608  if (ip->opcode() != kInstAltMatch) // handled in EmitList()
609  ip->set_out(flatmap[ip->out()]);
610  inst_count_[ip->opcode()]++;
611  }
612 
613  int total = 0;
614  for (int i = 0; i < kNumInst; i++)
615  total += inst_count_[i];
616  DCHECK_EQ(total, static_cast<int>(flat.size()));
617 
618  // Remap start_unanchored and start.
619  if (start_unanchored() == 0) {
620  DCHECK_EQ(start(), 0);
621  } else if (start_unanchored() == start()) {
622  set_start_unanchored(flatmap[1]);
623  set_start(flatmap[1]);
624  } else {
625  set_start_unanchored(flatmap[1]);
626  set_start(flatmap[2]);
627  }
628 
629  // Finally, replace the old instructions with the new instructions.
630  size_ = static_cast<int>(flat.size());
631  inst_ = PODArray<Inst>(size_);
632  memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
633 
634  // Populate the list heads for BitState.
635  // 512 instructions limits the memory footprint to 1KiB.
636  if (size_ <= 512) {
637  list_heads_ = PODArray<uint16_t>(size_);
638  // 0xFF makes it more obvious if we try to look up a non-head.
639  memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
640  for (int i = 0; i < list_count_; ++i)
641  list_heads_[flatmap[i]] = i;
642  }
643 }
644 
645 void Prog::MarkSuccessors(SparseArray<int>* rootmap,
646  SparseArray<int>* predmap,
647  std::vector<std::vector<int>>* predvec,
648  SparseSet* reachable, std::vector<int>* stk) {
649  // Mark the kInstFail instruction.
650  rootmap->set_new(0, rootmap->size());
651 
652  // Mark the start_unanchored and start instructions.
653  if (!rootmap->has_index(start_unanchored()))
654  rootmap->set_new(start_unanchored(), rootmap->size());
655  if (!rootmap->has_index(start()))
656  rootmap->set_new(start(), rootmap->size());
657 
658  reachable->clear();
659  stk->clear();
660  stk->push_back(start_unanchored());
661  while (!stk->empty()) {
662  int id = stk->back();
663  stk->pop_back();
664  Loop:
665  if (reachable->contains(id))
666  continue;
667  reachable->insert_new(id);
668 
669  Inst* ip = inst(id);
670  switch (ip->opcode()) {
671  default:
672  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
673  break;
674 
675  case kInstAltMatch:
676  case kInstAlt:
677  // Mark this instruction as a predecessor of each out.
678  for (int out : {ip->out(), ip->out1()}) {
679  if (!predmap->has_index(out)) {
680  predmap->set_new(out, static_cast<int>(predvec->size()));
681  predvec->emplace_back();
682  }
683  (*predvec)[predmap->get_existing(out)].emplace_back(id);
684  }
685  stk->push_back(ip->out1());
686  id = ip->out();
687  goto Loop;
688 
689  case kInstByteRange:
690  case kInstCapture:
691  case kInstEmptyWidth:
692  // Mark the out of this instruction as a "root".
693  if (!rootmap->has_index(ip->out()))
694  rootmap->set_new(ip->out(), rootmap->size());
695  id = ip->out();
696  goto Loop;
697 
698  case kInstNop:
699  id = ip->out();
700  goto Loop;
701 
702  case kInstMatch:
703  case kInstFail:
704  break;
705  }
706  }
707 }
708 
709 void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
710  SparseArray<int>* predmap,
711  std::vector<std::vector<int>>* predvec,
712  SparseSet* reachable, std::vector<int>* stk) {
713  reachable->clear();
714  stk->clear();
715  stk->push_back(root);
716  while (!stk->empty()) {
717  int id = stk->back();
718  stk->pop_back();
719  Loop:
720  if (reachable->contains(id))
721  continue;
722  reachable->insert_new(id);
723 
724  if (id != root && rootmap->has_index(id)) {
725  // We reached another "tree" via epsilon transition.
726  continue;
727  }
728 
729  Inst* ip = inst(id);
730  switch (ip->opcode()) {
731  default:
732  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
733  break;
734 
735  case kInstAltMatch:
736  case kInstAlt:
737  stk->push_back(ip->out1());
738  id = ip->out();
739  goto Loop;
740 
741  case kInstByteRange:
742  case kInstCapture:
743  case kInstEmptyWidth:
744  break;
745 
746  case kInstNop:
747  id = ip->out();
748  goto Loop;
749 
750  case kInstMatch:
751  case kInstFail:
752  break;
753  }
754  }
755 
756  for (SparseSet::const_iterator i = reachable->begin();
757  i != reachable->end();
758  ++i) {
759  int id = *i;
760  if (predmap->has_index(id)) {
761  for (int pred : (*predvec)[predmap->get_existing(id)]) {
762  if (!reachable->contains(pred)) {
763  // id has a predecessor that cannot be reached from root!
764  // Therefore, id must be a "root" too - mark it as such.
765  if (!rootmap->has_index(id))
766  rootmap->set_new(id, rootmap->size());
767  }
768  }
769  }
770  }
771 }
772 
773 void Prog::EmitList(int root, SparseArray<int>* rootmap,
774  std::vector<Inst>* flat,
775  SparseSet* reachable, std::vector<int>* stk) {
776  reachable->clear();
777  stk->clear();
778  stk->push_back(root);
779  while (!stk->empty()) {
780  int id = stk->back();
781  stk->pop_back();
782  Loop:
783  if (reachable->contains(id))
784  continue;
785  reachable->insert_new(id);
786 
787  if (id != root && rootmap->has_index(id)) {
788  // We reached another "tree" via epsilon transition. Emit a kInstNop
789  // instruction so that the Prog does not become quadratically larger.
790  flat->emplace_back();
791  flat->back().set_opcode(kInstNop);
792  flat->back().set_out(rootmap->get_existing(id));
793  continue;
794  }
795 
796  Inst* ip = inst(id);
797  switch (ip->opcode()) {
798  default:
799  LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
800  break;
801 
802  case kInstAltMatch:
803  flat->emplace_back();
804  flat->back().set_opcode(kInstAltMatch);
805  flat->back().set_out(static_cast<int>(flat->size()));
806  flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
808 
809  case kInstAlt:
810  stk->push_back(ip->out1());
811  id = ip->out();
812  goto Loop;
813 
814  case kInstByteRange:
815  case kInstCapture:
816  case kInstEmptyWidth:
817  flat->emplace_back();
818  memmove(&flat->back(), ip, sizeof *ip);
819  flat->back().set_out(rootmap->get_existing(ip->out()));
820  break;
821 
822  case kInstNop:
823  id = ip->out();
824  goto Loop;
825 
826  case kInstMatch:
827  case kInstFail:
828  flat->emplace_back();
829  memmove(&flat->back(), ip, sizeof *ip);
830  break;
831  }
832  }
833 }
834 
835 // For each ByteRange instruction in [begin, end), computes a hint to execution
836 // engines: the delta to the next instruction (in flat) worth exploring iff the
837 // current instruction matched.
838 //
839 // Implements a coloring algorithm related to ByteMapBuilder, but in this case,
840 // colors are instructions and recoloring ranges precisely identifies conflicts
841 // between instructions. Iterating backwards over [begin, end) is guaranteed to
842 // identify the nearest conflict (if any) with only linear complexity.
843 void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
844  Bitmap256 splits;
845  int colors[256];
846 
847  bool dirty = false;
848  for (int id = end; id >= begin; --id) {
849  if (id == end ||
850  (*flat)[id].opcode() != kInstByteRange) {
851  if (dirty) {
852  dirty = false;
853  splits.Clear();
854  }
855  splits.Set(255);
856  colors[255] = id;
857  // At this point, the [0-255] range is colored with id.
858  // Thus, hints cannot point beyond id; and if id == end,
859  // hints that would have pointed to id will be 0 instead.
860  continue;
861  }
862  dirty = true;
863 
864  // We recolor the [lo-hi] range with id. Note that first ratchets backwards
865  // from end to the nearest conflict (if any) during recoloring.
866  int first = end;
867  auto Recolor = [&](int lo, int hi) {
868  // Like ByteMapBuilder, we split at lo-1 and at hi.
869  --lo;
870 
871  if (0 <= lo && !splits.Test(lo)) {
872  splits.Set(lo);
873  int next = splits.FindNextSetBit(lo+1);
874  colors[lo] = colors[next];
875  }
876  if (!splits.Test(hi)) {
877  splits.Set(hi);
878  int next = splits.FindNextSetBit(hi+1);
879  colors[hi] = colors[next];
880  }
881 
882  int c = lo+1;
883  while (c < 256) {
884  int next = splits.FindNextSetBit(c);
885  // Ratchet backwards...
886  first = std::min(first, colors[next]);
887  // Recolor with id - because it's the new nearest conflict!
888  colors[next] = id;
889  if (next == hi)
890  break;
891  c = next+1;
892  }
893  };
894 
895  Inst* ip = &(*flat)[id];
896  int lo = ip->lo();
897  int hi = ip->hi();
898  Recolor(lo, hi);
899  if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
900  int foldlo = lo;
901  int foldhi = hi;
902  if (foldlo < 'a')
903  foldlo = 'a';
904  if (foldhi > 'z')
905  foldhi = 'z';
906  if (foldlo <= foldhi) {
907  foldlo += 'A' - 'a';
908  foldhi += 'A' - 'a';
909  Recolor(foldlo, foldhi);
910  }
911  }
912 
913  if (first != end) {
914  uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
915  ip->hint_foldcase_ |= hint<<1;
916  }
917  }
918 }
919 
920 // The final state will always be this, which frees up a register for the hot
921 // loop and thus avoids the spilling that can occur when building with Clang.
922 static const size_t kShiftDFAFinal = 9;
923 
924 // This function takes the prefix as std::string (i.e. not const std::string&
925 // as normal) because it's going to clobber it, so a temporary is convenient.
927  // This constant is for convenience now and also for correctness later when
928  // we clobber the prefix, but still need to know how long it was initially.
929  const size_t size = prefix.size();
930 
931  // Construct the NFA.
932  // The table is indexed by input byte; each element is a bitfield of states
933  // reachable by the input byte. Given a bitfield of the current states, the
934  // bitfield of states reachable from those is - for this specific purpose -
935  // always ((ncurr << 1) | 1). Intersecting the reachability bitfields gives
936  // the bitfield of the next states reached by stepping over the input byte.
937  // Credits for this technique: the Hyperscan paper by Geoff Langdale et al.
938  uint16_t nfa[256]{};
939  for (size_t i = 0; i < size; ++i) {
940  uint8_t b = prefix[i];
941  nfa[b] |= 1 << (i+1);
942  }
943  // This is the `\C*?` for unanchored search.
944  for (int b = 0; b < 256; ++b)
945  nfa[b] |= 1;
946 
947  // This maps from DFA state to NFA states; the reverse mapping is used when
948  // recording transitions and gets implemented with plain old linear search.
949  // The "Shift DFA" technique limits this to ten states when using uint64_t;
950  // to allow for the initial state, we use at most nine bytes of the prefix.
951  // That same limit is also why uint16_t is sufficient for the NFA bitfield.
952  uint16_t states[kShiftDFAFinal+1]{};
953  states[0] = 1;
954  for (size_t dcurr = 0; dcurr < size; ++dcurr) {
955  uint8_t b = prefix[dcurr];
956  uint16_t ncurr = states[dcurr];
957  uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
958  size_t dnext = dcurr+1;
959  if (dnext == size)
960  dnext = kShiftDFAFinal;
961  states[dnext] = nnext;
962  }
963 
964  // Sort and unique the bytes of the prefix to avoid repeating work while we
965  // record transitions. This clobbers the prefix, but it's no longer needed.
966  std::sort(prefix.begin(), prefix.end());
967  prefix.erase(std::unique(prefix.begin(), prefix.end()), prefix.end());
968 
969  // Construct the DFA.
970  // The table is indexed by input byte; each element is effectively a packed
971  // array of uint6_t; each array value will be multiplied by six in order to
972  // avoid having to do so later in the hot loop as well as masking/shifting.
973  // Credits for this technique: "Shift-based DFAs" on GitHub by Per Vognsen.
974  uint64_t* dfa = new uint64_t[256]{};
975  // Record a transition from each state for each of the bytes of the prefix.
976  // Note that all other input bytes go back to the initial state by default.
977  for (size_t dcurr = 0; dcurr < size; ++dcurr) {
978  for (uint8_t b : prefix) {
979  uint16_t ncurr = states[dcurr];
980  uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
981  size_t dnext = 0;
982  while (states[dnext] != nnext)
983  ++dnext;
984  dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
985  // Convert ASCII letters to uppercase and record the extra transitions.
986  // Note that ASCII letters are guaranteed to be lowercase at this point
987  // because that's how the parser normalises them. #FunFact: 'k' and 's'
988  // match U+212A and U+017F, respectively, so they won't occur here when
989  // using UTF-8 encoding because the parser will emit character classes.
990  if ('a' <= b && b <= 'z') {
991  b -= 'a' - 'A';
992  dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
993  }
994  }
995  }
996  // This lets the final state "saturate", which will matter for performance:
997  // in the hot loop, we check for a match only at the end of each iteration,
998  // so we must keep signalling the match until we get around to checking it.
999  for (int b = 0; b < 256; ++b)
1000  dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6);
1001 
1002  return dfa;
1003 }
1004 
1006  bool prefix_foldcase) {
1007  prefix_foldcase_ = prefix_foldcase;
1008  prefix_size_ = prefix.size();
1009  if (prefix_foldcase_) {
1010  // Use PrefixAccel_ShiftDFA().
1011  // ... and no more than nine bytes of the prefix. (See above for details.)
1014  } else if (prefix_size_ != 1) {
1015  // Use PrefixAccel_FrontAndBack().
1016  prefix_front_ = prefix.front();
1017  prefix_back_ = prefix.back();
1018  } else {
1019  // Use memchr(3).
1020  prefix_front_ = prefix.front();
1021  }
1022 }
1023 
1024 const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
1025  if (size < prefix_size_)
1026  return NULL;
1027 
1028  uint64_t curr = 0;
1029 
1030  // At the time of writing, rough benchmarks on a Broadwell machine showed
1031  // that this unroll factor (i.e. eight) achieves a speedup factor of two.
1032  if (size >= 8) {
1033  const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
1034  const uint8_t* endp = p + (size&~7);
1035  do {
1036  uint8_t b0 = p[0];
1037  uint8_t b1 = p[1];
1038  uint8_t b2 = p[2];
1039  uint8_t b3 = p[3];
1040  uint8_t b4 = p[4];
1041  uint8_t b5 = p[5];
1042  uint8_t b6 = p[6];
1043  uint8_t b7 = p[7];
1044 
1045  uint64_t next0 = prefix_dfa_[b0];
1046  uint64_t next1 = prefix_dfa_[b1];
1047  uint64_t next2 = prefix_dfa_[b2];
1048  uint64_t next3 = prefix_dfa_[b3];
1049  uint64_t next4 = prefix_dfa_[b4];
1050  uint64_t next5 = prefix_dfa_[b5];
1051  uint64_t next6 = prefix_dfa_[b6];
1052  uint64_t next7 = prefix_dfa_[b7];
1053 
1054  uint64_t curr0 = next0 >> (curr & 63);
1055  uint64_t curr1 = next1 >> (curr0 & 63);
1056  uint64_t curr2 = next2 >> (curr1 & 63);
1057  uint64_t curr3 = next3 >> (curr2 & 63);
1058  uint64_t curr4 = next4 >> (curr3 & 63);
1059  uint64_t curr5 = next5 >> (curr4 & 63);
1060  uint64_t curr6 = next6 >> (curr5 & 63);
1061  uint64_t curr7 = next7 >> (curr6 & 63);
1062 
1063  if ((curr7 & 63) == kShiftDFAFinal * 6) {
1064  // At the time of writing, using the same masking subexpressions from
1065  // the preceding lines caused Clang to clutter the hot loop computing
1066  // them - even though they aren't actually needed for shifting! Hence
1067  // these rewritten conditions, which achieve a speedup factor of two.
1068  if (((curr7-curr0) & 63) == 0) return p+1-prefix_size_;
1069  if (((curr7-curr1) & 63) == 0) return p+2-prefix_size_;
1070  if (((curr7-curr2) & 63) == 0) return p+3-prefix_size_;
1071  if (((curr7-curr3) & 63) == 0) return p+4-prefix_size_;
1072  if (((curr7-curr4) & 63) == 0) return p+5-prefix_size_;
1073  if (((curr7-curr5) & 63) == 0) return p+6-prefix_size_;
1074  if (((curr7-curr6) & 63) == 0) return p+7-prefix_size_;
1075  if (((curr7-curr7) & 63) == 0) return p+8-prefix_size_;
1076  }
1077 
1078  curr = curr7;
1079  p += 8;
1080  } while (p != endp);
1081  data = p;
1082  size = size&7;
1083  }
1084 
1085  const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
1086  const uint8_t* endp = p + size;
1087  while (p != endp) {
1088  uint8_t b = *p++;
1090  curr = next >> (curr & 63);
1091  if ((curr & 63) == kShiftDFAFinal * 6)
1092  return p-prefix_size_;
1093  }
1094  return NULL;
1095 }
1096 
1097 #if defined(__AVX2__)
1098 // Finds the least significant non-zero bit in n.
1099 static int FindLSBSet(uint32_t n) {
1100  DCHECK_NE(n, 0);
1101 #if defined(__GNUC__)
1102  return __builtin_ctz(n);
1103 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
1104  unsigned long c;
1105  _BitScanForward(&c, n);
1106  return static_cast<int>(c);
1107 #else
1108  int c = 31;
1109  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
1110  uint32_t word = n << shift;
1111  if (word != 0) {
1112  n = word;
1113  c -= shift;
1114  }
1115  }
1116  return c;
1117 #endif
1118 }
1119 #endif
1120 
1121 const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
1122  DCHECK_GE(prefix_size_, 2);
1123  if (size < prefix_size_)
1124  return NULL;
1125  // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
1126  // This also means that probing for prefix_back_ doesn't go out of bounds.
1127  size -= prefix_size_-1;
1128 
1129 #if defined(__AVX2__)
1130  // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
1131  if (size >= sizeof(__m256i)) {
1132  const __m256i* fp = reinterpret_cast<const __m256i*>(
1133  reinterpret_cast<const char*>(data));
1134  const __m256i* bp = reinterpret_cast<const __m256i*>(
1135  reinterpret_cast<const char*>(data) + prefix_size_-1);
1136  const __m256i* endfp = fp + size/sizeof(__m256i);
1137  const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
1138  const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
1139  do {
1140  const __m256i f_loadu = _mm256_loadu_si256(fp++);
1141  const __m256i b_loadu = _mm256_loadu_si256(bp++);
1142  const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
1143  const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
1144  const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
1145  if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero.
1146  const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
1147  const int fb_movemask = _mm256_movemask_epi8(fb_and);
1148  const int fb_ctz = FindLSBSet(fb_movemask);
1149  return reinterpret_cast<const char*>(fp-1) + fb_ctz;
1150  }
1151  } while (fp != endfp);
1152  data = fp;
1153  size = size%sizeof(__m256i);
1154  }
1155 #endif
1156 
1157  const char* p0 = reinterpret_cast<const char*>(data);
1158  for (const char* p = p0;; p++) {
1159  DCHECK_GE(size, static_cast<size_t>(p-p0));
1160  p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
1161  if (p == NULL || p[prefix_size_-1] == prefix_back_)
1162  return p;
1163  }
1164 }
1165 
1166 } // namespace re2
re2::Prog::Inst::out1_
uint32_t out1_
Definition: bloaty/third_party/re2/re2/prog.h:135
re2::kInstFail
@ kInstFail
Definition: bloaty/third_party/re2/re2/prog.h:37
re2::Prog::dfa_first_
DFA * dfa_first_
Definition: bloaty/third_party/re2/re2/prog.h:417
re2::ByteMapBuilder::ByteMapBuilder
ByteMapBuilder()
Definition: re2/re2/prog.cc:338
gen_build_yaml.out
dictionary out
Definition: src/benchmark/gen_build_yaml.py:24
re2::Prog::did_onepass_
bool did_onepass_
Definition: bloaty/third_party/re2/re2/prog.h:399
re2::SparseSet
SparseSetT< void > SparseSet
Definition: bloaty/third_party/re2/util/sparse_set.h:260
regen-readme.it
it
Definition: regen-readme.py:15
re2::IsMatch
static bool IsMatch(Prog *, Prog::Inst *)
Definition: bloaty/third_party/re2/re2/prog.cc:262
prog
char * prog
Definition: bloaty/third_party/zlib/contrib/untgz/untgz.c:125
absl::str_format_internal::LengthMod::j
@ j
re2::Prog::DumpUnanchored
std::string DumpUnanchored()
Definition: bloaty/third_party/re2/re2/prog.cc:166
re2::ByteMapBuilder::operator=
ByteMapBuilder & operator=(const ByteMapBuilder &)=delete
re2::Prog::Inst::InitCapture
void InitCapture(int cap, uint32_t out)
Definition: bloaty/third_party/re2/re2/prog.cc:40
re2::Prog::prefix_back_
int prefix_back_
Definition: re2/re2/prog.h:428
re2::Prog::Inst::out1
int out1()
Definition: bloaty/third_party/re2/re2/prog.h:85
re2::Prog::flags
int flags()
Definition: bloaty/third_party/re2/re2/prog.h:212
memset
return memset(p, 0, total)
fix_build_deps.c
list c
Definition: fix_build_deps.py:490
re2::Prog::prefix_size_
size_t prefix_size_
Definition: re2/re2/prog.h:423
begin
char * begin
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1007
false
#define false
Definition: setup_once.h:323
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
DCHECK_LE
#define DCHECK_LE(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:22
re2::SparseArray::const_iterator
const typedef IndexValue * const_iterator
Definition: bloaty/third_party/re2/util/sparse_array.h:120
re2::Prog::Inst
Definition: bloaty/third_party/re2/re2/prog.h:62
re2::Prog::Prog
Prog()
Definition: bloaty/third_party/re2/re2/prog.cc:102
re2::kShiftDFAFinal
static const size_t kShiftDFAFinal
Definition: re2/re2/prog.cc:922
total
size_t total
Definition: cord_analysis.cc:59
re2::Prog::reversed_
bool reversed_
Definition: bloaty/third_party/re2/re2/prog.h:397
re2::Bitmap256
Definition: bloaty/third_party/re2/re2/bitmap256.h:19
string.h
re2::Prog::did_flatten_
bool did_flatten_
Definition: bloaty/third_party/re2/re2/prog.h:398
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::PODArray::data
T * data() const
Definition: bloaty/third_party/re2/util/pod_array.h:24
re2::Prog::size_
int size_
Definition: bloaty/third_party/re2/re2/prog.h:403
re2::Prog::ComputeByteMap
void ComputeByteMap()
Definition: bloaty/third_party/re2/re2/prog.cc:446
re2::Prog::list_count_
int list_count_
Definition: bloaty/third_party/re2/re2/prog.h:408
re2::Prog::Inst::InitMatch
void InitMatch(int id)
re2::Prog::MarkSuccessors
void MarkSuccessors(SparseArray< int > *rootmap, SparseArray< int > *predmap, std::vector< std::vector< int >> *predvec, SparseSet *reachable, std::vector< int > *stk)
Definition: bloaty/third_party/re2/re2/prog.cc:646
re2::Prog::ComputeHints
void ComputeHints(std::vector< Inst > *flat, int begin, int end)
Definition: bloaty/third_party/re2/re2/prog.cc:844
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
uint8_t
unsigned char uint8_t
Definition: stdint-msvc2008.h:78
setup.k
k
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
map
zval * map
Definition: php/ext/google/protobuf/encode_decode.c:480
re2::Prog::Inst::set_out_opcode
void set_out_opcode(int out, InstOp opcode)
Definition: bloaty/third_party/re2/re2/prog.h:129
re2::kInstEmptyWidth
@ kInstEmptyWidth
Definition: bloaty/third_party/re2/re2/prog.h:34
re2::Prog::IsWordChar
static bool IsWordChar(uint8_t c)
Definition: bloaty/third_party/re2/re2/prog.h:240
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
re2::SparseSetT::const_iterator
const typedef int * const_iterator
Definition: bloaty/third_party/re2/util/sparse_set.h:76
re2::kEmptyWordBoundary
@ kEmptyWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:47
re2::Prog::Inst::InitAlt
void InitAlt(uint32_t out, uint32_t out1)
Definition: bloaty/third_party/re2/re2/prog.cc:26
re2::Prog::DumpByteMap
std::string DumpByteMap()
Definition: bloaty/third_party/re2/re2/prog.cc:175
DCHECK_NE
#define DCHECK_NE(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:21
re2::SparseSetT
Definition: bloaty/third_party/re2/util/sparse_set.h:69
re2::ByteMapBuilder::Recolor
int Recolor(int oldcolor)
Definition: bloaty/third_party/re2/re2/prog.cc:428
re2::kInstCapture
@ kInstCapture
Definition: bloaty/third_party/re2/re2/prog.h:33
re2::Prog::Inst::InitEmptyWidth
void InitEmptyWidth(EmptyOp empty, uint32_t out)
Definition: bloaty/third_party/re2/re2/prog.cc:46
start
static uint64_t start
Definition: benchmark-pound.c:74
profile_analyzer.builder
builder
Definition: profile_analyzer.py:159
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
re2::Bitmap256::Set
void Set(int c)
Definition: bloaty/third_party/re2/re2/bitmap256.h:39
end
char * end
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1008
re2::FlattenedProgToString
static std::string FlattenedProgToString(Prog *prog, int start)
Definition: bloaty/third_party/re2/re2/prog.cc:145
re2::Prog::prefix_front_
int prefix_front_
Definition: re2/re2/prog.h:427
unique
static int unique
Definition: h2_local_abstract_uds_percent_encoded.cc:25
re2::ByteMapBuilder::Mark
void Mark(int lo, int hi)
Definition: bloaty/third_party/re2/re2/prog.cc:365
re2::ByteMapBuilder::colors_
int colors_[256]
Definition: bloaty/third_party/re2/re2/prog.cc:356
b2
T::second_type b2
Definition: abseil-cpp/absl/container/internal/hash_function_defaults_test.cc:308
root
RefCountedPtr< grpc_tls_certificate_provider > root
Definition: xds_server_config_fetcher.cc:223
FALLTHROUGH_INTENDED
#define FALLTHROUGH_INTENDED
Definition: bloaty/third_party/re2/util/util.h:26
re2::kEmptyNonWordBoundary
@ kEmptyNonWordBoundary
Definition: bloaty/third_party/re2/re2/prog.h:48
re2::Bitmap256::FindNextSetBit
int FindNextSetBit(int c) const
Definition: bloaty/third_party/re2/re2/bitmap256.h:86
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::EmptyOp
EmptyOp
Definition: bloaty/third_party/re2/re2/prog.h:42
re2::Prog::inst_
PODArray< Inst > inst_
Definition: bloaty/third_party/re2/re2/prog.h:413
uint64_t
unsigned __int64 uint64_t
Definition: stdint-msvc2008.h:90
re2::Prog::EmitList
void EmitList(int root, SparseArray< int > *rootmap, std::vector< Inst > *flat, SparseSet *reachable, std::vector< int > *stk)
Definition: bloaty/third_party/re2/re2/prog.cc:774
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::Prog::Flatten
void Flatten()
Definition: bloaty/third_party/re2/re2/prog.cc:557
re2::Prog::inst
Inst * inst(int id)
Definition: bloaty/third_party/re2/re2/prog.h:199
re2::Prog::inst_count_
int inst_count_[kNumInst]
Definition: bloaty/third_party/re2/re2/prog.h:409
re2::Prog::PrefixAccel_ShiftDFA
const void * PrefixAccel_ShiftDFA(const void *data, size_t size)
Definition: re2/re2/prog.cc:1024
re2::Prog::Inst::InitNop
void InitNop(uint32_t out)
Definition: bloaty/third_party/re2/re2/prog.cc:58
re2::kEmptyBeginText
@ kEmptyBeginText
Definition: bloaty/third_party/re2/re2/prog.h:45
re2::kNumInst
@ kNumInst
Definition: bloaty/third_party/re2/re2/prog.h:38
re2::Prog::DeleteDFA
void DeleteDFA(DFA *dfa)
Definition: bloaty/third_party/re2/re2/dfa.cc:1847
re2::Prog::~Prog
~Prog()
Definition: bloaty/third_party/re2/re2/prog.cc:120
re2::Prog::Inst::last
int last()
Definition: bloaty/third_party/re2/re2/prog.h:83
re2::Prog::set_start_unanchored
void set_start_unanchored(int start)
Definition: bloaty/third_party/re2/re2/prog.h:203
re2::Prog::size
int size()
Definition: bloaty/third_party/re2/re2/prog.h:204
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
re2::ByteMapBuilder::colormap_
std::vector< std::pair< int, int > > colormap_
Definition: bloaty/third_party/re2/re2/prog.cc:358
re2::Prog::anchor_end_
bool anchor_end_
Definition: bloaty/third_party/re2/re2/prog.h:396
re2::ProgToString
static std::string ProgToString(Prog *prog, Workq *q)
Definition: bloaty/third_party/re2/re2/prog.cc:132
min
#define min(a, b)
Definition: qsort.h:83
re2::Prog::set_start
void set_start(int start)
Definition: bloaty/third_party/re2/re2/prog.h:202
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::kInstAlt
@ kInstAlt
Definition: bloaty/third_party/re2/re2/prog.h:30
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
google::protobuf::ERROR
static const LogLevel ERROR
Definition: bloaty/third_party/protobuf/src/google/protobuf/testing/googletest.h:70
stdint.h
re2::ByteMapBuilder::Merge
void Merge()
Definition: bloaty/third_party/re2/re2/prog.cc:380
re2::Prog::Inst::opcode
InstOp opcode()
Definition: bloaty/third_party/re2/re2/prog.h:82
google_benchmark.example.empty
def empty(state)
Definition: example.py:31
re2::Bitmap256::Test
bool Test(int c) const
Definition: bloaty/third_party/re2/re2/bitmap256.h:31
re2::Prog::ConfigurePrefixAccel
void ConfigurePrefixAccel(const std::string &prefix, bool prefix_foldcase)
Definition: re2/re2/prog.cc:1005
re2::kEmptyEndLine
@ kEmptyEndLine
Definition: bloaty/third_party/re2/re2/prog.h:44
grpc_core::Loop
promise_detail::Loop< F > Loop(F f)
Definition: loop.h:130
re2::Prog::MarkDominator
void MarkDominator(int root, SparseArray< int > *rootmap, SparseArray< int > *predmap, std::vector< std::vector< int >> *predvec, SparseSet *reachable, std::vector< int > *stk)
Definition: bloaty/third_party/re2/re2/prog.cc:710
b1
T::second_type b1
Definition: abseil-cpp/absl/container/internal/hash_function_defaults_test.cc:306
re2::ByteMapBuilder::Build
void Build(uint8_t *bytemap, int *bytemap_range)
Definition: bloaty/third_party/re2/re2/prog.cc:411
re2::kInstNop
@ kInstNop
Definition: bloaty/third_party/re2/re2/prog.h:36
re2::kInstMatch
@ kInstMatch
Definition: bloaty/third_party/re2/re2/prog.h:35
re2::Prog::PrefixAccel_FrontAndBack
const void * PrefixAccel_FrontAndBack(const void *data, size_t size)
Definition: re2/re2/prog.cc:1121
re2::Prog::Inst::out_opcode_
uint32_t out_opcode_
Definition: bloaty/third_party/re2/re2/prog.h:133
re2::kInstAltMatch
@ kInstAltMatch
Definition: bloaty/third_party/re2/re2/prog.h:31
absl::flags_internal
Definition: abseil-cpp/absl/flags/commandlineflag.h:40
re2::Prog::prefix_dfa_
uint64_t * prefix_dfa_
Definition: re2/re2/prog.h:425
re2::Prog::start_unanchored_
int start_unanchored_
Definition: bloaty/third_party/re2/re2/prog.h:402
re2::AddToQueue
static void AddToQueue(Workq *q, int id)
Definition: bloaty/third_party/re2/re2/prog.cc:127
re2::Prog
Definition: bloaty/third_party/re2/re2/prog.h:56
re2::Prog::anchor_start_
bool anchor_start_
Definition: bloaty/third_party/re2/re2/prog.h:395
DCHECK_EQ
#define DCHECK_EQ(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:20
re2::Prog::bytemap_
uint8_t bytemap_[256]
Definition: bloaty/third_party/re2/re2/prog.h:420
re2::ByteMapBuilder::splits_
Bitmap256 splits_
Definition: bloaty/third_party/re2/re2/prog.cc:355
re2::SparseSetT::iterator
int * iterator
Definition: bloaty/third_party/re2/util/sparse_set.h:75
re2::Prog::dfa_longest_
DFA * dfa_longest_
Definition: bloaty/third_party/re2/re2/prog.h:418
re2::ByteMapBuilder
Definition: bloaty/third_party/re2/re2/prog.cc:337
next
AllocList * next[kMaxLevel]
Definition: abseil-cpp/absl/base/internal/low_level_alloc.cc:100
first
StrT first
Definition: cxa_demangle.cpp:4884
re2::Prog::Inst::out
int out()
Definition: bloaty/third_party/re2/re2/prog.h:84
re2::Workq
SparseSet Workq
Definition: bloaty/third_party/re2/re2/prog.cc:125
prefix
static const char prefix[]
Definition: head_of_line_blocking.cc:28
DCHECK_GE
#define DCHECK_GE(val1, val2)
Definition: bloaty/third_party/re2/util/logging.h:24
re2::ByteMapBuilder::nextcolor_
int nextcolor_
Definition: bloaty/third_party/re2/re2/prog.cc:357
re2::Prog::Inst::InitFail
void InitFail()
Definition: bloaty/third_party/re2/re2/prog.cc:63
re2::Prog::start_unanchored
int start_unanchored()
Definition: bloaty/third_party/re2/re2/prog.h:201
re2::Prog::bytemap_range_
int bytemap_range_
Definition: bloaty/third_party/re2/re2/prog.h:404
re2::kEmptyEndText
@ kEmptyEndText
Definition: bloaty/third_party/re2/re2/prog.h:46
re2::kEmptyBeginLine
@ kEmptyBeginLine
Definition: bloaty/third_party/re2/re2/prog.h:43
re2::Prog::list_heads_
PODArray< uint16_t > list_heads_
Definition: bloaty/third_party/re2/re2/prog.h:410
absl::str_format_internal::LengthMod::q
@ q
re2::BuildShiftDFA
static uint64_t * BuildShiftDFA(std::string prefix)
Definition: re2/re2/prog.cc:926
re2::Prog::Inst::Dump
std::string Dump()
Definition: bloaty/third_party/re2/re2/prog.cc:68
re2::Prog::dfa_mem_
int64_t dfa_mem_
Definition: bloaty/third_party/re2/re2/prog.h:416
intrin.h
size
voidpf void uLong size
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
int32_t
signed int int32_t
Definition: stdint-msvc2008.h:77
re2::ByteMapBuilder::ranges_
std::vector< std::pair< int, int > > ranges_
Definition: bloaty/third_party/re2/re2/prog.cc:359
re2::Prog::Inst::InitByteRange
void InitByteRange(int lo, int hi, int foldcase, uint32_t out)
Definition: bloaty/third_party/re2/re2/prog.cc:32
opcode
opcode
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/upb.h:6290
re2::Prog::prefix_foldcase_
bool prefix_foldcase_
Definition: re2/re2/prog.h:422
re2::Prog::start_
int start_
Definition: bloaty/third_party/re2/re2/prog.h:401
re2::kInstByteRange
@ kInstByteRange
Definition: bloaty/third_party/re2/re2/prog.h:32
re2::Prog::EmptyFlags
static uint32_t EmptyFlags(const StringPiece &context, const char *p)
Definition: bloaty/third_party/re2/re2/prog.cc:287
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Prog::start
int start()
Definition: bloaty/third_party/re2/re2/prog.h:200
id
uint32_t id
Definition: flow_control_fuzzer.cc:70
re2::Prog::Dump
std::string Dump()
Definition: bloaty/third_party/re2/re2/prog.cc:157
re2::Prog::Optimize
void Optimize()
Definition: bloaty/third_party/re2/re2/prog.cc:198


grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:46