re2/re2/testing/dump.cc
Go to the documentation of this file.
1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Dump the regexp into a string showing structure.
6 // Tested by parse_unittest.cc
7 
8 // This function traverses the regexp recursively,
9 // meaning that on inputs like Regexp::Simplify of
10 // a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
11 // it takes time and space exponential in the size of the
12 // original regular expression. It can also use stack space
13 // linear in the size of the regular expression for inputs
14 // like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
15 // IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
16 // As a result, Dump is provided only in the testing
17 // library (see BUILD).
18 
19 #include <string>
20 
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "util/utf.h"
25 #include "re2/stringpiece.h"
26 #include "re2/regexp.h"
27 
28 namespace re2 {
29 
// Short display names for regexp operators, indexed by the numeric value
// returned by Regexp::op().  DumpRegexpAppending() prints one of these in
// front of every node, so the entries must stay in operator order.
// Both the strings and the table itself are immutable.
static const char* const kOpcodeNames[] = {
  "bad",
  "no",
  "emp",
  "lit",
  "str",
  "cat",
  "alt",
  "star",
  "plus",
  "que",
  "rep",
  "cap",
  "dot",
  "byte",
  "bol",
  "eol",
  "wb",   // kRegexpWordBoundary
  "nwb",  // kRegexpNoWordBoundary
  "bot",
  "eot",
  "cc",
  "match",
};
54 
55 // Create string representation of regexp with explicit structure.
56 // Nothing pretty, just for testing.
57 static void DumpRegexpAppending(Regexp* re, std::string* s) {
58  if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
59  *s += StringPrintf("op%d", re->op());
60  } else {
61  switch (re->op()) {
62  default:
63  break;
64  case kRegexpStar:
65  case kRegexpPlus:
66  case kRegexpQuest:
67  case kRegexpRepeat:
68  if (re->parse_flags() & Regexp::NonGreedy)
69  s->append("n");
70  break;
71  }
72  s->append(kOpcodeNames[re->op()]);
73  if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
74  Rune r = re->rune();
75  if ('a' <= r && r <= 'z')
76  s->append("fold");
77  }
78  if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
79  for (int i = 0; i < re->nrunes(); i++) {
80  Rune r = re->runes()[i];
81  if ('a' <= r && r <= 'z') {
82  s->append("fold");
83  break;
84  }
85  }
86  }
87  }
88  s->append("{");
89  switch (re->op()) {
90  default:
91  break;
92  case kRegexpEndText:
93  if (!(re->parse_flags() & Regexp::WasDollar)) {
94  s->append("\\z");
95  }
96  break;
97  case kRegexpLiteral: {
98  Rune r = re->rune();
99  char buf[UTFmax+1];
100  buf[runetochar(buf, &r)] = 0;
101  s->append(buf);
102  break;
103  }
105  for (int i = 0; i < re->nrunes(); i++) {
106  Rune r = re->runes()[i];
107  char buf[UTFmax+1];
108  buf[runetochar(buf, &r)] = 0;
109  s->append(buf);
110  }
111  break;
112  case kRegexpConcat:
113  case kRegexpAlternate:
114  for (int i = 0; i < re->nsub(); i++)
115  DumpRegexpAppending(re->sub()[i], s);
116  break;
117  case kRegexpStar:
118  case kRegexpPlus:
119  case kRegexpQuest:
120  DumpRegexpAppending(re->sub()[0], s);
121  break;
122  case kRegexpCapture:
123  if (re->cap() == 0)
124  LOG(DFATAL) << "kRegexpCapture cap() == 0";
125  if (re->name()) {
126  s->append(*re->name());
127  s->append(":");
128  }
129  DumpRegexpAppending(re->sub()[0], s);
130  break;
131  case kRegexpRepeat:
132  s->append(StringPrintf("%d,%d ", re->min(), re->max()));
133  DumpRegexpAppending(re->sub()[0], s);
134  break;
135  case kRegexpCharClass: {
137  for (CharClass::iterator it = re->cc()->begin();
138  it != re->cc()->end(); ++it) {
139  RuneRange rr = *it;
140  s->append(sep);
141  if (rr.lo == rr.hi)
142  s->append(StringPrintf("%#x", rr.lo));
143  else
144  s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
145  sep = " ";
146  }
147  break;
148  }
149  }
150  s->append("}");
151 }
152 
154  // Make sure that we are being called from a unit test.
155  // Should cause a link error if used outside of testing.
156  CHECK(!::testing::TempDir().empty());
157 
158  std::string s;
159  DumpRegexpAppending(this, &s);
160  return s;
161 }
162 
163 } // namespace re2
regen-readme.it
it
Definition: regen-readme.py:15
testing::TempDir
GTEST_API_ std::string TempDir()
Definition: bloaty/third_party/googletest/googletest/src/gtest.cc:6140
re2::Regexp::runes
Rune * runes()
Definition: bloaty/third_party/re2/re2/regexp.h:340
re2::kRegexpLiteralString
@ kRegexpLiteralString
Definition: bloaty/third_party/re2/re2/regexp.h:113
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
re2::Regexp::nsub
int nsub()
Definition: bloaty/third_party/re2/re2/regexp.h:322
buf
voidpf void * buf
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::kRegexpLiteral
@ kRegexpLiteral
Definition: bloaty/third_party/re2/re2/regexp.h:110
re2::Regexp::nrunes
int nrunes()
Definition: bloaty/third_party/re2/re2/regexp.h:341
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::Regexp::cc
CharClass * cc()
Definition: bloaty/third_party/re2/re2/regexp.h:337
re2::kRegexpEndText
@ kRegexpEndText
Definition: bloaty/third_party/re2/re2/regexp.h:154
re2::Regexp::rune
Rune rune()
Definition: bloaty/third_party/re2/re2/regexp.h:336
re2::kRegexpConcat
@ kRegexpConcat
Definition: bloaty/third_party/re2/re2/regexp.h:116
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
text_format_test_wrapper.sep
sep
Definition: text_format_test_wrapper.py:34
re2::runetochar
int runetochar(char *str, const Rune *rune)
Definition: bloaty/third_party/re2/util/rune.cc:127
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::kRegexpCharClass
@ kRegexpCharClass
Definition: bloaty/third_party/re2/re2/regexp.h:157
re2::RuneRange::hi
Rune hi
Definition: bloaty/third_party/re2/re2/regexp.h:229
re2::Regexp::sub
Regexp ** sub()
Definition: bloaty/third_party/re2/re2/regexp.h:327
CHECK
#define CHECK(x)
Definition: bloaty/third_party/protobuf/php/ext/google/protobuf/upb.c:8085
re2::kRegexpAlternate
@ kRegexpAlternate
Definition: bloaty/third_party/re2/re2/regexp.h:118
re2::CharClass::end
iterator end()
Definition: bloaty/third_party/re2/re2/regexp.h:248
re2::Regexp::name
const std::string * name()
Definition: bloaty/third_party/re2/re2/regexp.h:339
re2::Regexp::Dump
std::string Dump()
Definition: bloaty/third_party/re2/re2/testing/dump.cc:156
re2::Regexp::min
int min()
Definition: bloaty/third_party/re2/re2/regexp.h:334
re2::kRegexpStar
@ kRegexpStar
Definition: bloaty/third_party/re2/re2/regexp.h:121
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
google_benchmark.example.empty
def empty(state)
Definition: example.py:31
re2::kOpcodeNames
static const char * kOpcodeNames[]
Definition: bloaty/third_party/re2/re2/testing/dump.cc:33
re2::Regexp::parse_flags
ParseFlags parse_flags()
Definition: bloaty/third_party/re2/re2/regexp.h:324
re2::Regexp::max
int max()
Definition: bloaty/third_party/re2/re2/regexp.h:335
re2::Regexp::op
RegexpOp op()
Definition: bloaty/third_party/re2/re2/regexp.h:321
re2::RuneRange::lo
Rune lo
Definition: bloaty/third_party/re2/re2/regexp.h:228
re2::CharClass::begin
iterator begin()
Definition: bloaty/third_party/re2/re2/regexp.h:247
re2::Regexp::NonGreedy
@ NonGreedy
Definition: bloaty/third_party/re2/re2/regexp.h:290
re2::RuneRange
Definition: bloaty/third_party/re2/re2/regexp.h:225
re2::Rune
signed int Rune
Definition: bloaty/third_party/re2/util/utf.h:25
fix_build_deps.r
r
Definition: fix_build_deps.py:491
re2::kRegexpRepeat
@ kRegexpRepeat
Definition: bloaty/third_party/re2/re2/regexp.h:129
re2::Regexp::WasDollar
@ WasDollar
Definition: bloaty/third_party/re2/re2/regexp.h:316
re2::kRegexpCapture
@ kRegexpCapture
Definition: bloaty/third_party/re2/re2/regexp.h:133
re2::kRegexpPlus
@ kRegexpPlus
Definition: bloaty/third_party/re2/re2/regexp.h:123
re2::kRegexpQuest
@ kRegexpQuest
Definition: bloaty/third_party/re2/re2/regexp.h:125
re2::DumpRegexpAppending
static void DumpRegexpAppending(Regexp *re, std::string *s)
Definition: bloaty/third_party/re2/re2/testing/dump.cc:60
re2::Regexp::FoldCase
@ FoldCase
Definition: bloaty/third_party/re2/re2/regexp.h:280
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::Regexp::cap
int cap()
Definition: bloaty/third_party/re2/re2/regexp.h:338
re2::UTFmax
@ UTFmax
Definition: bloaty/third_party/re2/util/utf.h:29


grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:17