bloaty/third_party/re2/re2/tostring.cc
Go to the documentation of this file.
1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Format a regular expression structure as a string.
6 // Tested by parse_test.cc
7 
8 #include <string.h>
9 #include <string>
10 
11 #include "util/util.h"
12 #include "util/logging.h"
13 #include "util/strutil.h"
14 #include "util/utf.h"
15 #include "re2/regexp.h"
16 #include "re2/walker-inl.h"
17 
18 namespace re2 {
19 
20 enum {
28 };
29 
30 // Helper function. See description below.
31 static void AppendCCRange(std::string* t, Rune lo, Rune hi);
32 
33 // Walker to generate string in s_.
34 // The arg pointers are actually integers giving the
35 // context precedence.
36 // The child_args are always NULL.
37 class ToStringWalker : public Regexp::Walker<int> {
38  public:
39  explicit ToStringWalker(std::string* t) : t_(t) {}
40 
41  virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
42  virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
43  int* child_args, int nchild_args);
44  virtual int ShortVisit(Regexp* re, int parent_arg) {
45  return 0;
46  }
47 
48  private:
49  std::string* t_; // The string the walker appends to.
50 
51  ToStringWalker(const ToStringWalker&) = delete;
52  ToStringWalker& operator=(const ToStringWalker&) = delete;
53 };
54 
56  std::string t;
57  ToStringWalker w(&t);
58  w.WalkExponential(this, PrecToplevel, 100000);
59  if (w.stopped_early())
60  t += " [truncated]";
61  return t;
62 }
63 
64 #define ToString DontCallToString // Avoid accidental recursion.
65 
66 // Visits re before children are processed.
67 // Appends ( if needed and passes new precedence to children.
68 int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
69  int prec = parent_arg;
70  int nprec = PrecAtom;
71 
72  switch (re->op()) {
73  case kRegexpNoMatch:
74  case kRegexpEmptyMatch:
75  case kRegexpLiteral:
76  case kRegexpAnyChar:
77  case kRegexpAnyByte:
78  case kRegexpBeginLine:
79  case kRegexpEndLine:
80  case kRegexpBeginText:
81  case kRegexpEndText:
84  case kRegexpCharClass:
85  case kRegexpHaveMatch:
86  nprec = PrecAtom;
87  break;
88 
89  case kRegexpConcat:
91  if (prec < PrecConcat)
92  t_->append("(?:");
93  nprec = PrecConcat;
94  break;
95 
96  case kRegexpAlternate:
97  if (prec < PrecAlternate)
98  t_->append("(?:");
99  nprec = PrecAlternate;
100  break;
101 
102  case kRegexpCapture:
103  t_->append("(");
104  if (re->cap() == 0)
105  LOG(DFATAL) << "kRegexpCapture cap() == 0";
106  if (re->name()) {
107  t_->append("?P<");
108  t_->append(*re->name());
109  t_->append(">");
110  }
111  nprec = PrecParen;
112  break;
113 
114  case kRegexpStar:
115  case kRegexpPlus:
116  case kRegexpQuest:
117  case kRegexpRepeat:
118  if (prec < PrecUnary)
119  t_->append("(?:");
120  // The subprecedence here is PrecAtom instead of PrecUnary
121  // because PCRE treats two unary ops in a row as a parse error.
122  nprec = PrecAtom;
123  break;
124  }
125 
126  return nprec;
127 }
128 
129 static void AppendLiteral(std::string *t, Rune r, bool foldcase) {
130  if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
131  t->append(1, '\\');
132  t->append(1, static_cast<char>(r));
133  } else if (foldcase && 'a' <= r && r <= 'z') {
134  r -= 'a' - 'A';
135  t->append(1, '[');
136  t->append(1, static_cast<char>(r));
137  t->append(1, static_cast<char>(r) + 'a' - 'A');
138  t->append(1, ']');
139  } else {
140  AppendCCRange(t, r, r);
141  }
142 }
143 
144 // Visits re after children are processed.
145 // For childless regexps, all the work is done here.
146 // For regexps with children, append any unary suffixes or ).
147 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
148  int* child_args, int nchild_args) {
149  int prec = parent_arg;
150  switch (re->op()) {
151  case kRegexpNoMatch:
152  // There's no simple symbol for "no match", but
153  // [^0-Runemax] excludes everything.
154  t_->append("[^\\x00-\\x{10ffff}]");
155  break;
156 
157  case kRegexpEmptyMatch:
158  // Append (?:) to make empty string visible,
159  // unless this is already being parenthesized.
160  if (prec < PrecEmpty)
161  t_->append("(?:)");
162  break;
163 
164  case kRegexpLiteral:
165  AppendLiteral(t_, re->rune(),
166  (re->parse_flags() & Regexp::FoldCase) != 0);
167  break;
168 
170  for (int i = 0; i < re->nrunes(); i++)
171  AppendLiteral(t_, re->runes()[i],
172  (re->parse_flags() & Regexp::FoldCase) != 0);
173  if (prec < PrecConcat)
174  t_->append(")");
175  break;
176 
177  case kRegexpConcat:
178  if (prec < PrecConcat)
179  t_->append(")");
180  break;
181 
182  case kRegexpAlternate:
183  // Clumsy but workable: the children all appended |
184  // at the end of their strings, so just remove the last one.
185  if ((*t_)[t_->size()-1] == '|')
186  t_->erase(t_->size()-1);
187  else
188  LOG(DFATAL) << "Bad final char: " << t_;
189  if (prec < PrecAlternate)
190  t_->append(")");
191  break;
192 
193  case kRegexpStar:
194  t_->append("*");
195  if (re->parse_flags() & Regexp::NonGreedy)
196  t_->append("?");
197  if (prec < PrecUnary)
198  t_->append(")");
199  break;
200 
201  case kRegexpPlus:
202  t_->append("+");
203  if (re->parse_flags() & Regexp::NonGreedy)
204  t_->append("?");
205  if (prec < PrecUnary)
206  t_->append(")");
207  break;
208 
209  case kRegexpQuest:
210  t_->append("?");
211  if (re->parse_flags() & Regexp::NonGreedy)
212  t_->append("?");
213  if (prec < PrecUnary)
214  t_->append(")");
215  break;
216 
217  case kRegexpRepeat:
218  if (re->max() == -1)
219  t_->append(StringPrintf("{%d,}", re->min()));
220  else if (re->min() == re->max())
221  t_->append(StringPrintf("{%d}", re->min()));
222  else
223  t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
224  if (re->parse_flags() & Regexp::NonGreedy)
225  t_->append("?");
226  if (prec < PrecUnary)
227  t_->append(")");
228  break;
229 
230  case kRegexpAnyChar:
231  t_->append(".");
232  break;
233 
234  case kRegexpAnyByte:
235  t_->append("\\C");
236  break;
237 
238  case kRegexpBeginLine:
239  t_->append("^");
240  break;
241 
242  case kRegexpEndLine:
243  t_->append("$");
244  break;
245 
246  case kRegexpBeginText:
247  t_->append("(?-m:^)");
248  break;
249 
250  case kRegexpEndText:
251  if (re->parse_flags() & Regexp::WasDollar)
252  t_->append("(?-m:$)");
253  else
254  t_->append("\\z");
255  break;
256 
257  case kRegexpWordBoundary:
258  t_->append("\\b");
259  break;
260 
262  t_->append("\\B");
263  break;
264 
265  case kRegexpCharClass: {
266  if (re->cc()->size() == 0) {
267  t_->append("[^\\x00-\\x{10ffff}]");
268  break;
269  }
270  t_->append("[");
271  // Heuristic: show class as negated if it contains the
272  // non-character 0xFFFE and yet somehow isn't full.
273  CharClass* cc = re->cc();
274  if (cc->Contains(0xFFFE) && !cc->full()) {
275  cc = cc->Negate();
276  t_->append("^");
277  }
278  for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
279  AppendCCRange(t_, i->lo, i->hi);
280  if (cc != re->cc())
281  cc->Delete();
282  t_->append("]");
283  break;
284  }
285 
286  case kRegexpCapture:
287  t_->append(")");
288  break;
289 
290  case kRegexpHaveMatch:
291  // There's no syntax accepted by the parser to generate
292  // this node (it is generated by RE2::Set) so make something
293  // up that is readable but won't compile.
294  t_->append("(?HaveMatch:%d)", re->match_id());
295  break;
296  }
297 
298  // If the parent is an alternation, append the | for it.
299  if (prec == PrecAlternate)
300  t_->append("|");
301 
302  return 0;
303 }
304 
305 // Appends a rune for use in a character class to the string t.
306 static void AppendCCChar(std::string* t, Rune r) {
307  if (0x20 <= r && r <= 0x7E) {
308  if (strchr("[]^-\\", r))
309  t->append("\\");
310  t->append(1, static_cast<char>(r));
311  return;
312  }
313  switch (r) {
314  default:
315  break;
316 
317  case '\r':
318  t->append("\\r");
319  return;
320 
321  case '\t':
322  t->append("\\t");
323  return;
324 
325  case '\n':
326  t->append("\\n");
327  return;
328 
329  case '\f':
330  t->append("\\f");
331  return;
332  }
333 
334  if (r < 0x100) {
335  *t += StringPrintf("\\x%02x", static_cast<int>(r));
336  return;
337  }
338  *t += StringPrintf("\\x{%x}", static_cast<int>(r));
339 }
340 
341 static void AppendCCRange(std::string* t, Rune lo, Rune hi) {
342  if (lo > hi)
343  return;
344  AppendCCChar(t, lo);
345  if (lo < hi) {
346  t->append("-");
347  AppendCCChar(t, hi);
348  }
349 }
350 
351 } // namespace re2
re2::CharClass::full
bool full()
Definition: bloaty/third_party/re2/re2/regexp.h:252
re2::kRegexpEmptyMatch
@ kRegexpEmptyMatch
Definition: bloaty/third_party/re2/re2/regexp.h:107
re2::kRegexpBeginLine
@ kRegexpBeginLine
Definition: bloaty/third_party/re2/re2/regexp.h:142
re2::kRegexpNoWordBoundary
@ kRegexpNoWordBoundary
Definition: bloaty/third_party/re2/re2/regexp.h:149
re2::PrecAlternate
@ PrecAlternate
Definition: bloaty/third_party/re2/re2/tostring.cc:24
re2::Regexp::runes
Rune * runes()
Definition: bloaty/third_party/re2/re2/regexp.h:340
re2::CharClass::Contains
bool Contains(Rune r)
Definition: bloaty/third_party/re2/re2/regexp.cc:942
re2::kRegexpLiteralString
@ kRegexpLiteralString
Definition: bloaty/third_party/re2/re2/regexp.h:113
re2::ToStringWalker::PreVisit
virtual int PreVisit(Regexp *re, int parent_arg, bool *stop)
Definition: bloaty/third_party/re2/re2/tostring.cc:68
string.h
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
re2::kRegexpLiteral
@ kRegexpLiteral
Definition: bloaty/third_party/re2/re2/regexp.h:110
re2::ToStringWalker::t_
std::string * t_
Definition: bloaty/third_party/re2/re2/tostring.cc:49
re2::Regexp::Walker::WalkExponential
T WalkExponential(Regexp *re, T top_arg, int max_visits)
Definition: bloaty/third_party/re2/re2/walker-inl.h:240
re2::Regexp::nrunes
int nrunes()
Definition: bloaty/third_party/re2/re2/regexp.h:341
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::Regexp::cc
CharClass * cc()
Definition: bloaty/third_party/re2/re2/regexp.h:337
re2::kRegexpHaveMatch
@ kRegexpHaveMatch
Definition: bloaty/third_party/re2/re2/regexp.h:161
re2::kRegexpWordBoundary
@ kRegexpWordBoundary
Definition: bloaty/third_party/re2/re2/regexp.h:147
re2::kRegexpEndText
@ kRegexpEndText
Definition: bloaty/third_party/re2/re2/regexp.h:154
re2::ToStringWalker::ShortVisit
virtual int ShortVisit(Regexp *re, int parent_arg)
Definition: bloaty/third_party/re2/re2/tostring.cc:44
re2::CharClass
Definition: bloaty/third_party/re2/re2/regexp.h:242
re2::Regexp::ToString
std::string ToString()
Definition: bloaty/third_party/re2/re2/tostring.cc:55
re2::Regexp::Walker
Definition: bloaty/third_party/re2/re2/regexp.h:414
re2::Regexp::rune
Rune rune()
Definition: bloaty/third_party/re2/re2/regexp.h:336
re2::kRegexpConcat
@ kRegexpConcat
Definition: bloaty/third_party/re2/re2/regexp.h:116
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::ToStringWalker::operator=
ToStringWalker & operator=(const ToStringWalker &)=delete
re2::AppendCCChar
static void AppendCCChar(std::string *t, Rune r)
Definition: bloaty/third_party/re2/re2/tostring.cc:306
re2::kRegexpAnyChar
@ kRegexpAnyChar
Definition: bloaty/third_party/re2/re2/regexp.h:136
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::kRegexpCharClass
@ kRegexpCharClass
Definition: bloaty/third_party/re2/re2/regexp.h:157
re2::AppendLiteral
static void AppendLiteral(std::string *t, Rune r, bool foldcase)
Definition: bloaty/third_party/re2/re2/tostring.cc:129
re2::PrecUnary
@ PrecUnary
Definition: bloaty/third_party/re2/re2/tostring.cc:22
re2::ToStringWalker
Definition: bloaty/third_party/re2/re2/tostring.cc:37
re2::Regexp::match_id
int match_id()
Definition: bloaty/third_party/re2/re2/regexp.h:342
re2::kRegexpAlternate
@ kRegexpAlternate
Definition: bloaty/third_party/re2/re2/regexp.h:118
re2::CharClass::end
iterator end()
Definition: bloaty/third_party/re2/re2/regexp.h:248
re2::PrecParen
@ PrecParen
Definition: bloaty/third_party/re2/re2/tostring.cc:26
re2::Regexp::name
const std::string * name()
Definition: bloaty/third_party/re2/re2/regexp.h:339
re2::Regexp::min
int min()
Definition: bloaty/third_party/re2/re2/regexp.h:334
re2::kRegexpStar
@ kRegexpStar
Definition: bloaty/third_party/re2/re2/regexp.h:121
re2::Regexp::parse_flags
ParseFlags parse_flags()
Definition: bloaty/third_party/re2/re2/regexp.h:324
re2::Regexp::max
int max()
Definition: bloaty/third_party/re2/re2/regexp.h:335
re2::Regexp::Walker::stopped_early
bool stopped_early()
Definition: bloaty/third_party/re2/re2/walker-inl.h:88
re2::ToStringWalker::ToStringWalker
ToStringWalker(std::string *t)
Definition: bloaty/third_party/re2/re2/tostring.cc:39
re2::Regexp::op
RegexpOp op()
Definition: bloaty/third_party/re2/re2/regexp.h:321
re2::kRegexpAnyByte
@ kRegexpAnyByte
Definition: bloaty/third_party/re2/re2/regexp.h:139
re2::CharClass::begin
iterator begin()
Definition: bloaty/third_party/re2/re2/regexp.h:247
re2::Regexp::NonGreedy
@ NonGreedy
Definition: bloaty/third_party/re2/re2/regexp.h:290
re2::PrecToplevel
@ PrecToplevel
Definition: bloaty/third_party/re2/re2/tostring.cc:27
re2::RuneRange
Definition: bloaty/third_party/re2/re2/regexp.h:225
re2::Rune
signed int Rune
Definition: bloaty/third_party/re2/util/utf.h:25
re2::CharClass::size
int size()
Definition: bloaty/third_party/re2/re2/regexp.h:250
re2::PrecConcat
@ PrecConcat
Definition: bloaty/third_party/re2/re2/tostring.cc:23
fix_build_deps.r
r
Definition: fix_build_deps.py:491
re2::AppendCCRange
static void AppendCCRange(std::string *t, Rune lo, Rune hi)
Definition: bloaty/third_party/re2/re2/tostring.cc:341
re2::kRegexpRepeat
@ kRegexpRepeat
Definition: bloaty/third_party/re2/re2/regexp.h:129
re2::kRegexpBeginText
@ kRegexpBeginText
Definition: bloaty/third_party/re2/re2/regexp.h:152
re2::kRegexpEndLine
@ kRegexpEndLine
Definition: bloaty/third_party/re2/re2/regexp.h:144
stop
static const char stop[]
Definition: benchmark-async-pummel.c:35
re2::PrecEmpty
@ PrecEmpty
Definition: bloaty/third_party/re2/re2/tostring.cc:25
re2::Regexp::WasDollar
@ WasDollar
Definition: bloaty/third_party/re2/re2/regexp.h:316
re2::kRegexpCapture
@ kRegexpCapture
Definition: bloaty/third_party/re2/re2/regexp.h:133
re2::PrecAtom
@ PrecAtom
Definition: bloaty/third_party/re2/re2/tostring.cc:21
re2::ToStringWalker::PostVisit
virtual int PostVisit(Regexp *re, int parent_arg, int pre_arg, int *child_args, int nchild_args)
Definition: bloaty/third_party/re2/re2/tostring.cc:147
re2::kRegexpPlus
@ kRegexpPlus
Definition: bloaty/third_party/re2/re2/regexp.h:123
re2::kRegexpNoMatch
@ kRegexpNoMatch
Definition: bloaty/third_party/re2/re2/regexp.h:104
re2::CharClass::Delete
void Delete()
Definition: bloaty/third_party/re2/re2/regexp.cc:917
re2::kRegexpQuest
@ kRegexpQuest
Definition: bloaty/third_party/re2/re2/regexp.h:125
re2::Regexp::FoldCase
@ FoldCase
Definition: bloaty/third_party/re2/re2/regexp.h:280
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
re2::CharClass::Negate
CharClass * Negate()
Definition: bloaty/third_party/re2/re2/regexp.cc:922
re2::Regexp::cap
int cap()
Definition: bloaty/third_party/re2/re2/regexp.h:338


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:40