Go to the documentation of this file.
28 #include "util/util.h"
29 #include "util/logging.h"
31 #include "util/strutil.h"
33 #include "re2/regexp.h"
34 #include "re2/stringpiece.h"
35 #include "re2/unicode_casefold.h"
36 #include "re2/unicode_groups.h"
37 #include "re2/walker-inl.h"
39 #if defined(RE2_USE_ICU)
40 #include "unicode/uniset.h"
41 #include "unicode/unistr.h"
42 #include "unicode/utypes.h"
48 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
197 : flags_(
flags), whole_regexp_(whole_regexp),
208 for (
Regexp* re = stacktop_; re != NULL; re =
next) {
257 re->
rune_ =
r +
'a' -
'A';
262 if (!IsMarker(re->
op()))
264 re->
down_ = stacktop_;
278 if (f[
m].lo <=
r &&
r <= f[
m].hi)
337 if (f == NULL || r < f->lo)
351 LOG(DFATAL) <<
"AddFoldedRange recurses too much.";
355 if (!
cc->AddRange(lo, hi))
370 Rune hi1 = std::min<Rune>(hi, f->hi);
404 if (!(flags_ &
NeverNL) ||
r !=
'\n') {
409 return PushRegexp(re);
413 if ((flags_ &
NeverNL) &&
r ==
'\n')
417 if (MaybeConcatString(
r, flags_))
422 return PushRegexp(re);
463 return PushRegexp(re);
469 return PushRegexp(re);
477 if (stacktop_ == NULL || IsMarker(stacktop_->op())) {
488 if (
op == stacktop_->op() && fl == stacktop_->parse_flags())
497 fl == stacktop_->parse_flags()) {
505 re->
sub()[0] = FinishRegexp(stacktop_);
523 virtual int PreVisit(
Regexp* re,
int parent_arg,
bool*
stop);
524 virtual int PostVisit(
Regexp* re,
int parent_arg,
int pre_arg,
525 int* child_args,
int nchild_args);
526 virtual int ShortVisit(
Regexp* re,
int parent_arg);
534 int arg = parent_arg;
548 int* child_args,
int nchild_args) {
550 for (
int i = 0;
i < nchild_args;
i++) {
551 if (child_args[
i] <
arg) {
561 LOG(DFATAL) <<
"RepetitionWalker::ShortVisit called";
575 if (stacktop_ == NULL || IsMarker(stacktop_->op())) {
588 re->
sub()[0] = FinishRegexp(stacktop_);
591 if (
min >= 2 ||
max >= 2) {
612 if (
name.data() != NULL)
614 return PushRegexp(re);
621 return PushRegexp(re);
636 if ((r1 = stacktop_) != NULL &&
637 (r2 = r1->
down_) != NULL &&
640 if ((r3 = r2->
down_) != NULL &&
684 if ((r1 = stacktop_) == NULL ||
685 (r2 = r1->
down_) == NULL ||
688 status_->set_error_arg(whole_regexp_);
693 stacktop_ = r2->
down_;
704 re->
sub()[0] = FinishRegexp(r1);
710 return PushRegexp(re);
717 if (re != NULL && re->
down_ != NULL) {
719 status_->set_error_arg(whole_regexp_);
723 return FinishRegexp(re);
754 if (re->
nsub() == 2) {
842 switch (re->
nsub()) {
846 LOG(DFATAL) <<
"Concat of " << re->
nsub();
912 std::vector<Splice>* splices);
915 std::vector<Splice>* splices);
918 std::vector<Splice>* splices);
933 std::vector<Frame> stk;
937 auto&
sub = stk.back().
sub;
938 auto&
nsub = stk.back().nsub;
939 auto&
round = stk.back().round;
940 auto& splices = stk.back().splices;
941 auto& spliceidx = stk.back().spliceidx;
943 if (splices.empty()) {
947 }
else if (spliceidx <
static_cast<int>(splices.size())) {
949 stk.emplace_back(splices[spliceidx].
sub, splices[spliceidx].
nsub);
953 auto iter = splices.begin();
955 for (
int i = 0;
i <
nsub; ) {
957 while (
sub + i < iter->
sub)
964 re[0] =
iter->prefix;
976 LOG(DFATAL) <<
"unknown round: " <<
round;
980 if (++
iter == splices.end()) {
1002 if (stk.size() == 1) {
1010 stk.back().splices[stk.back().spliceidx].nsuffix = nsuffix;
1011 ++stk.back().spliceidx;
1015 LOG(DFATAL) <<
"unknown round: " <<
round;
1020 if (splices.empty() ||
round == 3) {
1021 spliceidx =
static_cast<int>(splices.size());
1030 std::vector<Splice>* splices) {
1036 for (
int i = 0;
i <=
nsub;
i++) {
1039 Rune* rune_i = NULL;
1044 if (runeflags_i == runeflags) {
1046 while (same < nrune && same < nrune_i &&
rune[same] == rune_i[same])
1061 }
else if (
i ==
start+1) {
1065 for (
int j =
start; j <
i; j++)
1075 runeflags = runeflags_i;
1082 std::vector<Splice>* splices) {
1093 for (
int i = 0;
i <=
nsub;
i++) {
1099 if (
first != NULL &&
1127 }
else if (
i ==
start+1) {
1131 for (
int j =
start; j <
i; j++)
1146 std::vector<Splice>* splices) {
1150 for (
int i = 0;
i <=
nsub;
i++) {
1156 if (
first != NULL &&
1169 }
else if (
i ==
start+1) {
1173 for (
int j =
start; j <
i; j++) {
1182 LOG(DFATAL) <<
"RE2: unexpected op: " << re->
op() <<
" "
1217 if (stacktop_ != NULL && stacktop_->down_ ==
next)
1246 if (r1 == NULL || IsMarker(r1->
op())) {
1260 stacktop_ = r1->
down_;
1278 if ((re1 = stacktop_) == NULL || (
re2 = re1->
down_) == NULL)
1326 if (s->size() == 0 || !isdigit((*s)[0] & 0xFF))
1329 if (s->size() >= 2 && (*s)[0] ==
'0' && isdigit((*s)[1] & 0xFF))
1333 while (s->size() > 0 && isdigit(c = (*s)[0] & 0xFF)) {
1338 s->remove_prefix(1);
1355 if (s.size() == 0 || s[0] !=
'{')
1378 if (s.size() == 0 || s[0] !=
'}')
1419 while (t.size() > 0) {
1428 return (
'0' <= c && c <=
'9') ||
1429 (
'A' <= c && c <=
'F') ||
1430 (
'a' <= c && c <=
'f');
1435 if (
'0' <= c && c <=
'9')
1437 if (
'A' <= c && c <=
'F')
1438 return c -
'A' + 10;
1439 if (
'a' <= c && c <=
'f')
1440 return c -
'a' + 10;
1441 LOG(DFATAL) <<
"Bad hex digit " << c;
1450 const char*
begin = s->data();
1451 if (s->size() < 1 || (*s)[0] !=
'\\') {
1457 if (s->size() < 2) {
1463 s->remove_prefix(1);
1469 if (c <
Runeself && !isalpha(c) && !isdigit(c)) {
1488 if (s->size() == 0 || (*s)[0] <
'0' || (*s)[0] >
'7')
1494 if (s->size() > 0 &&
'0' <= (c = (*s)[0]) && c <=
'7') {
1496 s->remove_prefix(1);
1497 if (s->size() > 0) {
1499 if (
'0' <= c && c <=
'7') {
1501 s->remove_prefix(1);
1505 if (
code > rune_max)
1530 if (
code > rune_max)
1537 if (c !=
'}' || nhex == 0)
1587 LOG(DFATAL) <<
"Not reached in ParseEscape.";
1605 if (cutnl && lo <=
'\n' &&
'\n' <= hi) {
1624 for (
int i = 0;
i < ngroups;
i++)
1639 #if !defined(RE2_USE_ICU)
1658 for (
int i = 0;
i <
g->nr16;
i++) {
1661 for (
int i = 0;
i <
g->nr32;
i++) {
1681 cc->AddCharClass(&ccb1);
1685 for (
int i = 0;
i <
g->nr16;
i++) {
1686 if (next < g->r16[
i].lo)
1690 for (
int i = 0;
i <
g->nr32;
i++) {
1691 if (next < g->r32[
i].lo)
1709 if (s->size() < 2 || (*s)[0] !=
'\\')
1717 s->remove_prefix(
name.size());
1735 if (s->size() < 2 || (*s)[0] !=
'\\')
1738 if (c !=
'p' && c !=
'P')
1747 s->remove_prefix(2);
1753 const char* p = seq.
data() + 2;
1757 size_t end = s->find(
'}', 0);
1762 status->set_error_arg(seq);
1766 s->remove_prefix(
end + 1);
1774 if (
name.size() > 0 &&
name[0] ==
'^') {
1776 name.remove_prefix(1);
1779 #if !defined(RE2_USE_ICU)
1784 status->set_error_arg(seq);
1792 ::icu::UnicodeString ustr = ::icu::UnicodeString::fromUTF8(
1794 UErrorCode uerr = U_ZERO_ERROR;
1795 ::icu::UnicodeSet uset(ustr, uerr);
1796 if (U_FAILURE(uerr)) {
1798 status->set_error_arg(seq);
1803 int nr = uset.getRangeCount();
1805 for (
int i = 0;
i < nr;
i++) {
1806 r[
i].lo = uset.getRangeStart(
i);
1807 r[
i].hi = uset.getRangeEnd(
i);
1824 const char* p = s->data();
1825 const char* ep = s->data() + s->size();
1826 if (ep - p < 2 || p[0] !=
'[' || p[1] !=
':')
1831 for (q = p+2; q <= ep-2 && (*q !=
':' || *(q+1) !=
']'); q++)
1849 s->remove_prefix(
name.size());
1861 if (s->size() == 0) {
1863 status->set_error_arg(whole_class);
1869 if (s->size() >= 1 && (*s)[0] ==
'\\')
1885 if (!ParseCCCharacter(s, &rr->
lo, whole_class,
status))
1888 if (s->size() >= 2 && (*s)[0] ==
'-' && (*s)[1] !=
']') {
1889 s->remove_prefix(1);
1890 if (!ParseCCCharacter(s, &rr->
hi, whole_class,
status))
1892 if (rr->
hi < rr->
lo) {
1911 if (s->size() == 0 || (*s)[0] !=
'[') {
1917 bool negated =
false;
1920 s->remove_prefix(1);
1921 if (s->size() > 0 && (*s)[0] ==
'^') {
1922 s->remove_prefix(1);
1931 while (s->size() > 0 && ((*s)[0] !=
']' ||
first)) {
1934 if ((*s)[0] ==
'-' && !
first && !(flags_&
PerlX) &&
1935 (s->size() == 1 || (*s)[1] !=
']')) {
1952 if (s->size() > 2 && (*s)[0] ==
'[' && (*s)[1] ==
':') {
1965 if (s->size() > 2 &&
1967 ((*s)[1] ==
'p' || (*s)[1] ==
'P')) {
1988 if (!ParseCCRange(s, &rr, whole_class,
status)) {
1999 if (s->size() == 0) {
2001 status->set_error_arg(whole_class);
2005 s->remove_prefix(1);
2019 if (
name.size() == 0)
2021 for (
size_t i = 0;
i <
name.size();
i++) {
2023 if ((
'0' <= c && c <=
'9') ||
2024 (
'a' <= c && c <=
'z') ||
2025 (
'A' <= c && c <=
'Z') ||
2042 if (!(flags_ &
PerlX) || t.size() < 2 || t[0] !=
'(' || t[1] !=
'?') {
2043 LOG(DFATAL) <<
"Bad call to ParseState::ParsePerlFlags";
2065 if (t.size() > 2 && t[0] ==
'P' && t[1] ==
'<') {
2067 size_t end = t.find(
'>', 2);
2083 status_->set_error_arg(capture);
2087 if (!DoLeftParen(
name)) {
2093 static_cast<size_t>(capture.
data() + capture.
size() - s->data()));
2097 bool negated =
false;
2098 bool sawflags =
false;
2153 if (!DoLeftParenNoCapture()) {
2167 if (negated && !sawflags)
2177 StringPiece(s->data(),
static_cast<size_t>(t.data() - s->data())));
2189 for (
size_t i = 0;
i < latin1.
size();
i++) {
2190 Rune r = latin1[
i] & 0xFF;
2192 utf->append(
buf,
n);
2211 if (global_flags &
Latin1) {
2220 while (t.size() > 0) {
2231 while (t.size() > 0) {
2245 if ((ps.
flags() &
PerlX) && (t.size() >= 2 && t[1] ==
'?')) {
2312 bool nongreedy =
false;
2315 if (t.size() > 0 && t[0] ==
'?') {
2319 if (lastunary.
size() > 0) {
2326 static_cast<size_t>(t.data() - lastunary.
data())));
2331 static_cast<size_t>(t.data() - opstr.
data()));
2348 bool nongreedy =
false;
2350 if (t.size() > 0 && t[0] ==
'?') {
2354 if (lastunary.
size() > 0) {
2359 static_cast<size_t>(t.data() - lastunary.
data())));
2364 static_cast<size_t>(t.data() - opstr.
data()));
2374 t.size() >= 2 && (t[1] ==
'b' || t[1] ==
'B')) {
2408 while (t.size() > 0) {
2409 if (t.size() >= 2 && t[0] ==
'\\' && t[1] ==
'E') {
2423 if (t.size() >= 2 && (t[1] ==
'p' || t[1] ==
'P')) {
2459 lastunary = isunary;
static void Round1(Regexp **sub, int nsub, Regexp::ParseFlags flags, std::vector< Splice > *splices)
bool ParseCCCharacter(StringPiece *s, Rune *rp, const StringPiece &whole_class, RegexpStatus *status)
const int num_posix_groups
static void Round3(Regexp **sub, int nsub, Regexp::ParseFlags flags, std::vector< Splice > *splices)
bool AddRange(Rune lo, Rune hi)
Regexp & operator=(const Regexp &)=delete
virtual int ShortVisit(Regexp *re, int parent_arg)
int fullrune(const char *str, int n)
Regexp * FinishRegexp(Regexp *)
static void Round2(Regexp **sub, int nsub, Regexp::ParseFlags flags, std::vector< Splice > *splices)
void remove_prefix(size_type n)
static void AddUGroup(CharClassBuilder *cc, const UGroup *g, int sign, Regexp::ParseFlags parse_flags)
bool PushSimpleOp(RegexpOp op)
static const size_type npos
static Regexp * LeadingRegexp(Regexp *re)
Frame(Regexp **sub, int nsub)
bool MaybeConcatString(int r, ParseFlags flags)
bool IsMarker(RegexpOp op)
bool DoLeftParen(const StringPiece &name)
static Regexp * Concat(Regexp **subs, int nsubs, ParseFlags flags)
ParseState(ParseFlags flags, const StringPiece &whole_regexp, RegexpStatus *status)
void DoCollapse(RegexpOp op)
bool DoLeftParenNoCapture()
static Regexp * LiteralString(Rune *runes, int nrunes, ParseFlags flags)
std::vector< Splice > splices
ParseStatus ParseUnicodeGroup(StringPiece *s, Regexp::ParseFlags parse_flags, CharClassBuilder *cc, RegexpStatus *status)
bool ParseCCRange(StringPiece *s, RuneRange *rr, const StringPiece &whole_class, RegexpStatus *status)
virtual int PreVisit(Regexp *re, int parent_arg, bool *stop)
ParseState & operator=(const ParseState &)=delete
virtual int PostVisit(Regexp *re, int parent_arg, int pre_arg, int *child_args, int nchild_args)
bool PushRepeatOp(RegexpOp op, const StringPiece &s, bool nongreedy)
static const int kMaxRepeat
void AddRange(std::vector< T > *dst, T lo, T hi, int mult)
#define FALLTHROUGH_INTENDED
bool ParseCharClass(StringPiece *s, Regexp **out_re, RegexpStatus *status)
const UGroup * MaybeParsePerlCCEscape(StringPiece *s, Regexp::ParseFlags parse_flags)
static bool IsValidCaptureName(const StringPiece &name)
int runetochar(char *str, const Rune *rune)
static bool IsValidUTF8(const StringPiece &s, RegexpStatus *status)
static Regexp * Parse(const StringPiece &s, ParseFlags flags, RegexpStatus *status)
const int num_unicode_groups
bool ParsePerlFlags(StringPiece *s)
const int num_unicode_casefold
const UGroup perl_groups[]
static Rune * LeadingString(Regexp *re, int *nrune, ParseFlags *flags)
const UGroup posix_groups[]
static int FactorAlternation(Regexp **sub, int nsub, ParseFlags flags)
static const UGroup * LookupUnicodeGroup(const StringPiece &name)
static Regexp * NewCharClass(CharClass *cc, ParseFlags flags)
const std::string * name()
const CaseFold * LookupCaseFold(const CaseFold *f, int n, Rune r)
static bool MaybeParseRepetition(StringPiece *sp, int *lo, int *hi)
void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags)
static void AddFoldedRange(CharClassBuilder *cc, Rune lo, Rune hi, int depth)
T Walk(Regexp *re, T top_arg)
static bool ParseEscape(StringPiece *s, Rune *rp, RegexpStatus *status, int rune_max)
static const UGroup * LookupPerlGroup(const StringPiece &name)
static void RemoveLeadingString(Regexp *re, int n)
static const UGroup * LookupGroup(const StringPiece &name, const UGroup *groups, int ngroups)
const UGroup unicode_groups[]
const_pointer data() const
static bool Equal(Regexp *a, Regexp *b)
static ParseStatus ParseCCName(StringPiece *s, Regexp::ParseFlags parse_flags, CharClassBuilder *cc, RegexpStatus *status)
static Regexp * ConcatOrAlternate(RegexpOp op, Regexp **subs, int nsubs, ParseFlags flags, bool can_factor)
@ kRegexpTrailingBackslash
StringPiece whole_regexp_
const RegexpOp kVerticalBar
UniquePtr< SSL_SESSION > ret
Rune CycleFoldRune(Rune r)
AllocList * next[kMaxLevel]
static Regexp * RemoveLeadingRegexp(Regexp *re)
void ConvertLatin1ToUTF8(const StringPiece &latin1, std::string *utf)
static Regexp * AlternateNoFactor(Regexp **subs, int nsubs, ParseFlags flags)
int chartorune(Rune *rune, const char *str)
static const char prefix[]
const int num_perl_groups
const CaseFold unicode_casefold[]
static const UGroup * LookupPosixGroup(const StringPiece &name)
static int StringPieceToRune(Rune *r, StringPiece *sp, RegexpStatus *status)
bool PushRegexp(Regexp *re)
static bool ParseInteger(StringPiece *s, int *np)
bool PushRepetition(int min, int max, const StringPiece &s, bool nongreedy)
const RegexpOp kLeftParen
bool PushWordBoundary(bool word)
Rune ApplyFold(const CaseFold *f, Rune r)
CharClass * GetCharClass()
Splice(Regexp *prefix, Regexp **sub, int nsub)
Regexp(RegexpOp op, ParseFlags parse_flags)
grpc
Author(s):
autogenerated on Fri May 16 2025 02:59:39