Go to the documentation of this file.
25 #include "util/util.h"
26 #include "util/logging.h"
28 #include "util/strutil.h"
31 #include "re2/regexp.h"
42 : encoding_(opt ==
RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
43 posix_syntax_(opt ==
RE2::POSIX),
44 longest_match_(opt ==
RE2::POSIX),
45 log_errors_(opt !=
RE2::Quiet),
46 max_mem_(kDefaultMaxMem),
50 never_capture_(
false),
51 case_sensitive_(
true),
53 word_boundary_(
false),
152 if (!case_sensitive())
240 if (re->options_.log_errors())
241 LOG(ERROR) <<
"Error reverse compiling '" << trunc(re->pattern_) <<
"'";
243 new std::string(
"pattern too large - reverse compile failed");
244 re->error_code_ = RE2::ErrorPatternTooLarge;
282 prog->Fanout(&fanout);
287 while (1 << bucket < i->
value()) {
290 (*histogram)[bucket]++;
335 const Arg*
const args[],
int n) {
340 const Arg*
const args[],
int n) {
345 const Arg*
const args[],
int n) {
348 input->remove_prefix(consumed);
356 const Arg*
const args[],
int n) {
359 input->remove_prefix(consumed);
371 if (nvec >
static_cast<int>(
arraysize(vec)))
377 if (!re.
Rewrite(&s, rewrite, vec, nvec))
380 assert(vec[0].
data() >=
str->data());
381 assert(vec[0].
data() + vec[0].
size() <=
str->data() +
str->size());
391 if (nvec >
static_cast<int>(
arraysize(vec)))
394 const char* p =
str->data();
395 const char* ep = p +
str->size();
396 const char* lastend = NULL;
399 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
402 while (p ==
str->data()) {
406 if (!re.
Match(*
str,
static_cast<size_t>(p -
str->data()),
409 if (p < vec[0].
data())
410 out.append(p, vec[0].
data() - p);
411 if (vec[0].
data() == lastend && vec[0].
size() == 0) {
451 out.append(p, ep - p);
463 if (nvec >
static_cast<int>(
arraysize(vec)))
484 for (
size_t ii = 0; ii < unquoted.
size(); ++ii) {
487 if ((unquoted[ii] <
'a' || unquoted[ii] >
'z') &&
488 (unquoted[ii] <
'A' || unquoted[ii] >
'Z') &&
489 (unquoted[ii] <
'0' || unquoted[ii] >
'9') &&
490 unquoted[ii] !=
'_' &&
494 !(unquoted[ii] & 128)) {
495 if (unquoted[ii] ==
'\0') {
516 int n =
static_cast<int>(
prefix_.size());
525 for (
int i = 0;
i <
n;
i++) {
527 if (
'a' <= c && c <=
'z')
538 }
else if (!
max->empty()) {
556 const char* ae =
a +
len;
558 for (;
a < ae;
a++,
b++) {
561 if (
'A' <=
y &&
y <=
'Z')
577 int nsubmatch)
const {
584 if (startpos > endpos || endpos >
text.size()) {
586 LOG(
ERROR) <<
"RE2: invalid startpos, endpos pair. ["
587 <<
"startpos: " << startpos <<
", "
588 <<
"endpos: " << endpos <<
", "
589 <<
"text size: " <<
text.size() <<
"]";
607 if (ncap > nsubmatch)
622 size_t prefixlen = 0;
627 if (prefixlen > subtext.
size())
633 if (memcmp(&
prefix_[0], subtext.
data(), prefixlen) != 0)
646 bool skipped_test =
false;
654 const int kMaxBitStateBitmapSize = 256*1024;
656 size_t bit_state_text_max = kMaxBitStateBitmapSize /
prog_->
list_count();
658 bool dfa_failed =
false;
663 matchp, &dfa_failed, NULL)) {
687 LOG(
ERROR) <<
"DFA out of memory: size " <<
prog->size() <<
", "
688 <<
"bytemap range " <<
prog->bytemap_range() <<
", "
689 <<
"list count " <<
prog->list_count();
695 LOG(
ERROR) <<
"SearchDFA inconsistency";
714 if (can_one_pass &&
text.size() <= 4096 &&
715 (ncap > 1 ||
text.size() <= 8)) {
719 if (can_bit_state &&
text.size() <= bit_state_text_max && ncap > 1) {
724 &
match, &dfa_failed, NULL)) {
739 if (!skipped_test && ncap <= 1) {
761 LOG(
ERROR) <<
"SearchOnePass inconsistency";
764 }
else if (can_bit_state && subtext1.
size() <= bit_state_text_max) {
766 kind, submatch, ncap)) {
768 LOG(
ERROR) <<
"SearchBitState inconsistency";
774 LOG(
ERROR) <<
"SearchNFA inconsistency";
781 if (prefixlen > 0 && nsubmatch > 0)
783 submatch[0].
size() + prefixlen);
786 for (
int i = ncap;
i < nsubmatch;
i++)
810 if (n == 0 && consumed == NULL)
817 StringPiece* heapvec = NULL;
819 if (nvec <=
static_cast<int>(
arraysize(stkvec))) {
822 vec =
new StringPiece[nvec];
831 if (consumed != NULL)
832 *consumed =
static_cast<size_t>(vec[0].end() -
text.begin());
834 if (n == 0 ||
args == NULL) {
841 for (
int i = 0;
i <
n;
i++) {
842 const StringPiece&
s = vec[
i+1];
859 for (
const char *s = rewrite.
data(), *
end = s + rewrite.
size();
866 *
error =
"Rewrite schema error: '\\' not allowed at end.";
874 *
error =
"Rewrite schema error: "
875 "'\\' must be followed by a digit or '\\'.";
886 "Rewrite schema requests %d matches, but the regexp only has %d "
887 "parenthesized subexpressions.",
898 for (
const char *s = rewrite.
data(), *
end = s + rewrite.
size();
902 int c = (s <
end) ? *s : -1;
919 for (
const char *s = rewrite.
data(), *
end = s + rewrite.
size();
926 int c = (s <
end) ? *s : -1;
932 <<
" in regexp " << rewrite.
data();
939 }
else if (c ==
'\\') {
940 out->push_back(
'\\');
943 LOG(
ERROR) <<
"invalid rewrite pattern: " << rewrite.
data();
954 return (
dest == NULL);
958 if (
dest == NULL)
return true;
964 if (
dest == NULL)
return true;
970 if (
n != 1)
return false;
971 if (
dest == NULL)
return true;
972 *(
reinterpret_cast<char*
>(
dest)) =
str[0];
977 if (
n != 1)
return false;
978 if (
dest == NULL)
return true;
979 *(
reinterpret_cast<signed char*
>(
dest)) =
str[0];
984 if (
n != 1)
return false;
985 if (
dest == NULL)
return true;
986 *(
reinterpret_cast<unsigned char*
>(
dest)) =
str[0];
997 size_t* np,
bool accept_spaces) {
999 if (
n == 0)
return "";
1000 if (
n > 0 && isspace(*
str)) {
1003 if (!accept_spaces) {
1006 while (
n > 0 && isspace(*
str)) {
1021 if (
n >= 1 &&
str[0] ==
'-') {
1027 if (
n >= 3 &&
str[0] ==
'0' &&
str[1] ==
'0') {
1028 while (
n >= 3 &&
str[2] ==
'0') {
1039 if (
n > nbuf-1)
return "";
1050 bool RE2::Arg::parse_long_radix(
const char*
str,
1054 if (n == 0)
return false;
1060 if (
end !=
str + n)
return false;
1061 if (errno)
return false;
1062 if (
dest == NULL)
return true;
1063 *(
reinterpret_cast<long*
>(
dest)) =
r;
1067 bool RE2::Arg::parse_ulong_radix(
const char*
str,
1071 if (n == 0)
return false;
1074 if (
str[0] ==
'-') {
1083 if (
end !=
str + n)
return false;
1084 if (errno)
return false;
1085 if (
dest == NULL)
return true;
1086 *(
reinterpret_cast<unsigned long*
>(
dest)) =
r;
1090 bool RE2::Arg::parse_short_radix(
const char*
str,
1095 if (!parse_long_radix(
str, n, &
r,
radix))
return false;
1096 if ((
short)
r !=
r)
return false;
1097 if (
dest == NULL)
return true;
1098 *(
reinterpret_cast<short*
>(
dest)) = (short)
r;
1102 bool RE2::Arg::parse_ushort_radix(
const char*
str,
1107 if (!parse_ulong_radix(
str, n, &
r,
radix))
return false;
1108 if ((
unsigned short)
r !=
r)
return false;
1109 if (
dest == NULL)
return true;
1110 *(
reinterpret_cast<unsigned short*
>(
dest)) = (
unsigned short)
r;
1114 bool RE2::Arg::parse_int_radix(
const char*
str,
1119 if (!parse_long_radix(
str, n, &
r,
radix))
return false;
1120 if ((
int)
r !=
r)
return false;
1121 if (
dest == NULL)
return true;
1122 *(
reinterpret_cast<int*
>(
dest)) = (
int)
r;
1126 bool RE2::Arg::parse_uint_radix(
const char*
str,
1131 if (!parse_ulong_radix(
str, n, &
r,
radix))
return false;
1132 if ((
unsigned int)
r !=
r)
return false;
1133 if (
dest == NULL)
return true;
1134 *(
reinterpret_cast<unsigned int*
>(
dest)) = (
unsigned int)
r;
1138 bool RE2::Arg::parse_longlong_radix(
const char*
str,
1142 if (n == 0)
return false;
1148 if (
end !=
str + n)
return false;
1149 if (errno)
return false;
1150 if (
dest == NULL)
return true;
1151 *(
reinterpret_cast<long long*
>(
dest)) =
r;
1155 bool RE2::Arg::parse_ulonglong_radix(
const char*
str,
1159 if (n == 0)
return false;
1162 if (
str[0] ==
'-') {
1170 if (
end !=
str + n)
return false;
1171 if (errno)
return false;
1172 if (
dest == NULL)
return true;
1173 *(
reinterpret_cast<unsigned long long*
>(
dest)) =
r;
1179 if (
n == 0)
return false;
1180 static const int kMaxLength = 200;
1181 char buf[kMaxLength+1];
1191 if (
end !=
str +
n)
return false;
1192 if (errno)
return false;
1193 if (
dest == NULL)
return true;
1195 *(
reinterpret_cast<float*
>(
dest)) = (float)
r;
1197 *(
reinterpret_cast<double*
>(
dest)) =
r;
1210 #define DEFINE_INTEGER_PARSER(name) \
1211 bool RE2::Arg::parse_##name(const char* str, size_t n, void* dest) { \
1212 return parse_##name##_radix(str, n, dest, 10); \
1214 bool RE2::Arg::parse_##name##_hex(const char* str, size_t n, void* dest) { \
1215 return parse_##name##_radix(str, n, dest, 16); \
1217 bool RE2::Arg::parse_##name##_octal(const char* str, size_t n, void* dest) { \
1218 return parse_##name##_radix(str, n, dest, 8); \
1220 bool RE2::Arg::parse_##name##_cradix(const char* str, size_t n, \
1222 return parse_##name##_radix(str, n, dest, 0); \
1234 #undef DEFINE_INTEGER_PARSER
static std::string trunc(const StringPiece &pattern)
static bool parse_string(const char *str, size_t n, void *dest)
static bool PartialMatchN(const StringPiece &text, const RE2 &re, const Arg *const args[], int n)
bool SearchOnePass(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match, int nmatch)
int fullrune(const char *str, int n)
void remove_prefix(size_type n)
const std::map< int, std::string > & CapturingGroupNames() const
void remove_suffix(size_type n)
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen) const
static bool FullMatchN(const StringPiece &text, const RE2 &re, const Arg *const args[], int n)
static const std::map< std::string, int > * empty_named_groups
unsigned char match[65280+2]
static bool parse_char(const char *str, size_t n, void *dest)
int ProgramFanout(std::map< int, int > *histogram) const
int ReverseProgramFanout(std::map< int, int > *histogram) const
static bool Extract(const StringPiece &text, const RE2 &re, const StringPiece &rewrite, std::string *out)
static bool parse_double(const char *str, size_t n, void *dest)
const std::string & error() const
Prog * CompileToProg(int64_t max_mem)
Arg(64) -> Arg(128) ->Arg(256) ->Arg(512) ->Arg(1024) ->Arg(1536) ->Arg(2048) ->Arg(3072) ->Arg(4096) ->Arg(5120) ->Arg(6144) ->Arg(7168)
bool RequiredPrefix(std::string *prefix, bool *foldcase, Regexp **suffix)
static bool parse_uchar(const char *str, size_t n, void *dest)
re2::Prog * ReverseProg() const
static bool Replace(std::string *str, const RE2 &re, const StringPiece &rewrite)
static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code)
bool DoMatch(const StringPiece &text, Anchor re_anchor, size_t *consumed, const Arg *const args[], int n) const
static bool parse_schar(const char *str, size_t n, void *dest)
std::once_flag named_groups_once_
void call_once(absl::once_flag &flag, Callable &&fn, Args &&... args)
bool longest_match() const
static bool FindAndConsumeN(StringPiece *input, const RE2 &re, const Arg *const args[], int n)
const std::map< std::string, int > & NamedCapturingGroups() const
re2::Regexp * entire_regexp_
const Options & options() const
bool SearchBitState(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match, int nmatch)
bool CheckRewriteString(const StringPiece &rewrite, std::string *error) const
static int MaxSubmatch(const StringPiece &rewrite)
static const int kVecSize
std::string StringPrintf(const char *format,...)
static Regexp * Parse(const StringPiece &s, ParseFlags flags, RegexpStatus *status)
static int ascii_strcasecmp(const char *a, const char *b, size_t len)
static bool parse_float(const char *str, size_t n, void *dest)
static grpc_histogram * histogram
static bool parse_double_float(const char *str, size_t n, bool isfloat, void *dest)
void swap(Json::Value &a, Json::Value &b)
Specialize std::swap() for Json::Value.
bool Parse(FlagOpFn op, absl::string_view text, void *dst, std::string *error)
static const std::map< int, std::string > * empty_group_names
const std::map< std::string, int > * named_groups_
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
void PrefixSuccessor(std::string *prefix)
static const LogLevel ERROR
static const int kDefaultMaxMem
static bool ConsumeN(StringPiece *input, const RE2 &re, const Arg *const args[], int n)
Prog * CompileToReverseProg(int64_t max_mem)
bool SearchDFA(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match0, bool *failed, SparseSet *matches)
bool Match(const StringPiece &text, size_t startpos, size_t endpos, Anchor re_anchor, StringPiece *submatch, int nsubmatch) const
int ReverseProgramSize() const
static int GlobalReplace(std::string *str, const RE2 &re, const StringPiece &rewrite)
DEFINE_INTEGER_PARSER(short)
static std::string QuoteMeta(const StringPiece &unquoted)
const_pointer data() const
static const int kMaxOnePassCapture
void Init(const StringPiece &pattern, const Options &options)
std::map< std::string, int > * NamedCaptures()
@ kRegexpTrailingBackslash
static const int kMaxNumberLength
unsigned char suffix[65536]
static int Fanout(Prog *prog, std::map< int, int > *histogram)
static bool parse_stringpiece(const char *str, size_t n, void *dest)
const std::string & pattern() const
int chartorune(Rune *rune, const char *str)
std::map< int, std::string > * CaptureNames()
static bool parse_null(const char *str, size_t n, void *dest)
static const std::string * empty_string
std::once_flag group_names_once_
static const int kMaxArgs
static const char * TerminateNumber(char *buf, size_t nbuf, const char *str, size_t *np, bool accept_spaces)
const std::map< int, std::string > * group_names_
int NumberOfCapturingGroups() const
absl::StatusCode code() const
const std::string * error_
if(p->owned &&p->wrapped !=NULL)
bool Rewrite(std::string *out, const StringPiece &rewrite, const StringPiece *vec, int veclen) const
std::once_flag rprog_once_
re2::Regexp * suffix_regexp_
Encoding encoding() const
bool SearchNFA(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match, int nmatch)
void Copy(const Options &src)
grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:00