Go to the documentation of this file.
34 #include <unordered_map>
35 #include <unordered_set>
39 #include "util/logging.h"
41 #include "util/mutex.h"
44 #include "util/strutil.h"
46 #include "re2/stringpiece.h"
50 #pragma warning(disable: 4200)
55 #if !defined(__linux__)
56 static void*
memrchr(
const void* s,
int c,
size_t n) {
57 const unsigned char* p = (
const unsigned char*)s;
58 for (p +=
n;
n > 0;
n--)
98 bool anchored,
bool want_earliest_match,
bool run_forward,
99 bool* failed,
const char** ep,
SparseSet* matches);
131 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
132 std::atomic<State*>
next_[0];
153 for (
int i = 0;
i <
a->ninst_;
i++)
154 mix.
Mix(
a->inst_[
i]);
166 if (
a->flag_ !=
b->flag_)
168 if (
a->ninst_ !=
b->ninst_)
170 for (
int i = 0;
i <
a->ninst_;
i++)
171 if (
a->inst_[
i] !=
b->inst_[
i])
177 typedef std::unordered_set<State*, StateHash, StateEqual>
StateSet;
301 bool have_first_byte,
302 bool want_earliest_match,
367 return reinterpret_cast<const uint8_t*
>(
v);
378 #define MatchSep (-2)
443 mem_budget_(max_mem) {
458 (
sizeof(
int)+
sizeof(
int)) * 2;
474 int64_t one_state =
sizeof(
State) + nnext*
sizeof(std::atomic<State*>) +
497 #define DeadState reinterpret_cast<State*>(1)
500 #define FullMatchState reinterpret_cast<State*>(2)
502 #define SpecialStateMax FullMatchState
509 const char*
sep =
"";
511 if (q->is_mark(*
it)) {
531 const char*
sep =
"";
533 for (
int i = 0;
i <
state->ninst_;
i++) {
616 int* inst =
new int[q->size()];
619 bool sawmatch =
false;
620 bool sawmark =
false;
627 if (q->is_mark(
id)) {
628 if (
n > 0 && inst[
n-1] !=
Mark) {
648 fprintf(
stderr,
" -> FullMatchState\n");
658 needflags |= ip->
empty();
665 if (
n > 0 && inst[
n-1] ==
Mark)
692 if (
n == 0 &&
flag == 0) {
695 fprintf(
stderr,
" -> DeadState\n");
707 while (markp < ep && *markp !=
Mark)
709 std::sort(ip, markp);
755 state.ninst_ = ninst;
768 const int kStateCacheOverhead = 40;
770 int mem =
sizeof(
State) + nnext*
sizeof(std::atomic<State*>) +
779 char* space = std::allocator<char>().allocate(
mem);
781 (void)
new (s->next_) std::atomic<State*>[nnext];
784 for (
int i = 0;
i < nnext;
i++)
785 (
void)
new (s->next_ +
i) std::atomic<State*>(NULL);
786 s->inst_ =
new (s->next_ + nnext)
int[ninst];
787 memmove(s->inst_, inst, ninst*
sizeof s->inst_[0]);
807 int ninst = (*tmp)->ninst_;
809 int mem =
sizeof(
State) + nnext*
sizeof(std::atomic<State*>) +
811 std::allocator<char>().deallocate(
reinterpret_cast<char*
>(*
tmp),
mem);
819 for (
int i = 0;
i < s->ninst_;
i++) {
820 if (s->inst_[
i] ==
Mark) {
874 LOG(DFATAL) <<
"unhandled opcode: " << ip->
opcode();
963 LOG(DFATAL) <<
"unhandled opcode: " << ip->
opcode();
1018 LOG(DFATAL) <<
"DeadState in RunStateOnByte";
1021 if (
state == NULL) {
1022 LOG(DFATAL) <<
"NULL state in RunStateOnByte";
1025 LOG(DFATAL) <<
"Unexpected special state in RunStateOnByte";
1043 uint32_t oldbeforeflag = beforeflag;
1062 if (isword == islastword)
1069 if (beforeflag & ~oldbeforeflag & needflag) {
1074 bool ismatch =
false;
1145 mu_->ReaderUnlock();
1153 mu_->ReaderUnlock();
1155 mu_->WriterUnlock();
1234 is_special_ =
false;
1237 ninst_ =
state->ninst_;
1238 inst_ =
new int[ninst_];
1239 memmove(inst_,
state->inst_, ninst_*
sizeof inst_[0]);
1251 State* s = dfa_->CachedState(inst_, ninst_,
flag_);
1253 LOG(DFATAL) <<
"StateSaver failed to restore state.";
1327 bool have_first_byte,
1328 bool want_earliest_match,
1342 const uint8_t* lastmatch = NULL;
1343 bool matched =
false;
1355 for (
int i = s->ninst_ - 1;
i >= 0;
i--) {
1356 int id = s->inst_[
i];
1362 if (want_earliest_match) {
1363 params->
ep =
reinterpret_cast<const char*
>(lastmatch);
1370 fprintf(
stderr,
"@%td: %s\n",
1373 if (have_first_byte && s ==
start) {
1416 State*
ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
1430 static_cast<size_t>(p - resetp) < 10*
state_cache_.size() &&
1446 (s = save_s.
Restore()) == NULL) {
1453 LOG(DFATAL) <<
"RunStateOnByteUnlocked failed after ResetCache";
1461 params->
ep =
reinterpret_cast<const char*
>(lastmatch);
1465 params->
ep =
reinterpret_cast<const char*
>(ep);
1479 fprintf(
stderr,
"match @%td! [%s]\n",
1482 for (
int i = s->ninst_ - 1;
i >= 0;
i--) {
1483 int id = s->inst_[
i];
1489 if (want_earliest_match) {
1490 params->
ep =
reinterpret_cast<const char*
>(lastmatch);
1506 lastbyte = params->
text.
end()[0] & 0xFF;
1511 lastbyte = params->
text.
begin()[-1] & 0xFF;
1514 State*
ns = s->next_[
ByteMap(lastbyte)].load(std::memory_order_acquire);
1520 if ((s = save_s.
Restore()) == NULL) {
1526 LOG(DFATAL) <<
"RunStateOnByteUnlocked failed after Reset";
1534 params->
ep =
reinterpret_cast<const char*
>(lastmatch);
1538 params->
ep =
reinterpret_cast<const char*
>(ep);
1549 for (
int i = s->ninst_ - 1;
i >= 0;
i--) {
1550 int id = s->inst_[
i];
1558 params->
ep =
reinterpret_cast<const char*
>(lastmatch);
1612 bool have_first_byte = params->
first_byte >= 0;
1613 int index = 4 * have_first_byte +
1616 return (this->*Searches[
index])(params);
1652 LOG(DFATAL) <<
"context does not contain text";
1664 }
else if (
text.begin()[-1] ==
'\n') {
1678 }
else if (
text.end()[0] ==
'\n') {
1699 LOG(DFATAL) <<
"Failed to analyze start state.";
1706 fprintf(
stderr,
"anchored=%d fwd=%d flags=%#x state=%s first_byte=%d\n",
1720 int fb = info->
first_byte.load(std::memory_order_acquire);
1725 fb = info->
first_byte.load(std::memory_order_relaxed);
1734 if (info->
start == NULL)
1754 if (first_byte == -1 ||
1760 info->
first_byte.store(first_byte, std::memory_order_release);
1768 bool want_earliest_match,
1782 fprintf(
stderr,
"text %s anchored=%d earliest=%d fwd=%d kind %d\n",
1784 run_forward,
kind_);
1801 if (run_forward == want_earliest_match)
1826 if (
kind == kFirstMatch) {
1831 }
else if (
kind == kManyMatch) {
1838 if (!
prog->reversed_)
1839 prog->dfa_longest_ =
new DFA(
prog, kLongestMatch,
prog->dfa_mem_ / 2);
1843 return dfa_longest_;
1869 bool carat = anchor_start();
1870 bool dollar = anchor_end();
1873 swap(carat, dollar);
1882 bool anchored = anchor == kAnchored || anchor_start() ||
kind == kFullMatch;
1883 bool endmatch =
false;
1884 if (
kind == kManyMatch) {
1886 }
else if (
kind == kFullMatch || anchor_end()) {
1888 kind = kLongestMatch;
1894 bool want_earliest_match =
false;
1895 if (
kind == kManyMatch) {
1897 if (matches == NULL) {
1898 want_earliest_match =
true;
1900 }
else if (match0 == NULL && !endmatch) {
1901 want_earliest_match =
true;
1902 kind = kLongestMatch;
1908 want_earliest_match, !reversed_,
1909 failed, &ep, matches);
1914 if (endmatch && ep != (reversed_ ?
text.data() :
text.data() +
text.size()))
1942 params.
start == NULL ||
1949 std::unordered_map<State*, int>
m;
1950 std::deque<State*> q;
1951 m.emplace(params.
start,
static_cast<int>(
m.size()));
1952 q.push_back(params.
start);
1956 std::vector<int>
input(nnext);
1957 for (
int c = 0; c < 256; c++) {
1959 while (c < 256-1 && prog_->bytemap()[c+1] ==
b)
1966 std::vector<int>
output(nnext);
1970 while (!q.empty()) {
1971 State* s = q.front();
1973 for (
int c :
input) {
1983 if (
m.find(
ns) ==
m.end()) {
1984 m.emplace(
ns,
static_cast<int>(
m.size()));
1996 return static_cast<int>(
m.size());
2001 return GetDFA(
kind)->BuildAllStates(
cb);
2017 static int kMaxEltRepetitions = 0;
2027 std::unordered_map<State*, int> previously_visited_states;
2072 for (
int i = 0;
i < maxlen;
i++) {
2073 if (previously_visited_states[s] > kMaxEltRepetitions)
2075 previously_visited_states[s]++;
2085 bool extended =
false;
2086 for (
int j = 0; j < 256; j++) {
2093 min->append(1,
static_cast<char>(j));
2103 previously_visited_states.clear();
2106 for (
int i = 0;
i < maxlen;
i++) {
2107 if (previously_visited_states[s] > kMaxEltRepetitions)
2109 previously_visited_states[s] += 1;
2112 bool extended =
false;
2113 for (
int j = 255; j >= 0; j--) {
2120 max->append(1,
static_cast<char>(j));
2150 return GetDFA(kLongestMatch)->PossibleMatchRange(
min,
max, maxlen);
State * RunStateOnByteUnlocked(State *, int)
Workq & operator=(const Workq &)=delete
#define NO_THREAD_SAFETY_ANALYSIS
Workq(int n, int maxmark)
DFA(Prog *prog, Prog::MatchKind kind, int64_t max_mem)
bool SearchFTT(SearchParams *params)
std::string DumpUnanchored()
bool SearchTTT(SearchParams *params)
int BuildEntireDFA(MatchKind kind, const DFAStateCallback &cb)
static std::string DumpWorkq(Workq *q)
bool SlowSearchLoop(SearchParams *params)
#define DCHECK_LE(val1, val2)
const_iterator end() const
RWLocker & operator=(const RWLocker &)=delete
bool SearchFFT(SearchParams *params)
const typedef MCPhysReg * iterator
static const uint8_t * BytePtr(const void *v)
static bool IsWordChar(uint8_t c)
int inst_count(InstOp op)
static std::string DumpState(State *state)
State * CachedState(int *inst, int ninst, uint32_t flag)
State * WorkqToCachedState(Workq *q, Workq *mq, uint32_t flag)
DFA * GetDFA(MatchKind kind)
void call_once(absl::once_flag &flag, Callable &&fn, Args &&... args)
bool contains(int i) const
def c_str(s, encoding='ascii')
int BuildAllStates(const Prog::DFAStateCallback &cb)
#define FALLTHROUGH_INTENDED
void RunWorkqOnByte(Workq *q, Workq *nq, int c, uint32_t flag, bool *ismatch)
bool SearchFTF(SearchParams *params)
DFA & operator=(const DFA &)=delete
bool SearchTFT(SearchParams *params)
static void * memrchr(const void *s, int c, size_t n)
bool InlinedSearchLoop(SearchParams *params, bool have_first_byte, bool want_earliest_match, bool run_forward)
std::string StringPrintf(const char *format,...)
StateSaver(DFA *dfa, State *state)
bool FastSearchLoop(SearchParams *params)
void RunWorkqOnEmptyString(Workq *q, Workq *nq, uint32_t flag)
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
std::unordered_set< State *, StateHash, StateEqual > StateSet
void swap(Json::Value &a, Json::Value &b)
Specialize std::swap() for Json::Value.
SearchParams(const StringPiece &text, const StringPiece &context, RWLocker *cache_lock)
State * RunStateOnByte(State *, int)
bool PossibleMatchRange(std::string *min, std::string *max, int maxlen)
void PrefixSuccessor(std::string *prefix)
const uint8_t * bytemap()
bool SearchTTF(SearchParams *params)
bool SearchFFF(SearchParams *params)
bool SearchDFA(const StringPiece &text, const StringPiece &context, Anchor anchor, MatchKind kind, StringPiece *match0, bool *failed, SparseSet *matches)
promise_detail::Loop< F > Loop(F f)
const_pointer data() const
bool Search(const StringPiece &text, const StringPiece &context, bool anchored, bool want_earliest_match, bool run_forward, bool *failed, const char **ep, SparseSet *matches)
StartInfo start_[kMaxStart]
std::atomic< int > first_byte
UniquePtr< SSL_SESSION > ret
static bool dfa_should_bail_when_slow
const_iterator begin() const
static const bool ExtraDebug
bool operator()(const State *a, const State *b) const
static void TEST_dfa_should_bail_when_slow(bool b)
void ResetCache(RWLocker *cache_lock)
void StateToWorkq(State *s, Workq *q)
grpc::ClientContext context
SearchParams & operator=(const SearchParams &)=delete
std::function< void(const int *next, bool match)> DFAStateCallback
size_t operator()(const State *a) const
#define DCHECK(condition)
bool AnalyzeSearch(SearchParams *params)
bool AnalyzeSearchHelper(SearchParams *params, StartInfo *info, uint32_t flags)
void AddToQueue(Workq *q, int id, uint32_t flag)
bool SearchTFF(SearchParams *params)
std::atomic< State * > next_[]
OPENSSL_EXPORT pem_password_cb * cb
static struct rpc_state state
iterator insert_new(int i)
grpc
Author(s):
autogenerated on Fri May 16 2025 02:58:16