34 #include <unordered_map>
35 #include <unordered_set>
39 #include "util/logging.h"
41 #include "util/mutex.h"
42 #include "util/strutil.h"
47 #include "re2/stringpiece.h"
51 #pragma warning(disable: 4200)
92 bool anchored,
bool want_earliest_match,
bool run_forward,
93 bool* failed,
const char** ep,
SparseSet* matches);
125 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 6 && __GNUC_MINOR__ >= 1
126 std::atomic<State*>
next_[0];
128 std::atomic<State*>
next_[];
147 for (
int i = 0;
i <
a->ninst_;
i++)
148 mix.
Mix(
a->inst_[
i]);
160 if (
a->flag_ !=
b->flag_)
162 if (
a->ninst_ !=
b->ninst_)
164 for (
int i = 0;
i <
a->ninst_;
i++)
165 if (
a->inst_[
i] !=
b->inst_[
i])
171 typedef std::unordered_set<State*, StateHash, StateEqual>
StateSet;
240 struct SearchParams {
290 template <
bool can_prefix_accel,
291 bool want_earliest_match,
353 return reinterpret_cast<const uint8_t*
>(
v);
364 #define MatchSep (-2)
429 mem_budget_(max_mem) {
444 (
sizeof(
int)+
sizeof(
int)) * 2;
460 int64_t one_state =
sizeof(State) + nnext*
sizeof(std::atomic<State*>) +
483 #define DeadState reinterpret_cast<State*>(1)
486 #define FullMatchState reinterpret_cast<State*>(2)
488 #define SpecialStateMax FullMatchState
495 const char*
sep =
"";
497 if (q->is_mark(*
it)) {
517 const char*
sep =
"";
519 for (
int i = 0;
i <
state->ninst_;
i++) {
602 PODArray<int> inst(
q->size());
605 bool sawmatch =
false;
606 bool sawmark =
false;
613 if (
q->is_mark(
id)) {
614 if (n > 0 && inst[n-1] !=
Mark) {
621 switch (ip->opcode()) {
629 (
it ==
q->begin() && ip->greedy(
prog_))) &&
633 fprintf(
stderr,
" -> FullMatchState\n");
643 needflags |= ip->empty();
650 if (n > 0 && inst[n-1] ==
Mark)
677 if (n == 0 &&
flag == 0) {
679 fprintf(
stderr,
" -> DeadState\n");
687 int* ip = inst.data();
691 while (markp < ep && *markp !=
Mark)
693 std::sort(ip, markp);
704 int* ip = inst.data();
738 state.ninst_ = ninst;
751 const int kStateCacheOverhead = 40;
753 int mem =
sizeof(State) + nnext*
sizeof(std::atomic<State*>) +
762 char* space = std::allocator<char>().allocate(
mem);
763 State*
s =
new (space) State;
764 (void)
new (
s->next_) std::atomic<State*>[nnext];
767 for (
int i = 0;
i < nnext;
i++)
768 (
void)
new (
s->next_ +
i) std::atomic<State*>(NULL);
769 s->inst_ =
new (
s->next_ + nnext)
int[ninst];
770 memmove(
s->inst_, inst, ninst*
sizeof s->inst_[0]);
790 int ninst = (*tmp)->ninst_;
792 int mem =
sizeof(State) + nnext*
sizeof(std::atomic<State*>) +
794 std::allocator<char>().deallocate(
reinterpret_cast<char*
>(*
tmp),
mem);
802 for (
int i = 0;
i <
s->ninst_;
i++) {
803 if (
s->inst_[i] ==
Mark) {
855 switch (ip->opcode()) {
857 LOG(DFATAL) <<
"unhandled opcode: " << ip->opcode();
876 if (ip->opcode() ==
kInstNop &&
q->maxmark() > 0 &&
892 if (ip->empty() & ~
flag)
919 if (oldq->is_mark(*i))
936 if (oldq->is_mark(*i)) {
944 switch (ip->opcode()) {
946 LOG(DFATAL) <<
"unhandled opcode: " << ip->opcode();
960 if (ip->hint() != 0) {
967 Prog::Inst* ip0 = ip;
988 fprintf(
stderr,
"%s on %d[%#x] -> %s [%d]\n",
1014 LOG(DFATAL) <<
"DeadState in RunStateOnByte";
1017 if (
state == NULL) {
1018 LOG(DFATAL) <<
"NULL state in RunStateOnByte";
1021 LOG(DFATAL) <<
"Unexpected special state in RunStateOnByte";
1026 State*
ns =
state->next_[
ByteMap(c)].load(std::memory_order_relaxed);
1039 uint32_t oldbeforeflag = beforeflag;
1058 if (isword == islastword)
1065 if (beforeflag & ~oldbeforeflag & needflag) {
1070 bool ismatch =
false;
1114 class DFA::RWLocker {
1123 void LockForWriting();
1141 mu_->ReaderUnlock();
1149 mu_->ReaderUnlock();
1151 mu_->WriterUnlock();
1168 cache_lock->LockForWriting();
1170 hooks::GetDFAStateCacheResetHook()({
1177 start_[i].
start.store(NULL, std::memory_order_relaxed);
1198 class DFA::StateSaver {
1200 explicit StateSaver(
DFA* dfa, State*
state);
1219 StateSaver(
const StateSaver&) =
delete;
1220 StateSaver&
operator=(
const StateSaver&) =
delete;
1233 is_special_ =
false;
1236 ninst_ =
state->ninst_;
1237 inst_ =
new int[ninst_];
1238 memmove(inst_,
state->inst_, ninst_*
sizeof inst_[0]);
1250 State*
s = dfa_->CachedState(inst_, ninst_,
flag_);
1252 LOG(DFATAL) <<
"StateSaver failed to restore state.";
1324 template <
bool can_prefix_accel,
1325 bool want_earliest_match,
1332 params->text.size());
1340 const uint8_t* lastmatch = NULL;
1341 bool matched =
false;
1353 for (
int i = s->ninst_ - 1;
i >= 0;
i--) {
1354 int id = s->inst_[
i];
1357 params->matches->insert(
id);
1360 if (want_earliest_match) {
1361 params->ep =
reinterpret_cast<const char*
>(lastmatch);
1370 if (can_prefix_accel && s ==
start) {
1405 State*
ns = s->next_[bytemap[c]].load(std::memory_order_acquire);
1419 static_cast<size_t>(p - resetp) < 10*
state_cache_.size() &&
1421 params->failed =
true;
1435 (s = save_s.
Restore()) == NULL) {
1437 params->failed =
true;
1442 LOG(DFATAL) <<
"RunStateOnByteUnlocked failed after ResetCache";
1443 params->failed =
true;
1450 params->ep =
reinterpret_cast<const char*
>(lastmatch);
1454 params->ep =
reinterpret_cast<const char*
>(ep);
1470 for (
int i = s->ninst_ - 1;
i >= 0;
i--) {
1471 int id = s->inst_[
i];
1474 params->matches->insert(
id);
1477 if (want_earliest_match) {
1478 params->ep =
reinterpret_cast<const char*
>(lastmatch);
1491 if (params->text.end() == params->context.end())
1494 lastbyte = params->text.end()[0] & 0xFF;
1496 if (params->text.begin() == params->context.begin())
1499 lastbyte = params->text.begin()[-1] & 0xFF;
1502 State*
ns = s->next_[
ByteMap(lastbyte)].load(std::memory_order_acquire);
1508 if ((s = save_s.
Restore()) == NULL) {
1509 params->failed =
true;
1514 LOG(DFATAL) <<
"RunStateOnByteUnlocked failed after Reset";
1515 params->failed =
true;
1522 params->ep =
reinterpret_cast<const char*
>(lastmatch);
1526 params->ep =
reinterpret_cast<const char*
>(ep);
1537 for (
int i = s->ninst_ - 1;
i >= 0;
i--) {
1538 int id = s->inst_[
i];
1541 params->matches->insert(
id);
1546 params->ep =
reinterpret_cast<const char*
>(lastmatch);
1552 return InlinedSearchLoop<false, false, false>(params);
1555 return InlinedSearchLoop<false, false, true>(params);
1558 return InlinedSearchLoop<false, true, false>(params);
1561 return InlinedSearchLoop<false, true, true>(params);
1564 return InlinedSearchLoop<true, false, false>(params);
1567 return InlinedSearchLoop<true, false, true>(params);
1570 return InlinedSearchLoop<true, true, false>(params);
1573 return InlinedSearchLoop<true, true, true>(params);
1581 static bool (
DFA::*Searches[])(SearchParams*) = {
1592 int index = 4 * params->can_prefix_accel +
1593 2 * params->want_earliest_match +
1594 1 * params->run_forward;
1595 return (this->*Searches[
index])(params);
1626 const StringPiece&
text = params->text;
1627 const StringPiece&
context = params->context;
1631 LOG(DFATAL) <<
"context does not contain text";
1639 if (params->run_forward) {
1643 }
else if (
text.begin()[-1] ==
'\n') {
1657 }
else if (
text.end()[0] ==
'\n') {
1668 if (params->anchored)
1678 LOG(DFATAL) <<
"Failed to analyze start state.";
1679 params->failed =
true;
1684 params->start = info->start.load(std::memory_order_acquire);
1691 !params->anchored &&
1694 params->can_prefix_accel =
true;
1697 fprintf(
stderr,
"anchored=%d fwd=%d flags=%#x state=%s can_prefix_accel=%d\n",
1698 params->anchored, params->run_forward,
flags,
1699 DumpState(params->start).c_str(), params->can_prefix_accel);
1708 State*
start = info->start.load(std::memory_order_acquire);
1713 start = info->start.load(std::memory_order_relaxed);
1726 info->start.store(
start, std::memory_order_release);
1734 bool want_earliest_match,
1748 fprintf(
stderr,
"text %s anchored=%d earliest=%d fwd=%d kind %d\n",
1754 params.anchored = anchored;
1755 params.want_earliest_match = want_earliest_match;
1756 params.run_forward = run_forward;
1757 params.matches = matches;
1766 if (run_forward == want_earliest_match)
1775 if (params.failed) {
1791 if (
kind == kFirstMatch) {
1796 }
else if (
kind == kManyMatch) {
1803 if (!
prog->reversed_)
1804 prog->dfa_longest_ =
new DFA(
prog, kLongestMatch,
prog->dfa_mem_ / 2);
1808 return dfa_longest_;
1831 StringPiece
context = const_context;
1834 bool caret = anchor_start();
1835 bool dollar = anchor_end();
1838 swap(caret, dollar);
1847 bool anchored = anchor == kAnchored || anchor_start() ||
kind == kFullMatch;
1848 bool endmatch =
false;
1849 if (
kind == kManyMatch) {
1851 }
else if (
kind == kFullMatch || anchor_end()) {
1853 kind = kLongestMatch;
1859 bool want_earliest_match =
false;
1860 if (
kind == kManyMatch) {
1862 if (matches == NULL) {
1863 want_earliest_match =
true;
1865 }
else if (match0 == NULL && !endmatch) {
1866 want_earliest_match =
true;
1867 kind = kLongestMatch;
1872 bool matched = dfa->Search(
text,
context, anchored,
1873 want_earliest_match, !reversed_,
1874 failed, &ep, matches);
1876 hooks::GetDFASearchFailureHook()({
1883 if (endmatch && ep != (reversed_ ?
text.data() :
text.data() +
text.size()))
1892 StringPiece(ep,
static_cast<size_t>(
text.data() +
text.size() - ep));
1895 StringPiece(
text.data(),
static_cast<size_t>(ep -
text.data()));
1908 SearchParams params(StringPiece(), StringPiece(), &l);
1909 params.anchored =
false;
1911 params.start == NULL ||
1918 std::unordered_map<State*, int>
m;
1919 std::deque<State*>
q;
1920 m.emplace(params.start,
static_cast<int>(
m.size()));
1921 q.push_back(params.start);
1925 std::vector<int>
input(nnext);
1926 for (
int c = 0;
c < 256;
c++) {
1928 while (c < 256-1 && prog_->bytemap()[c+1] ==
b)
1935 std::vector<int>
output(nnext);
1939 while (!
q.empty()) {
1940 State*
s =
q.front();
1942 for (
int c :
input) {
1952 if (
m.find(
ns) ==
m.end()) {
1953 m.emplace(
ns,
static_cast<int>(
m.size()));
1965 return static_cast<int>(
m.size());
1970 return GetDFA(
kind)->BuildAllStates(
cb);
1982 static int kMaxEltRepetitions = 0;
1992 std::unordered_map<State*, int> previously_visited_states;
1996 SearchParams params(StringPiece(), StringPiece(), &l);
1997 params.anchored =
true;
2034 State*
s = params.start;
2037 for (
int i = 0;
i < maxlen;
i++) {
2038 if (previously_visited_states[s] > kMaxEltRepetitions)
2040 previously_visited_states[
s]++;
2050 bool extended =
false;
2051 for (
int j = 0;
j < 256;
j++) {
2058 min->append(1,
static_cast<char>(j));
2068 previously_visited_states.clear();
2071 for (
int i = 0;
i < maxlen;
i++) {
2072 if (previously_visited_states[s] > kMaxEltRepetitions)
2074 previously_visited_states[
s] += 1;
2077 bool extended =
false;
2078 for (
int j = 255;
j >= 0;
j--) {
2085 max->append(1,
static_cast<char>(j));
2115 return GetDFA(kLongestMatch)->PossibleMatchRange(
min,
max, maxlen);