11 #include <immintrin.h>
22 #include "util/util.h"
23 #include "util/logging.h"
24 #include "util/strutil.h"
25 #include "re2/bitmap256.h"
26 #include "re2/stringpiece.h"
43 hint_foldcase_ = foldcase&1;
87 foldcase() ?
"/i" :
"",
88 lo_, hi_, hint(),
out());
95 static_cast<int>(empty_),
out());
155 for (
int id =
start;
id <
prog->size();
id++) {
185 for (
int c = 0;
c < 256;
c++) {
201 LOG(DFATAL) <<
"Unexpected opcode in IsMatch: " << ip->
opcode();
269 Inst*
j =
inst(ip->out());
270 Inst*
k =
inst(ip->out1());
272 j->lo() == 0x00 &&
j->hi() == 0xFF &&
279 k->lo() == 0x00 &&
k->hi() == 0xFF) {
290 if (p ==
text.data())
292 else if (p[-1] ==
'\n')
298 else if (p <
text.data() +
text.size() && p[0] ==
'\n')
302 if (p ==
text.data() && p ==
text.data() +
text.size()) {
304 }
else if (p ==
text.data()) {
307 }
else if (p ==
text.data() +
text.size()) {
336 class ByteMapBuilder {
347 void Mark(
int lo,
int hi);
357 std::vector<std::pair<int, int>>
colormap_;
358 std::vector<std::pair<int, int>>
ranges_;
373 if (lo == 0 && hi == 255)
380 for (std::vector<std::pair<int, int>>::const_iterator
it =
ranges_.begin();
383 int lo =
it->first-1;
432 std::vector<std::pair<int, int>>::const_iterator
it =
434 [=](
const std::pair<int, int>& kv) ->
bool {
435 return kv.first == oldcolor || kv.second == oldcolor;
441 colormap_.emplace_back(oldcolor, newcolor);
452 bool marked_line_boundaries =
false;
454 bool marked_word_boundaries =
false;
456 for (
int id = 0;
id <
size();
id++) {
462 if (ip->foldcase() && lo <= 'z' && hi >=
'a') {
469 if (foldlo <= foldhi) {
479 ip->out() ==
inst(
id+1)->
out())
484 !marked_line_boundaries) {
487 marked_line_boundaries =
true;
490 !marked_word_boundaries) {
493 for (
bool isword : {
true,
false}) {
495 for (
int i = 0;
i < 256;
i =
j) {
496 for (j = i + 1;
j < 256 &&
506 marked_word_boundaries =
true;
514 LOG(
ERROR) <<
"Using trivial bytemap.";
515 for (
int i = 0;
i < 256;
i++)
564 std::vector<int> stk;
569 SparseArray<int> rootmap(
size());
570 SparseArray<int> predmap(
size());
571 std::vector<std::vector<int>> predvec;
575 SparseArray<int> sorted(rootmap);
576 std::sort(sorted.begin(), sorted.end(), sorted.less);
581 MarkDominator(
i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
586 std::vector<int> flatmap(rootmap.size());
587 std::vector<Inst> flat;
588 flat.reserve(
size());
592 flatmap[
i->value()] =
static_cast<int>(flat.size());
593 EmitList(
i->index(), &rootmap, &flat, &reachable, &stk);
594 flat.back().set_last();
597 ComputeHints(&flat, flatmap[
i->value()],
static_cast<int>(flat.size()));
606 for (
int id = 0; id < static_cast<int>(flat.size());
id++) {
607 Inst* ip = &flat[
id];
609 ip->set_out(flatmap[ip->out()]);
630 size_ =
static_cast<int>(flat.size());
646 SparseArray<int>* predmap,
647 std::vector<std::vector<int>>* predvec,
648 SparseSet* reachable, std::vector<int>* stk) {
650 rootmap->set_new(0, rootmap->size());
655 if (!rootmap->has_index(
start()))
656 rootmap->set_new(
start(), rootmap->size());
661 while (!stk->empty()) {
662 int id = stk->back();
665 if (reachable->contains(
id))
667 reachable->insert_new(
id);
670 switch (ip->opcode()) {
672 LOG(DFATAL) <<
"unhandled opcode: " << ip->opcode();
678 for (
int out : {ip->out(), ip->out1()}) {
679 if (!predmap->has_index(
out)) {
680 predmap->set_new(
out,
static_cast<int>(predvec->size()));
681 predvec->emplace_back();
683 (*predvec)[predmap->get_existing(
out)].emplace_back(
id);
685 stk->push_back(ip->out1());
693 if (!rootmap->has_index(ip->out()))
694 rootmap->set_new(ip->out(), rootmap->size());
710 SparseArray<int>* predmap,
711 std::vector<std::vector<int>>* predvec,
712 SparseSet* reachable, std::vector<int>* stk) {
715 stk->push_back(
root);
716 while (!stk->empty()) {
717 int id = stk->back();
720 if (reachable->contains(
id))
722 reachable->insert_new(
id);
724 if (
id !=
root && rootmap->has_index(
id)) {
730 switch (ip->opcode()) {
732 LOG(DFATAL) <<
"unhandled opcode: " << ip->opcode();
737 stk->push_back(ip->out1());
757 i != reachable->end();
760 if (predmap->has_index(
id)) {
761 for (
int pred : (*predvec)[predmap->get_existing(
id)]) {
762 if (!reachable->contains(pred)) {
765 if (!rootmap->has_index(
id))
766 rootmap->set_new(
id, rootmap->size());
774 std::vector<Inst>* flat,
775 SparseSet* reachable, std::vector<int>* stk) {
778 stk->push_back(
root);
779 while (!stk->empty()) {
780 int id = stk->back();
783 if (reachable->contains(
id))
785 reachable->insert_new(
id);
787 if (
id !=
root && rootmap->has_index(
id)) {
790 flat->emplace_back();
792 flat->back().set_out(rootmap->get_existing(
id));
797 switch (ip->opcode()) {
799 LOG(DFATAL) <<
"unhandled opcode: " << ip->opcode();
803 flat->emplace_back();
805 flat->back().set_out(
static_cast<int>(flat->size()));
806 flat->back().out1_ =
static_cast<uint32_t>(flat->size())+1;
810 stk->push_back(ip->out1());
817 flat->emplace_back();
818 memmove(&flat->back(), ip,
sizeof *ip);
819 flat->back().set_out(rootmap->get_existing(ip->out()));
828 flat->emplace_back();
829 memmove(&flat->back(), ip,
sizeof *ip);
867 auto Recolor = [&](
int lo,
int hi) {
871 if (0 <= lo && !splits.Test(lo)) {
873 int next = splits.FindNextSetBit(lo+1);
874 colors[lo] = colors[
next];
876 if (!splits.Test(hi)) {
878 int next = splits.FindNextSetBit(hi+1);
879 colors[hi] = colors[
next];
884 int next = splits.FindNextSetBit(c);
895 Inst* ip = &(*flat)[
id];
899 if (ip->foldcase() && lo <= 'z' && hi >=
'a') {
906 if (foldlo <= foldhi) {
909 Recolor(foldlo, foldhi);
915 ip->hint_foldcase_ |= hint<<1;
939 for (
size_t i = 0;
i <
size; ++
i) {
941 nfa[
b] |= 1 << (
i+1);
944 for (
int b = 0;
b < 256; ++
b)
954 for (
size_t dcurr = 0; dcurr <
size; ++dcurr) {
957 uint16_t nnext = nfa[
b] & ((ncurr << 1) | 1);
958 size_t dnext = dcurr+1;
961 states[dnext] = nnext;
977 for (
size_t dcurr = 0; dcurr <
size; ++dcurr) {
980 uint16_t nnext = nfa[
b] & ((ncurr << 1) | 1);
982 while (states[dnext] != nnext)
984 dfa[
b] |=
static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
990 if (
'a' <=
b &&
b <=
'z') {
992 dfa[
b] |=
static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
999 for (
int b = 0;
b < 256; ++
b)
1006 bool prefix_foldcase) {
1054 uint64_t curr0 = next0 >> (curr & 63);
1055 uint64_t curr1 = next1 >> (curr0 & 63);
1056 uint64_t curr2 = next2 >> (curr1 & 63);
1057 uint64_t curr3 = next3 >> (curr2 & 63);
1058 uint64_t curr4 = next4 >> (curr3 & 63);
1059 uint64_t curr5 = next5 >> (curr4 & 63);
1060 uint64_t curr6 = next6 >> (curr5 & 63);
1061 uint64_t curr7 = next7 >> (curr6 & 63);
1068 if (((curr7-curr0) & 63) == 0)
return p+1-
prefix_size_;
1069 if (((curr7-curr1) & 63) == 0)
return p+2-
prefix_size_;
1070 if (((curr7-curr2) & 63) == 0)
return p+3-
prefix_size_;
1071 if (((curr7-curr3) & 63) == 0)
return p+4-
prefix_size_;
1072 if (((curr7-curr4) & 63) == 0)
return p+5-
prefix_size_;
1073 if (((curr7-curr5) & 63) == 0)
return p+6-
prefix_size_;
1074 if (((curr7-curr6) & 63) == 0)
return p+7-
prefix_size_;
1075 if (((curr7-curr7) & 63) == 0)
return p+8-
prefix_size_;
1080 }
while (p != endp);
1090 curr =
next >> (curr & 63);
1097 #if defined(__AVX2__)
1099 static int FindLSBSet(
uint32_t n) {
1101 #if defined(__GNUC__)
1102 return __builtin_ctz(n);
1103 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
1105 _BitScanForward(&c, n);
1106 return static_cast<int>(c);
1109 for (
int shift = 1 << 4; shift != 0; shift >>= 1) {
1129 #if defined(__AVX2__)
1131 if (
size >=
sizeof(__m256i)) {
1132 const __m256i* fp =
reinterpret_cast<const __m256i*
>(
1133 reinterpret_cast<const char*
>(
data));
1134 const __m256i* bp =
reinterpret_cast<const __m256i*
>(
1136 const __m256i* endfp = fp +
size/
sizeof(__m256i);
1140 const __m256i f_loadu = _mm256_loadu_si256(fp++);
1141 const __m256i b_loadu = _mm256_loadu_si256(bp++);
1142 const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
1143 const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
1144 const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
1145 if (fb_testz == 0) {
1146 const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
1147 const int fb_movemask = _mm256_movemask_epi8(fb_and);
1148 const int fb_ctz = FindLSBSet(fb_movemask);
1149 return reinterpret_cast<const char*
>(fp-1) + fb_ctz;
1151 }
while (fp != endfp);
1157 const char* p0 =
reinterpret_cast<const char*
>(
data);
1158 for (
const char* p = p0;; p++) {