16 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
25 #include "re2/regexp.h"
30 #define ASSERT_HEX(type, value) \
34 RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
35 ASSERT_EQ(v, 0x##value); \
36 ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
38 ASSERT_EQ(v, 0x##value); \
48 ASSERT_HEX(
unsigned long long, cafebabedeadbeefULL);
53 TEST(RE2, OctalTests) {
54 #define ASSERT_OCTAL(type, value) \
57 ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
58 ASSERT_EQ(v, 0##value); \
59 ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
61 ASSERT_EQ(v, 0##value); \
76 TEST(RE2, DecimalTests) {
77 #define ASSERT_DECIMAL(type, value) \
80 ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
81 ASSERT_EQ(v, value); \
83 RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
84 ASSERT_EQ(v, value); \
103 const char *original;
108 static const ReplaceTest
tests[] = {
109 {
"(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
111 "the quick brown fox jumps over the lazy dogs.",
112 "ethay quick brown fox jumps over the lazy dogs.",
113 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
117 "abcd.efghi@google.com",
118 "abcd-NOSPAM.efghi@google.com",
119 "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
176 {
"", NULL, NULL, NULL, NULL, 0 }
179 for (
const ReplaceTest* t =
tests;
t->original != NULL;
t++) {
195 EXPECT_EQ(expect_ok, actual_ok) <<
" for " << rewrite <<
" error: " <<
error;
198 TEST(CheckRewriteString,
all) {
241 StringPiece
input(s);
250 TEST(RE2, ConsumeN) {
252 StringPiece
input(s);
279 StringPiece
input(s);
297 TEST(RE2, FindAndConsumeN) {
299 StringPiece
input(s);
321 TEST(RE2, MatchNumberPeculiarity) {
322 RE2
r(
"(foo)|(bar)|(baz)");
347 RE2 re(
"((\\w+):([0-9]+))");
348 StringPiece
group[4];
351 StringPiece
s =
"zyzzyva";
356 s =
"a chrisr:9000 here";
376 size_t patlen = strlen(
pattern);
377 for (
int i = 0;
i <
size;
i++) {
381 RE2 re(
"([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?",
RE2::Quiet);
392 <<
"Unquoted='" << unquoted <<
"', quoted='" << quoted <<
"'.";
403 <<
"Unquoted='" << unquoted <<
"', quoted='" << quoted <<
"'.";
408 TEST(QuoteMeta, Simple) {
420 TEST(QuoteMeta, SimpleNegative) {
435 TEST(QuoteMeta, Latin1) {
452 TEST(QuoteMeta, HasNull) {
466 TEST(ProgramSize, BigProgram) {
467 RE2 re_simple(
"simple regexp");
468 RE2 re_medium(
"medium.*regexp");
469 RE2 re_complex(
"complex.{1,128}regexp");
472 ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
473 ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
475 ASSERT_GT(re_simple.ReverseProgramSize(), 0);
476 ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
477 ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
480 TEST(ProgramFanout, BigProgram) {
481 RE2 re1(
"(?:(?:(?:(?:(?:.)?){1})*)+)");
482 RE2 re10(
"(?:(?:(?:(?:(?:.)?){10})*)+)");
483 RE2 re100(
"(?:(?:(?:(?:(?:.)?){100})*)+)");
484 RE2 re1000(
"(?:(?:(?:(?:(?:.)?){1000})*)+)");
524 TEST(EmptyCharset, Fuzz) {
525 static const char *empties[] = {
537 TEST(EmptyCharset, BitstateAssumptions) {
539 static const char *nop_empties[] = {
540 "((((()))))" "[^\\S\\s]?",
541 "((((()))))" "([^\\S\\s])?",
542 "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
543 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
545 StringPiece
group[6];
546 for (
size_t i = 0;
i <
arraysize(nop_empties);
i++)
553 RE2 re(
"(hello world)");
554 ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
555 const std::map<std::string, int>&
m = re.NamedCapturingGroups();
560 RE2 re(
"(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
561 ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
562 const std::map<std::string, int>&
m = re.NamedCapturingGroups();
571 TEST(RE2, CapturedGroupTest) {
572 RE2 re(
"directions from (?P<S>.*) to (?P<D>.*)");
573 int num_groups = re.NumberOfCapturingGroups();
581 const RE2::Arg*
const matches[4] = {&arg0, &arg1, &arg2, &arg3};
583 re, matches, num_groups));
584 const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
585 EXPECT_TRUE(named_groups.find(
"S") != named_groups.end());
586 EXPECT_TRUE(named_groups.find(
"D") != named_groups.end());
589 int source_group_index = named_groups.find(
"S")->second;
590 int destination_group_index = named_groups.find(
"D")->second;
599 TEST(RE2, FullMatchWithNoArgs) {
607 TEST(RE2, PartialMatch) {
615 TEST(RE2, PartialMatchN) {
639 TEST(RE2, FullMatchZeroArg) {
644 TEST(RE2, FullMatchOneArg) {
654 RE2::FullMatch(
"1234567890123456789012345678901234567890",
"(\\d+)", &i));
657 TEST(RE2, FullMatchIntegerArg) {
673 TEST(RE2, FullMatchStringArg) {
680 TEST(RE2, FullMatchStringPieceArg) {
690 TEST(RE2, FullMatchMultiArg) {
699 TEST(RE2, FullMatchN) {
723 TEST(RE2, FullMatchIgnoredArg) {
729 RE2::FullMatch(
"ruby:1234",
"(\\w+)(:)(\\d+)", &s, (
void*)NULL, &i));
739 TEST(RE2, FullMatchTypedNullArg) {
763 TEST(RE2, NULTerminated) {
764 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
767 long pagesize = sysconf(_SC_PAGE_SIZE);
769 #ifndef MAP_ANONYMOUS
770 #define MAP_ANONYMOUS MAP_ANON
772 v =
static_cast<char*
>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
773 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
775 LOG(
INFO) <<
"Memory at " << (
void*)
v;
776 ASSERT_EQ(munmap(
v + pagesize, pagesize), 0) <<
" error " << errno;
777 v[pagesize - 1] =
'1';
785 TEST(RE2, FullMatchTypeTests) {
796 ASSERT_EQ(c,
static_cast<unsigned char>(
'H'));
888 TEST(RE2, FloatingPointFullMatchTypes) {
915 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
939 TEST(RE2, FullMatchAnchored) {
948 TEST(RE2, FullMatchBraces) {
955 TEST(RE2, Complicated) {
963 TEST(RE2, FullMatchEnd) {
982 TEST(RE2, FullMatchArgCount) {
1012 &a[2], &a[3], &a[4]));
1021 &a[1], &a[2], &a[3], &a[4], &a[5]));
1031 &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
1042 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1043 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1044 &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
1045 &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
1046 &a[13], &a[14], &a[15]));
1065 TEST(RE2, Accessors) {
1069 const RE2 re(kPattern);
1085 const char utf8_string[] = {
1086 (char)0xe6, (
char)0x97, (char)0xa5,
1087 (
char)0xe6, (char)0x9c, (
char)0xac,
1088 (char)0xe8, (
char)0xaa, (char)0x9e,
1091 const char utf8_pattern[] = {
1093 (char)0xe6, (
char)0x9c, (char)0xac,
1101 RE2 re_test2(
"...");
1110 RE2 re_test4(
"(.)");
1117 RE2 re_test6(utf8_string);
1123 RE2 re_test8(utf8_pattern);
1127 TEST(RE2, UngreedyUTF8) {
1132 const char*
pattern =
"\\w+X";
1135 RE2 match_sentence_re(
pattern);
1141 const char*
pattern =
"(?U)\\w+X";
1145 RE2 match_sentence_re(
pattern);
1152 TEST(RE2, Rejects) {
1178 TEST(RE2, NoCrash) {
1188 RE2 re(
"(((.{100}){100}){100}){100}",
RE2::Quiet);
1204 TEST(RE2, Recursion) {
1207 int bytes = 15 * 1024;
1215 TEST(RE2, BigCountedRepetition) {
1218 opt.set_max_mem(256<<20);
1220 RE2 re(
".{512}x", opt);
1228 TEST(RE2, DeepRecursion) {
1237 RE2 re(
"((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1243 TEST(CaseInsensitive, MatchAndConsume) {
1245 StringPiece sp(
text);
1253 TEST(RE2, ImplicitConversions) {
1255 StringPiece re_stringpiece(
".");
1256 const char* re_cstring =
".";
1264 TEST(RE2, CL8622304) {
1273 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1284 static struct ErrorTest {
1321 static struct NeverTest {
1326 {
"(.*)",
"abc\ndef\nghi\n",
"abc" },
1327 {
"(?s)(abc.*def)",
"abc\ndef\n", NULL },
1328 {
"(abc(.|\n)*def)",
"abc\ndef\n", NULL },
1329 {
"(abc[^x]*def)",
"abc\ndef\n", NULL },
1330 {
"(abc[^x]*def)",
"abczzzdef\ndef\n",
"abczzzdef" },
1332 TEST(RE2, NeverNewline) {
1334 opt.set_never_nl(
true);
1337 RE2 re(
t.regexp, opt);
1338 if (
t.match == NULL) {
1351 opt.set_dot_nl(
true);
1354 opt.set_never_nl(
true);
1359 TEST(RE2, NeverCapture) {
1361 opt.set_never_capture(
true);
1362 RE2 re(
"(r)(e)", opt);
1363 EXPECT_EQ(0, re.NumberOfCapturingGroups());
1370 TEST(RE2, BitstateCaptureBug) {
1372 opt.set_max_mem(20000);
1373 RE2 re(
"(_________$)", opt);
1374 StringPiece
s =
"xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1379 TEST(RE2, UnicodeClasses) {
1436 TEST(RE2, LazyRE2) {
1438 static LazyRE2
a = {
"a"};
1449 TEST(RE2, NullVsEmptyString) {
1456 StringPiece
empty(
"");
1463 TEST(RE2, NullVsEmptyStringSubmatches) {
1468 StringPiece matches[4];
1484 StringPiece
empty(
"");
1498 TEST(RE2, Bug1816809) {
1499 RE2 re(
"(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1500 StringPiece piece(
"llx-3;llx4");
1506 TEST(RE2, Bug3061120) {
1513 TEST(RE2, CapturingGroupNames) {
1516 RE2 re(
"((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1518 const std::map<int, std::string>& have = re.CapturingGroupNames();
1519 std::map<int, std::string> want;
1526 TEST(RE2, RegexpToStringLossOfAnchor) {
1534 TEST(RE2, Bug10131674) {
1536 RE2 re(
"\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332",
RE2::Latin1);
1541 TEST(RE2, Bug18391750) {
1544 (char)0x28, (
char)0x28, (char)0xfc, (
char)0xfc, (char)0x08, (
char)0x08,
1545 (char)0x26, (
char)0x26, (char)0x28, (
char)0xc2, (char)0x9b, (
char)0xc5,
1546 (char)0xc5, (
char)0xd4, (char)0x8f, (
char)0x8f, (char)0x69, (
char)0x69,
1547 (char)0xe7, (
char)0x29, (char)0x7b, (
char)0x37, (char)0x31, (
char)0x31,
1548 (char)0x7d, (
char)0xae, (char)0x7c, (
char)0x7c, (char)0xf3, (
char)0x29,
1549 (char)0xae, (
char)0xae, (char)0x2e, (
char)0x2a, (char)0x29, (
char)0x00,
1553 opt.set_longest_match(
true);
1554 opt.set_dot_nl(
true);
1555 opt.set_case_sensitive(
false);
1561 TEST(RE2, Bug18458852) {
1566 (char)0x28, (
char)0x05, (char)0x05, (
char)0x41, (char)0x41, (
char)0x28,
1567 (char)0x24, (
char)0x5b, (char)0x5e, (
char)0xf5, (char)0x87, (
char)0x87,
1568 (char)0x90, (
char)0x29, (char)0x5d, (
char)0x29, (char)0x29, (
char)0x00,
1574 TEST(RE2, Bug18523943) {
1579 (char)0x29, (
char)0x29, (char)0x24, (
char)0x00,
1582 (char)0x28, (
char)0x0a, (char)0x2a, (
char)0x2a, (char)0x29, (
char)0x00,
1584 opt.set_log_errors(
false);
1586 opt.set_posix_syntax(
true);
1587 opt.set_longest_match(
true);
1588 opt.set_literal(
false);
1589 opt.set_never_nl(
true);
1591 RE2 re((
const char*)
b, opt);
1597 TEST(RE2, Bug21371806) {
1604 RE2 re(
"g\\p{Zl}]", opt);
1608 TEST(RE2, Bug26356109) {
1614 RE2 re(
"a\\C*?c|a\\C*?b");
1621 ASSERT_EQ(
m, s) <<
" (UNANCHORED) got m='" <<
m <<
"', want '" <<
s <<
"'";
1624 ASSERT_EQ(
m, s) <<
" (ANCHOR_BOTH) got m='" <<
m <<
"', want '" <<
s <<
"'";
1627 TEST(RE2, Issue104) {
1650 RE2 star(
"(?:|a)*");
1652 ASSERT_EQ(
m,
"") <<
" got m='" <<
m <<
"', want ''";
1654 RE2 plus(
"(?:|a)+");
1656 ASSERT_EQ(
m,
"") <<
" got m='" <<
m <<
"', want ''";