bloaty/third_party/re2/re2/testing/re2_test.cc
Go to the documentation of this file.
1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 
6 // TODO: Test extractions for PartialMatch/Consume
7 
8 #include <errno.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <map>
13 #include <string>
14 #include <utility>
15 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
16 #include <sys/mman.h>
17 #include <unistd.h> /* for sysconf */
18 #endif
19 
20 #include "util/test.h"
21 #include "util/logging.h"
22 #include "util/strutil.h"
23 #include "re2/re2.h"
24 #include "re2/regexp.h"
25 
26 namespace re2 {
27 
28 TEST(RE2, HexTests) {
29 #define ASSERT_HEX(type, value) \
30  do { \
31  type v; \
32  ASSERT_TRUE( \
33  RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
34  ASSERT_EQ(v, 0x##value); \
35  ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
36  RE2::CRadix(&v))); \
37  ASSERT_EQ(v, 0x##value); \
38  } while (0)
39 
40  ASSERT_HEX(short, 2bad);
41  ASSERT_HEX(unsigned short, 2badU);
42  ASSERT_HEX(int, dead);
43  ASSERT_HEX(unsigned int, deadU);
44  ASSERT_HEX(long, 7eadbeefL);
45  ASSERT_HEX(unsigned long, deadbeefUL);
46  ASSERT_HEX(long long, 12345678deadbeefLL);
47  ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);
48 
49 #undef ASSERT_HEX
50 }
51 
52 TEST(RE2, OctalTests) {
53 #define ASSERT_OCTAL(type, value) \
54  do { \
55  type v; \
56  ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
57  ASSERT_EQ(v, 0##value); \
58  ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
59  RE2::CRadix(&v))); \
60  ASSERT_EQ(v, 0##value); \
61  } while (0)
62 
63  ASSERT_OCTAL(short, 77777);
64  ASSERT_OCTAL(unsigned short, 177777U);
65  ASSERT_OCTAL(int, 17777777777);
66  ASSERT_OCTAL(unsigned int, 37777777777U);
67  ASSERT_OCTAL(long, 17777777777L);
68  ASSERT_OCTAL(unsigned long, 37777777777UL);
69  ASSERT_OCTAL(long long, 777777777777777777777LL);
70  ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);
71 
72 #undef ASSERT_OCTAL
73 }
74 
75 TEST(RE2, DecimalTests) {
76 #define ASSERT_DECIMAL(type, value) \
77  do { \
78  type v; \
79  ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
80  ASSERT_EQ(v, value); \
81  ASSERT_TRUE( \
82  RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
83  ASSERT_EQ(v, value); \
84  } while (0)
85 
86  ASSERT_DECIMAL(short, -1);
87  ASSERT_DECIMAL(unsigned short, 9999);
88  ASSERT_DECIMAL(int, -1000);
89  ASSERT_DECIMAL(unsigned int, 12345U);
90  ASSERT_DECIMAL(long, -10000000L);
91  ASSERT_DECIMAL(unsigned long, 3083324652U);
92  ASSERT_DECIMAL(long long, -100000000000000LL);
93  ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);
94 
95 #undef ASSERT_DECIMAL
96 }
97 
99  struct ReplaceTest {
100  const char *regexp;
101  const char *rewrite;
102  const char *original;
103  const char *single;
104  const char *global;
105  int greplace_count;
106  };
107  static const ReplaceTest tests[] = {
108  { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
109  "\\2\\1ay",
110  "the quick brown fox jumps over the lazy dogs.",
111  "ethay quick brown fox jumps over the lazy dogs.",
112  "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
113  9 },
114  { "\\w+",
115  "\\0-NOSPAM",
116  "abcd.efghi@google.com",
117  "abcd-NOSPAM.efghi@google.com",
118  "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
119  4 },
120  { "^",
121  "(START)",
122  "foo",
123  "(START)foo",
124  "(START)foo",
125  1 },
126  { "^",
127  "(START)",
128  "",
129  "(START)",
130  "(START)",
131  1 },
132  { "$",
133  "(END)",
134  "",
135  "(END)",
136  "(END)",
137  1 },
138  { "b",
139  "bb",
140  "ababababab",
141  "abbabababab",
142  "abbabbabbabbabb",
143  5 },
144  { "b",
145  "bb",
146  "bbbbbb",
147  "bbbbbbb",
148  "bbbbbbbbbbbb",
149  6 },
150  { "b+",
151  "bb",
152  "bbbbbb",
153  "bb",
154  "bb",
155  1 },
156  { "b*",
157  "bb",
158  "bbbbbb",
159  "bb",
160  "bb",
161  1 },
162  { "b*",
163  "bb",
164  "aaaaa",
165  "bbaaaaa",
166  "bbabbabbabbabbabb",
167  6 },
168  // Check newline handling
169  { "a.*a",
170  "(\\0)",
171  "aba\naba",
172  "(aba)\naba",
173  "(aba)\n(aba)",
174  2 },
175  { "", NULL, NULL, NULL, NULL, 0 }
176  };
177 
178  for (const ReplaceTest* t = tests; t->original != NULL; t++) {
179  std::string one(t->original);
180  ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
181  ASSERT_EQ(one, t->single);
182  std::string all(t->original);
183  ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
184  << "Got: " << all;
185  ASSERT_EQ(all, t->global);
186  }
187 }
188 
189 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
190  bool expect_ok) {
192  RE2 exp(regexp);
193  bool actual_ok = exp.CheckRewriteString(rewrite, &error);
194  EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
195 }
196 
197 TEST(CheckRewriteString, all) {
198  TestCheckRewriteString("abc", "foo", true);
199  TestCheckRewriteString("abc", "foo\\", false);
200  TestCheckRewriteString("abc", "foo\\0bar", true);
201 
202  TestCheckRewriteString("a(b)c", "foo", true);
203  TestCheckRewriteString("a(b)c", "foo\\0bar", true);
204  TestCheckRewriteString("a(b)c", "foo\\1bar", true);
205  TestCheckRewriteString("a(b)c", "foo\\2bar", false);
206  TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
207 
208  TestCheckRewriteString("a(b)(c)", "foo\\12", true);
209  TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
210  TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
211 }
212 
213 TEST(RE2, Extract) {
214  std::string s;
215 
216  ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
217  ASSERT_EQ(s, "kremvax!boris");
218 
219  ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
220  ASSERT_EQ(s, "'foo'");
221  // check that false match doesn't overwrite
222  ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
223  ASSERT_EQ(s, "'foo'");
224 }
225 
227  RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
228  std::string word;
229 
230  std::string s(" aaa b!@#$@#$cccc");
231  StringPiece input(s);
232 
233  ASSERT_TRUE(RE2::Consume(&input, r, &word));
234  ASSERT_EQ(word, "aaa") << " input: " << input;
235  ASSERT_TRUE(RE2::Consume(&input, r, &word));
236  ASSERT_EQ(word, "b") << " input: " << input;
237  ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
238 }
239 
240 TEST(RE2, ConsumeN) {
241  const std::string s(" one two three 4");
242  StringPiece input(s);
243 
244  RE2::Arg argv[2];
245  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
246 
247  // 0 arg
248  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
249 
250  // 1 arg
251  std::string word;
252  argv[0] = &word;
253  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
254  EXPECT_EQ("two", word);
255 
256  // Multi-args
257  int n;
258  argv[1] = &n;
259  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
260  EXPECT_EQ("three", word);
261  EXPECT_EQ(4, n);
262 }
263 
265  RE2 r("(\\w+)"); // matches a word
266  std::string word;
267 
268  std::string s(" aaa b!@#$@#$cccc");
269  StringPiece input(s);
270 
272  ASSERT_EQ(word, "aaa");
274  ASSERT_EQ(word, "b");
276  ASSERT_EQ(word, "cccc");
278 
279  // Check that FindAndConsume works without any submatches.
280  // Earlier version used uninitialized data for
281  // length to consume.
282  input = "aaa";
284  ASSERT_EQ(input, "");
285 }
286 
287 TEST(RE2, FindAndConsumeN) {
288  const std::string s(" one two three 4");
289  StringPiece input(s);
290 
291  RE2::Arg argv[2];
292  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
293 
294  // 0 arg
295  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
296 
297  // 1 arg
298  std::string word;
299  argv[0] = &word;
300  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
301  EXPECT_EQ("two", word);
302 
303  // Multi-args
304  int n;
305  argv[1] = &n;
306  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
307  EXPECT_EQ("three", word);
308  EXPECT_EQ(4, n);
309 }
310 
311 TEST(RE2, MatchNumberPeculiarity) {
312  RE2 r("(foo)|(bar)|(baz)");
313  std::string word1;
314  std::string word2;
315  std::string word3;
316 
317  ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
318  ASSERT_EQ(word1, "foo");
319  ASSERT_EQ(word2, "");
320  ASSERT_EQ(word3, "");
321  ASSERT_TRUE(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
322  ASSERT_EQ(word1, "");
323  ASSERT_EQ(word2, "bar");
324  ASSERT_EQ(word3, "");
325  ASSERT_TRUE(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
326  ASSERT_EQ(word1, "");
327  ASSERT_EQ(word2, "");
328  ASSERT_EQ(word3, "baz");
329  ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
330 
331  std::string a;
332  ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
333  ASSERT_EQ(a, "");
334 }
335 
337  RE2 re("((\\w+):([0-9]+))"); // extracts host and port
338  StringPiece group[4];
339 
340  // No match.
341  StringPiece s = "zyzzyva";
342  ASSERT_FALSE(
343  re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
344 
345  // Matches and extracts.
346  s = "a chrisr:9000 here";
347  ASSERT_TRUE(
348  re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
349  ASSERT_EQ(group[0], "chrisr:9000");
350  ASSERT_EQ(group[1], "chrisr:9000");
351  ASSERT_EQ(group[2], "chrisr");
352  ASSERT_EQ(group[3], "9000");
353 
354  std::string all, host;
355  int port;
356  ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
357  ASSERT_EQ(all, "chrisr:9000");
358  ASSERT_EQ(host, "chrisr");
359  ASSERT_EQ(port, 9000);
360 }
361 
362 static void TestRecursion(int size, const char* pattern) {
363  // Fill up a string repeating the pattern given
364  std::string domain;
365  domain.resize(size);
366  size_t patlen = strlen(pattern);
367  for (int i = 0; i < size; i++) {
368  domain[i] = pattern[i % patlen];
369  }
370  // Just make sure it doesn't crash due to too much recursion.
371  RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
372  RE2::FullMatch(domain, re);
373 }
374 
375 // A meta-quoted string, interpreted as a pattern, should always match
376 // the original unquoted string.
377 static void TestQuoteMeta(const std::string& unquoted,
379  std::string quoted = RE2::QuoteMeta(unquoted);
380  RE2 re(quoted, options);
381  EXPECT_TRUE(RE2::FullMatch(unquoted, re))
382  << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
383 }
384 
385 // A meta-quoted string, interpreted as a pattern, should always match
386 // the original unquoted string.
388  const std::string& unquoted, const std::string& should_not_match,
390  std::string quoted = RE2::QuoteMeta(unquoted);
391  RE2 re(quoted, options);
392  EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
393  << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
394 }
395 
396 // Tests that quoted meta characters match their original strings,
397 // and that a few things that shouldn't match indeed do not.
398 TEST(QuoteMeta, Simple) {
399  TestQuoteMeta("foo");
400  TestQuoteMeta("foo.bar");
401  TestQuoteMeta("foo\\.bar");
402  TestQuoteMeta("[1-9]");
403  TestQuoteMeta("1.5-2.0?");
404  TestQuoteMeta("\\d");
405  TestQuoteMeta("Who doesn't like ice cream?");
406  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
407  TestQuoteMeta("((?!)xxx).*yyy");
408  TestQuoteMeta("([");
409 }
410 TEST(QuoteMeta, SimpleNegative) {
411  NegativeTestQuoteMeta("foo", "bar");
412  NegativeTestQuoteMeta("...", "bar");
413  NegativeTestQuoteMeta("\\.", ".");
414  NegativeTestQuoteMeta("\\.", "..");
415  NegativeTestQuoteMeta("(a)", "a");
416  NegativeTestQuoteMeta("(a|b)", "a");
417  NegativeTestQuoteMeta("(a|b)", "(a)");
418  NegativeTestQuoteMeta("(a|b)", "a|b");
419  NegativeTestQuoteMeta("[0-9]", "0");
420  NegativeTestQuoteMeta("[0-9]", "0-9");
421  NegativeTestQuoteMeta("[0-9]", "[9]");
422  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
423 }
424 
425 TEST(QuoteMeta, Latin1) {
426  TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
427 }
428 
429 TEST(QuoteMeta, UTF8) {
430  TestQuoteMeta("Plácido Domingo");
431  TestQuoteMeta("xyz"); // No fancy utf8.
432  TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
433  TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
434  TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
435  TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
436  TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
437  // still work.
438  NegativeTestQuoteMeta("27\xc2\xb0",
439  "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
440 }
441 
442 TEST(QuoteMeta, HasNull) {
443  std::string has_null;
444 
445  // string with one null character
446  has_null += '\0';
447  TestQuoteMeta(has_null);
448  NegativeTestQuoteMeta(has_null, "");
449 
450  // Don't want null-followed-by-'1' to be interpreted as '\01'.
451  has_null += '1';
452  TestQuoteMeta(has_null);
453  NegativeTestQuoteMeta(has_null, "\1");
454 }
455 
456 TEST(ProgramSize, BigProgram) {
457  RE2 re_simple("simple regexp");
458  RE2 re_medium("medium.*regexp");
459  RE2 re_complex("complex.{1,128}regexp");
460 
461  ASSERT_GT(re_simple.ProgramSize(), 0);
462  ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
463  ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
464 
465  ASSERT_GT(re_simple.ReverseProgramSize(), 0);
466  ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
467  ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
468 }
469 
470 TEST(ProgramFanout, BigProgram) {
471  RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
472  RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
473  RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
474  RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
475 
476  std::map<int, int> histogram;
477 
478  // 3 is the largest non-empty bucket and has 1 element.
480  ASSERT_EQ(1, histogram[3]);
481 
482  // 7 is the largest non-empty bucket and has 10 elements.
483  ASSERT_EQ(7, re10.ProgramFanout(&histogram));
484  ASSERT_EQ(10, histogram[7]);
485 
486  // 10 is the largest non-empty bucket and has 100 elements.
487  ASSERT_EQ(10, re100.ProgramFanout(&histogram));
488  ASSERT_EQ(100, histogram[10]);
489 
490  // 13 is the largest non-empty bucket and has 1000 elements.
491  ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
492  ASSERT_EQ(1000, histogram[13]);
493 
494  // 2 is the largest non-empty bucket and has 3 elements.
495  // This differs from the others due to how reverse `.' works.
497  ASSERT_EQ(3, histogram[2]);
498 
499  // 5 is the largest non-empty bucket and has 10 elements.
501  ASSERT_EQ(10, histogram[5]);
502 
503  // 9 is the largest non-empty bucket and has 100 elements.
505  ASSERT_EQ(100, histogram[9]);
506 
507  // 12 is the largest non-empty bucket and has 1000 elements.
509  ASSERT_EQ(1000, histogram[12]);
510 }
511 
512 // Issue 956519: handling empty character sets was
513 // causing NULL dereference. This tests a few empty character sets.
514 // (The way to get an empty character set is to negate a full one.)
515 TEST(EmptyCharset, Fuzz) {
516  static const char *empties[] = {
517  "[^\\S\\s]",
518  "[^\\S[:space:]]",
519  "[^\\D\\d]",
520  "[^\\D[:digit:]]"
521  };
522  for (size_t i = 0; i < arraysize(empties); i++)
523  ASSERT_FALSE(RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
524 }
525 
526 // Bitstate assumes that kInstFail instructions in
527 // alternations or capture groups have been "compiled away".
528 TEST(EmptyCharset, BitstateAssumptions) {
529  // Captures trigger use of Bitstate.
530  static const char *nop_empties[] = {
531  "((((()))))" "[^\\S\\s]?",
532  "((((()))))" "([^\\S\\s])?",
533  "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
534  "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
535  };
536  StringPiece group[6];
537  for (size_t i = 0; i < arraysize(nop_empties); i++)
538  ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
539 }
540 
541 // Test that named groups work correctly.
542 TEST(Capture, NamedGroups) {
543  {
544  RE2 re("(hello world)");
546  const std::map<std::string, int>& m = re.NamedCapturingGroups();
547  ASSERT_EQ(m.size(), 0);
548  }
549 
550  {
551  RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
553  const std::map<std::string, int>& m = re.NamedCapturingGroups();
554  ASSERT_EQ(m.size(), 4);
555  ASSERT_EQ(m.find("A")->second, 1);
556  ASSERT_EQ(m.find("B")->second, 2);
557  ASSERT_EQ(m.find("C")->second, 3);
558  ASSERT_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
559  }
560 }
561 
562 TEST(RE2, CapturedGroupTest) {
563  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
564  int num_groups = re.NumberOfCapturingGroups();
565  EXPECT_EQ(2, num_groups);
566  std::string args[4];
567  RE2::Arg arg0(&args[0]);
568  RE2::Arg arg1(&args[1]);
569  RE2::Arg arg2(&args[2]);
570  RE2::Arg arg3(&args[3]);
571 
572  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
573  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
574  re, matches, num_groups));
575  const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
576  EXPECT_TRUE(named_groups.find("S") != named_groups.end());
577  EXPECT_TRUE(named_groups.find("D") != named_groups.end());
578 
579  // The named group index is 1-based.
580  int source_group_index = named_groups.find("S")->second;
581  int destination_group_index = named_groups.find("D")->second;
582  EXPECT_EQ(1, source_group_index);
583  EXPECT_EQ(2, destination_group_index);
584 
585  // The args is zero-based.
586  EXPECT_EQ("mountain view", args[source_group_index - 1]);
587  EXPECT_EQ("san jose", args[destination_group_index - 1]);
588 }
589 
590 TEST(RE2, FullMatchWithNoArgs) {
591  ASSERT_TRUE(RE2::FullMatch("h", "h"));
592  ASSERT_TRUE(RE2::FullMatch("hello", "hello"));
593  ASSERT_TRUE(RE2::FullMatch("hello", "h.*o"));
594  ASSERT_FALSE(RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
595  ASSERT_FALSE(RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
596 }
597 
598 TEST(RE2, PartialMatch) {
599  ASSERT_TRUE(RE2::PartialMatch("x", "x"));
600  ASSERT_TRUE(RE2::PartialMatch("hello", "h.*o"));
601  ASSERT_TRUE(RE2::PartialMatch("othello", "h.*o"));
602  ASSERT_TRUE(RE2::PartialMatch("hello!", "h.*o"));
603  ASSERT_TRUE(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
604 }
605 
606 TEST(RE2, PartialMatchN) {
607  RE2::Arg argv[2];
608  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
609 
610  // 0 arg
611  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
612  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
613 
614  // 1 arg
615  int i;
616  argv[0] = &i;
617  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
618  EXPECT_EQ(1001, i);
619  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
620 
621  // Multi-arg
622  std::string s;
623  argv[1] = &s;
624  EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
625  EXPECT_EQ(42, i);
626  EXPECT_EQ("life", s);
627  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
628 }
629 
630 TEST(RE2, FullMatchZeroArg) {
631  // Zero-arg
632  ASSERT_TRUE(RE2::FullMatch("1001", "\\d+"));
633 }
634 
635 TEST(RE2, FullMatchOneArg) {
636  int i;
637 
638  // Single-arg
639  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)", &i));
640  ASSERT_EQ(i, 1001);
641  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &i));
642  ASSERT_EQ(i, -123);
643  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &i));
644  ASSERT_FALSE(
645  RE2::FullMatch("1234567890123456789012345678901234567890", "(\\d+)", &i));
646 }
647 
648 TEST(RE2, FullMatchIntegerArg) {
649  int i;
650 
651  // Digits surrounding integer-arg
652  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &i));
653  ASSERT_EQ(i, 23);
654  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &i));
655  ASSERT_EQ(i, 1);
656  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
657  ASSERT_EQ(i, -1);
658  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &i));
659  ASSERT_EQ(i, 1);
660  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &i));
661  ASSERT_EQ(i, -1);
662 }
663 
664 TEST(RE2, FullMatchStringArg) {
665  std::string s;
666  // String-arg
667  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s));
668  ASSERT_EQ(s, std::string("ell"));
669 }
670 
671 TEST(RE2, FullMatchStringPieceArg) {
672  int i;
673  // StringPiece-arg
674  StringPiece sp;
675  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
676  ASSERT_EQ(sp.size(), 4);
677  ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
678  ASSERT_EQ(i, 1234);
679 }
680 
681 TEST(RE2, FullMatchMultiArg) {
682  int i;
683  std::string s;
684  // Multi-arg
685  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
686  ASSERT_EQ(s, std::string("ruby"));
687  ASSERT_EQ(i, 1234);
688 }
689 
690 TEST(RE2, FullMatchN) {
691  RE2::Arg argv[2];
692  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
693 
694  // 0 arg
695  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
696  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
697 
698  // 1 arg
699  int i;
700  argv[0] = &i;
701  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
702  EXPECT_EQ(1001, i);
703  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
704 
705  // Multi-arg
706  std::string s;
707  argv[1] = &s;
708  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
709  EXPECT_EQ(42, i);
710  EXPECT_EQ("life", s);
711  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
712 }
713 
714 TEST(RE2, FullMatchIgnoredArg) {
715  int i;
716  std::string s;
717 
718  // Old-school NULL should be ignored.
719  ASSERT_TRUE(
720  RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
721  ASSERT_EQ(s, std::string("ruby"));
722  ASSERT_EQ(i, 1234);
723 
724  // C++11 nullptr should also be ignored.
725  ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &s, nullptr, &i));
726  ASSERT_EQ(s, std::string("rubz"));
727  ASSERT_EQ(i, 1235);
728 }
729 
730 TEST(RE2, FullMatchTypedNullArg) {
731  std::string s;
732 
733  // Ignore non-void* NULL arg
734  ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
735  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
736  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
737  ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
738  ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
739  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
740  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
741 
742  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
743  ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
744  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (int*)NULL));
745  ASSERT_FALSE(RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
746  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (double*)NULL));
747  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (float*)NULL));
748 }
749 
750 // Check that numeric parsing code does not read past the end of
751 // the number being parsed.
752 // This implementation requires mmap(2) et al. and thus cannot
753 // be used unless they are available.
754 TEST(RE2, NULTerminated) {
755 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
756  char *v;
757  int x;
758  long pagesize = sysconf(_SC_PAGE_SIZE);
759 
760 #ifndef MAP_ANONYMOUS
761 #define MAP_ANONYMOUS MAP_ANON
762 #endif
763  v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
764  MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
765  ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
766  LOG(INFO) << "Memory at " << (void*)v;
767  ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
768  v[pagesize - 1] = '1';
769 
770  x = 0;
771  ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
772  ASSERT_EQ(x, 1);
773 #endif
774 }
775 
776 TEST(RE2, FullMatchTypeTests) {
777  // Type tests
778  std::string zeros(1000, '0');
779  {
780  char c;
781  ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
782  ASSERT_EQ(c, 'H');
783  }
784  {
785  unsigned char c;
786  ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
787  ASSERT_EQ(c, static_cast<unsigned char>('H'));
788  }
789  {
790  int16_t v;
791  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
792  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
793  ASSERT_TRUE(RE2::FullMatch("32767", "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
794  ASSERT_TRUE(RE2::FullMatch("-32768", "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
795  ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
796  ASSERT_FALSE(RE2::FullMatch("32768", "(-?\\d+)", &v));
797  }
798  {
799  uint16_t v;
800  ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
801  ASSERT_TRUE(RE2::FullMatch("32767", "(\\d+)", &v)); ASSERT_EQ(v, 32767);
802  ASSERT_TRUE(RE2::FullMatch("65535", "(\\d+)", &v)); ASSERT_EQ(v, 65535);
803  ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
804  }
805  {
806  int32_t v;
807  static const int32_t max = INT32_C(0x7fffffff);
808  static const int32_t min = -max - 1;
809  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
810  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
811  ASSERT_TRUE(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); ASSERT_EQ(v, max);
812  ASSERT_TRUE(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); ASSERT_EQ(v, min);
813  ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
814  ASSERT_FALSE(RE2::FullMatch("2147483648", "(-?\\d+)", &v));
815 
816  ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
817  ASSERT_EQ(v, max);
818  ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
819  ASSERT_EQ(v, min);
820 
821  ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
822  ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
823  ASSERT_EQ(v, max);
824  ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
825  }
826  {
827  uint32_t v;
828  static const uint32_t max = UINT32_C(0xffffffff);
829  ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
830  ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
831  ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
832  ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
833 
834  ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
835  }
836  {
837  int64_t v;
838  static const int64_t max = INT64_C(0x7fffffffffffffff);
839  static const int64_t min = -max - 1;
841 
842  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
843  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
844 
846  ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
847 
849  ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, min);
850 
852  ASSERT_NE(str.back(), '9');
853  str.back()++;
854  ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
855 
857  ASSERT_NE(str.back(), '9');
858  str.back()++;
859  ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
860  }
861  {
862  uint64_t v;
863  int64_t v2;
864  static const uint64_t max = UINT64_C(0xffffffffffffffff);
866 
867  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
868  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
869 
871  ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
872 
873  ASSERT_NE(str.back(), '9');
874  str.back()++;
875  ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
876  }
877 }
878 
879 TEST(RE2, FloatingPointFullMatchTypes) {
880  std::string zeros(1000, '0');
881  {
882  float v;
883  ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
884  ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
885  ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
886  ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
887 
888  ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
889  ASSERT_EQ(v, float(1e23));
890 
891  // 6700000000081920.1 is an edge case.
892  // 6700000000081920 is exactly halfway between
893  // two float32s, so the .1 should make it round up.
894  // However, the .1 is outside the precision possible with
895  // a float64: the nearest float64 is 6700000000081920.
896  // So if the code uses strtod and then converts to float32,
897  // round-to-even will make it round down instead of up.
898  // To pass the test, the parser must call strtof directly.
899  // This test case is carefully chosen to use only a 17-digit
900  // number, since C does not guarantee to get the correctly
901  // rounded answer for strtod and strtof unless the input is
902  // short.
903  //
904  // This is known to fail on Cygwin and MinGW due to a broken
905  // implementation of strtof(3). And apparently MSVC too. Sigh.
906 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
907  ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
908  ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
909  ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
910  ASSERT_EQ(v, 6700000000081920.1f)
911  << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
912 #endif
913  }
914  {
915  double v;
916  ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
917  ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
918  ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
919  ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
920  ASSERT_EQ(v, double(1e23));
921 
922  ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
923  ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
924  ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
925  ASSERT_EQ(v, 1.0000000596046448)
926  << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
927  }
928 }
929 
930 TEST(RE2, FullMatchAnchored) {
931  int i;
932  // Check that matching is fully anchored
933  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &i));
934  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &i));
935  ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &i)); ASSERT_EQ(i, 1001);
936  ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &i)); ASSERT_EQ(i, 1001);
937 }
938 
939 TEST(RE2, FullMatchBraces) {
940  // Braces
941  ASSERT_TRUE(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
942  ASSERT_TRUE(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
943  ASSERT_FALSE(RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
944 }
945 
946 TEST(RE2, Complicated) {
947  // Complicated RE2
948  ASSERT_TRUE(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
949  ASSERT_TRUE(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
950  ASSERT_TRUE(RE2::FullMatch("X", "foo|bar|[A-Z]"));
951  ASSERT_FALSE(RE2::FullMatch("XY", "foo|bar|[A-Z]"));
952 }
953 
954 TEST(RE2, FullMatchEnd) {
955  // Check full-match handling (needs '$' tacked on internally)
956  ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
957  ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));
958  ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
959  ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
960  ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
961  ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
962  ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));
963 
964  // Uncomment the following if we change the handling of '$' to
965  // prevent it from matching a trailing newline
966  if (false) {
967  // Check that we don't get bitten by pcre's special handling of a
968  // '\n' at the end of the string matching '$'
969  ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
970  }
971 }
972 
973 TEST(RE2, FullMatchArgCount) {
974  // Number of args
975  int a[16];
976  ASSERT_TRUE(RE2::FullMatch("", ""));
977 
978  memset(a, 0, sizeof(0));
979  ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
980  ASSERT_EQ(a[0], 1);
981 
982  memset(a, 0, sizeof(0));
983  ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
984  ASSERT_EQ(a[0], 1);
985  ASSERT_EQ(a[1], 2);
986 
987  memset(a, 0, sizeof(0));
988  ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
989  ASSERT_EQ(a[0], 1);
990  ASSERT_EQ(a[1], 2);
991  ASSERT_EQ(a[2], 3);
992 
993  memset(a, 0, sizeof(0));
994  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
995  &a[2], &a[3]));
996  ASSERT_EQ(a[0], 1);
997  ASSERT_EQ(a[1], 2);
998  ASSERT_EQ(a[2], 3);
999  ASSERT_EQ(a[3], 4);
1000 
1001  memset(a, 0, sizeof(0));
1002  ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1003  &a[2], &a[3], &a[4]));
1004  ASSERT_EQ(a[0], 1);
1005  ASSERT_EQ(a[1], 2);
1006  ASSERT_EQ(a[2], 3);
1007  ASSERT_EQ(a[3], 4);
1008  ASSERT_EQ(a[4], 5);
1009 
1010  memset(a, 0, sizeof(0));
1011  ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
1012  &a[1], &a[2], &a[3], &a[4], &a[5]));
1013  ASSERT_EQ(a[0], 1);
1014  ASSERT_EQ(a[1], 2);
1015  ASSERT_EQ(a[2], 3);
1016  ASSERT_EQ(a[3], 4);
1017  ASSERT_EQ(a[4], 5);
1018  ASSERT_EQ(a[5], 6);
1019 
1020  memset(a, 0, sizeof(0));
1021  ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1022  &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
1023  ASSERT_EQ(a[0], 1);
1024  ASSERT_EQ(a[1], 2);
1025  ASSERT_EQ(a[2], 3);
1026  ASSERT_EQ(a[3], 4);
1027  ASSERT_EQ(a[4], 5);
1028  ASSERT_EQ(a[5], 6);
1029  ASSERT_EQ(a[6], 7);
1030 
1031  memset(a, 0, sizeof(0));
1032  ASSERT_TRUE(RE2::FullMatch("1234567890123456",
1033  "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1034  "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1035  &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
1036  &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
1037  &a[13], &a[14], &a[15]));
1038  ASSERT_EQ(a[0], 1);
1039  ASSERT_EQ(a[1], 2);
1040  ASSERT_EQ(a[2], 3);
1041  ASSERT_EQ(a[3], 4);
1042  ASSERT_EQ(a[4], 5);
1043  ASSERT_EQ(a[5], 6);
1044  ASSERT_EQ(a[6], 7);
1045  ASSERT_EQ(a[7], 8);
1046  ASSERT_EQ(a[8], 9);
1047  ASSERT_EQ(a[9], 0);
1048  ASSERT_EQ(a[10], 1);
1049  ASSERT_EQ(a[11], 2);
1050  ASSERT_EQ(a[12], 3);
1051  ASSERT_EQ(a[13], 4);
1052  ASSERT_EQ(a[14], 5);
1053  ASSERT_EQ(a[15], 6);
1054 }
1055 
1056 TEST(RE2, Accessors) {
1057  // Check the pattern() accessor
1058  {
1059  const std::string kPattern = "http://([^/]+)/.*";
1060  const RE2 re(kPattern);
1061  ASSERT_EQ(kPattern, re.pattern());
1062  }
1063 
1064  // Check RE2 error field.
1065  {
1066  RE2 re("foo");
1067  ASSERT_TRUE(re.error().empty()); // Must have no error
1068  ASSERT_TRUE(re.ok());
1070  }
1071 }
1072 
1074  // Check UTF-8 handling
1075  // Three Japanese characters (nihongo)
1076  const char utf8_string[] = {
1077  (char)0xe6, (char)0x97, (char)0xa5, // 65e5
1078  (char)0xe6, (char)0x9c, (char)0xac, // 627c
1079  (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
1080  0
1081  };
1082  const char utf8_pattern[] = {
1083  '.',
1084  (char)0xe6, (char)0x9c, (char)0xac, // 627c
1085  '.',
1086  0
1087  };
1088 
1089  // Both should match in either mode, bytes or UTF-8
1090  RE2 re_test1(".........", RE2::Latin1);
1091  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
1092  RE2 re_test2("...");
1093  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
1094 
1095  // Check that '.' matches one byte or UTF-8 character
1096  // according to the mode.
1097  std::string s;
1098  RE2 re_test3("(.)", RE2::Latin1);
1099  ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
1100  ASSERT_EQ(s, std::string("\xe6"));
1101  RE2 re_test4("(.)");
1102  ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
1103  ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
1104 
1105  // Check that string matches itself in either mode
1106  RE2 re_test5(utf8_string, RE2::Latin1);
1107  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
1108  RE2 re_test6(utf8_string);
1109  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
1110 
1111  // Check that pattern matches string only in UTF8 mode
1112  RE2 re_test7(utf8_pattern, RE2::Latin1);
1113  ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
1114  RE2 re_test8(utf8_pattern);
1115  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
1116 }
1117 
1118 TEST(RE2, UngreedyUTF8) {
1119  // Check that ungreedy, UTF8 regular expressions don't match when they
1120  // oughtn't -- see bug 82246.
1121  {
1122  // This code always worked.
1123  const char* pattern = "\\w+X";
1124  const std::string target = "a aX";
1125  RE2 match_sentence(pattern, RE2::Latin1);
1126  RE2 match_sentence_re(pattern);
1127 
1128  ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1129  ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1130  }
1131  {
1132  const char* pattern = "(?U)\\w+X";
1133  const std::string target = "a aX";
1134  RE2 match_sentence(pattern, RE2::Latin1);
1135  ASSERT_EQ(match_sentence.error(), "");
1136  RE2 match_sentence_re(pattern);
1137 
1138  ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1139  ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1140  }
1141 }
1142 
1143 TEST(RE2, Rejects) {
1144  {
1145  RE2 re("a\\1", RE2::Quiet);
1146  ASSERT_FALSE(re.ok()); }
1147  {
1148  RE2 re("a[x", RE2::Quiet);
1149  ASSERT_FALSE(re.ok());
1150  }
1151  {
1152  RE2 re("a[z-a]", RE2::Quiet);
1153  ASSERT_FALSE(re.ok());
1154  }
1155  {
1156  RE2 re("a[[:foobar:]]", RE2::Quiet);
1157  ASSERT_FALSE(re.ok());
1158  }
1159  {
1160  RE2 re("a(b", RE2::Quiet);
1161  ASSERT_FALSE(re.ok());
1162  }
1163  {
1164  RE2 re("a\\", RE2::Quiet);
1165  ASSERT_FALSE(re.ok());
1166  }
1167 }
1168 
1169 TEST(RE2, NoCrash) {
1170  // Test that using a bad regexp doesn't crash.
1171  {
1172  RE2 re("a\\", RE2::Quiet);
1173  ASSERT_FALSE(re.ok());
1174  ASSERT_FALSE(RE2::PartialMatch("a\\b", re));
1175  }
1176 
1177  // Test that using an enormous regexp doesn't crash
1178  {
1179  RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
1180  ASSERT_FALSE(re.ok());
1181  ASSERT_FALSE(RE2::PartialMatch("aaa", re));
1182  }
1183 
1184  // Test that a crazy regexp still compiles and runs.
1185  {
1186  RE2 re(".{512}x", RE2::Quiet);
1187  ASSERT_TRUE(re.ok());
1188  std::string s;
1189  s.append(515, 'c');
1190  s.append("x");
1192  }
1193 }
1194 
1195 TEST(RE2, Recursion) {
1196  // Test that recursion is stopped.
1197  // This test is PCRE-legacy -- there's no recursion in RE2.
1198  int bytes = 15 * 1024; // enough to crash PCRE
1199  TestRecursion(bytes, ".");
1200  TestRecursion(bytes, "a");
1201  TestRecursion(bytes, "a.");
1202  TestRecursion(bytes, "ab.");
1203  TestRecursion(bytes, "abc.");
1204 }
1205 
1206 TEST(RE2, BigCountedRepetition) {
1207  // Test that counted repetition works, given tons of memory.
1208  RE2::Options opt;
1209  opt.set_max_mem(256<<20);
1210 
1211  RE2 re(".{512}x", opt);
1212  ASSERT_TRUE(re.ok());
1213  std::string s;
1214  s.append(515, 'c');
1215  s.append("x");
1217 }
1218 
1219 TEST(RE2, DeepRecursion) {
1220  // Test for deep stack recursion. This would fail with a
1221  // segmentation violation due to stack overflow before pcre was
1222  // patched.
1223  // Again, a PCRE legacy test. RE2 doesn't recurse.
1224  std::string comment("x*");
1225  std::string a(131072, 'a');
1226  comment += a;
1227  comment += "*x";
1228  RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1229  ASSERT_TRUE(RE2::FullMatch(comment, re));
1230 }
1231 
1232 // Suggested by Josh Hyman. Failed when SearchOnePass was
1233 // not implementing case-folding.
1234 TEST(CaseInsensitive, MatchAndConsume) {
1236  std::string text = "A fish named *Wanda*";
1237  StringPiece sp(text);
1238 
1239  EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
1240  EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1241 }
1242 
1243 // RE2 should permit implicit conversions from string, StringPiece, const char*,
1244 // and C string literals.
1245 TEST(RE2, ImplicitConversions) {
1246  std::string re_string(".");
1247  StringPiece re_stringpiece(".");
1248  const char* re_cstring = ".";
1249  EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1250  EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1251  EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1252  EXPECT_TRUE(RE2::PartialMatch("e", "."));
1253 }
1254 
1255 // Bugs introduced by 8622304
1256 TEST(RE2, CL8622304) {
1257  // reported by ingow
1258  std::string dir;
1259  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
1260  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
1261 
1262  // reported by jacobsa
1263  std::string key, val;
1264  EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1265  "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1266  &key,
1267  &val));
1268  EXPECT_EQ(key, "bar");
1269  EXPECT_EQ(val, "1,0x2F,030,4,5");
1270 }
1271 
1272 
1273 // Check that RE2 returns correct regexp pieces on error.
1274 // In particular, make sure it returns whole runes
1275 // and that it always reports invalid UTF-8.
1276 // Also check that Perl error flag piece is big enough.
// One malformed pattern together with the fragment RE2 is expected to
// report through error_arg() when compiling it.
struct ErrorTest {
  const char* regexp;  // pattern that must fail to compile
  const char* error;   // expected error_arg(); "" when none is returned
};

static ErrorTest error_tests[] = {
  // Reported fragments must consist of whole runes.
  { "ab\\αcd", "\\α" },
  { "ef\\x☺01", "\\x☺0" },
  { "gh\\x1☺01", "\\x1☺" },
  { "ij\\x1", "\\x1" },
  { "kl\\x", "\\x" },
  { "uv\\x{0000☺}", "\\x{0000☺" },
  { "wx\\p{ABC", "\\p{ABC" },
  { "yz(?smiUX:abc)", "(?smiUX" },  // used to return (?s but the error is X
  { "aa(?sm☺i", "(?sm☺" },
  { "bb[abc", "[abc" },

  // no argument string returned for invalid UTF-8
  { "mn\\x1\377", "" },
  { "op\377qr", "" },
  { "st\\x{00000\377", "" },
  { "zz\\p{\377}", "" },
  { "zz\\x{00\377}", "" },
  { "zz(?P<name\377>abc)", "" },
};
1299 TEST(RE2, ErrorArgs) {
1300  for (size_t i = 0; i < arraysize(error_tests); i++) {
1301  RE2 re(error_tests[i].regexp, RE2::Quiet);
1302  EXPECT_FALSE(re.ok());
1303  EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
1304  }
1305 }
1306 
1307 // Check that "never match \n" mode never matches \n.
// One case for the "never match \n" mode: a pattern, the text it is
// searched in, and the expected first capture (null when no match is
// expected).
struct NeverTest {
  const char* regexp;
  const char* text;
  const char* match;
};

static NeverTest never_tests[] = {
  { "(.*)", "abc\ndef\nghi\n", "abc" },
  { "(?s)(abc.*def)", "abc\ndef\n", nullptr },
  { "(abc(.|\n)*def)", "abc\ndef\n", nullptr },
  { "(abc[^x]*def)", "abc\ndef\n", nullptr },
  { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
};
1319 TEST(RE2, NeverNewline) {
1320  RE2::Options opt;
1321  opt.set_never_nl(true);
1322  for (size_t i = 0; i < arraysize(never_tests); i++) {
1323  const NeverTest& t = never_tests[i];
1324  RE2 re(t.regexp, opt);
1325  if (t.match == NULL) {
1326  EXPECT_FALSE(re.PartialMatch(t.text, re));
1327  } else {
1328  StringPiece m;
1329  EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
1330  EXPECT_EQ(m, t.match);
1331  }
1332  }
1333 }
1334 
1335 // Check that dot_nl option works.
1336 TEST(RE2, DotNL) {
1337  RE2::Options opt;
1338  opt.set_dot_nl(true);
1339  EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1340  EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1341  opt.set_never_nl(true);
1342  EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1343 }
1344 
1345 // Check that there are no capturing groups in "never capture" mode.
1346 TEST(RE2, NeverCapture) {
1347  RE2::Options opt;
1348  opt.set_never_capture(true);
1349  RE2 re("(r)(e)", opt);
1351 }
1352 
1353 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1354 // Triggered by a failed DFA search falling back to Bitstate when
1355 // using Match with a NULL submatch set. Bitstate tried to read
1356 // the submatch[0] entry even if nsubmatch was 0.
1357 TEST(RE2, BitstateCaptureBug) {
1358  RE2::Options opt;
1359  opt.set_max_mem(20000);
1360  RE2 re("(_________$)", opt);
1361  StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1362  EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1363 }
1364 
1365 // C++ version of bug 609710.
1366 TEST(RE2, UnicodeClasses) {
1367  const std::string str = "ABCDEFGHI譚永鋒";
1368  std::string a, b, c;
1369 
1370  EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1371  EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1372  EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1373  EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1374  EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1375  EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1376 
1377  EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1378  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1379  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1380  EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1381  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1382  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1383 
1384  EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1385  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1386  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1387  EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1388  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1389  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1390 
1391  EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1392  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1393  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1394  EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1395  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1396  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1397 
1398  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1399  EXPECT_EQ("A", a);
1400  EXPECT_EQ("B", b);
1401  EXPECT_EQ("C", c);
1402 
1403  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1404  EXPECT_EQ("A", a);
1405  EXPECT_EQ("B", b);
1406  EXPECT_EQ("C", c);
1407 
1408  EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1409 
1410  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1411  EXPECT_EQ("A", a);
1412  EXPECT_EQ("B", b);
1413  EXPECT_EQ("C", c);
1414 
1415  EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1416 
1417  EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1418  EXPECT_EQ("譚", a);
1419  EXPECT_EQ("永", b);
1420  EXPECT_EQ("鋒", c);
1421 }
1422 
1424  // Test with and without options.
1425  static LazyRE2 a = {"a"};
1426  static LazyRE2 b = {"b", RE2::Latin1};
1427 
1428  EXPECT_EQ("a", a->pattern());
1429  EXPECT_EQ(RE2::Options::EncodingUTF8, a->options().encoding());
1430 
1431  EXPECT_EQ("b", b->pattern());
1432  EXPECT_EQ(RE2::Options::EncodingLatin1, b->options().encoding());
1433 }
1434 
1435 // Bug reported by saito. 2009/02/17
1436 TEST(RE2, NullVsEmptyString) {
1437  RE2 re(".*");
1438  EXPECT_TRUE(re.ok());
1439 
1440  StringPiece null;
1441  EXPECT_TRUE(RE2::FullMatch(null, re));
1442 
1443  StringPiece empty("");
1445 }
1446 
1447 // Similar to the previous test, check that the null string and the empty
1448 // string both match, but also that the null string can only provide null
1449 // submatches whereas the empty string can also provide empty submatches.
1450 TEST(RE2, NullVsEmptyStringSubmatches) {
1451  RE2 re("()|(foo)");
1452  EXPECT_TRUE(re.ok());
1453 
1454  // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
1455  StringPiece matches[4];
1456 
1457  for (size_t i = 0; i < arraysize(matches); i++)
1458  matches[i] = "bar";
1459 
1460  StringPiece null;
1461  EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
1462  matches, arraysize(matches)));
1463  for (size_t i = 0; i < arraysize(matches); i++) {
1464  EXPECT_TRUE(matches[i].data() == NULL); // always null
1465  EXPECT_TRUE(matches[i].empty());
1466  }
1467 
1468  for (size_t i = 0; i < arraysize(matches); i++)
1469  matches[i] = "bar";
1470 
1471  StringPiece empty("");
1472  EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
1473  matches, arraysize(matches)));
1474  EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
1475  EXPECT_TRUE(matches[0].empty());
1476  EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
1477  EXPECT_TRUE(matches[1].empty());
1478  EXPECT_TRUE(matches[2].data() == NULL);
1479  EXPECT_TRUE(matches[2].empty());
1480  EXPECT_TRUE(matches[3].data() == NULL);
1481  EXPECT_TRUE(matches[3].empty());
1482 }
1483 
1484 // Issue 1816809
1485 TEST(RE2, Bug1816809) {
1486  RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1487  StringPiece piece("llx-3;llx4");
1488  std::string x;
1489  EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1490 }
1491 
1492 // Issue 3061120
1493 TEST(RE2, Bug3061120) {
1494  RE2 re("(?i)\\W");
1495  EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
1496  EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
1497  EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
1498 }
1499 
1500 TEST(RE2, CapturingGroupNames) {
1501  // Opening parentheses annotated with group IDs:
1502  // 12 3 45 6 7
1503  RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1504  EXPECT_TRUE(re.ok());
1505  const std::map<int, std::string>& have = re.CapturingGroupNames();
1506  std::map<int, std::string> want;
1507  want[3] = "G2";
1508  want[6] = "G2";
1509  want[7] = "G1";
1510  EXPECT_EQ(want, have);
1511 }
1512 
1513 TEST(RE2, RegexpToStringLossOfAnchor) {
1514  EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1515  EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1516  EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1517  EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1518 }
1519 
1520 // Issue 10131674
1521 TEST(RE2, Bug10131674) {
1522  // Some of these escapes describe values that do not fit in a byte.
1523  RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1524  EXPECT_FALSE(re.ok());
1525  EXPECT_FALSE(RE2::FullMatch("hello world", re));
1526 }
1527 
1528 TEST(RE2, Bug18391750) {
1529  // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
1530  const char t[] = {
1531  (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
1532  (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
1533  (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
1534  (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
1535  (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
1536  (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
1537  };
1538  RE2::Options opt;
1540  opt.set_longest_match(true);
1541  opt.set_dot_nl(true);
1542  opt.set_case_sensitive(false);
1543  RE2 re(t, opt);
1544  ASSERT_TRUE(re.ok());
1545  RE2::PartialMatch(t, re);
1546 }
1547 
1548 TEST(RE2, Bug18458852) {
1549  // Bug in parser accepting invalid (too large) rune,
1550  // causing compiler to fail in DCHECK in UTF-8
1551  // character class code.
1552  const char b[] = {
1553  (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
1554  (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
1555  (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
1556  };
1557  RE2 re(b);
1558  ASSERT_FALSE(re.ok());
1559 }
1560 
1561 TEST(RE2, Bug18523943) {
1562  // Bug in BitState: case kFailInst failed the match entirely.
1563 
1564  RE2::Options opt;
1565  const char a[] = {
1566  (char)0x29, (char)0x29, (char)0x24, (char)0x00,
1567  };
1568  const char b[] = {
1569  (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
1570  };
1571  opt.set_log_errors(false);
1573  opt.set_posix_syntax(true);
1574  opt.set_longest_match(true);
1575  opt.set_literal(false);
1576  opt.set_never_nl(true);
1577 
1578  RE2 re((const char*)b, opt);
1579  ASSERT_TRUE(re.ok());
1580  std::string s1;
1581  ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1));
1582 }
1583 
1584 TEST(RE2, Bug21371806) {
1585  // Bug in parser accepting Unicode groups in Latin-1 mode,
1586  // causing compiler to fail in DCHECK in prog.cc.
1587 
1588  RE2::Options opt;
1590 
1591  RE2 re("g\\p{Zl}]", opt);
1592  ASSERT_TRUE(re.ok());
1593 }
1594 
1595 TEST(RE2, Bug26356109) {
1596  // Bug in parser caused by factoring of common prefixes in alternations.
1597 
1598  // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
1599  // consume "ab" and then stop (when unanchored) whereas it should consume all
1600  // of "abc" as per first-match semantics.
1601  RE2 re("a\\C*?c|a\\C*?b");
1602  ASSERT_TRUE(re.ok());
1603 
1604  std::string s = "abc";
1605  StringPiece m;
1606 
1607  ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
1608  ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
1609 
1610  ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
1611  ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
1612 }
1613 
1614 TEST(RE2, Issue104) {
1615  // RE2::GlobalReplace always advanced by one byte when the empty string was
1616  // matched, which would clobber any rune that is longer than one byte.
1617 
1618  std::string s = "bc";
1619  ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
1620  ASSERT_EQ("dbdcd", s);
1621 
1622  s = "ąć";
1623  ASSERT_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ"));
1624  ASSERT_EQ("ĈąĈćĈ", s);
1625 
1626  s = "人类";
1627  ASSERT_EQ(3, RE2::GlobalReplace(&s, "大*", "小"));
1628  ASSERT_EQ("小人小类小", s);
1629 }
1630 
1631 } // namespace re2
xds_interop_client.str
str
Definition: xds_interop_client.py:487
EXPECT_FALSE
#define EXPECT_FALSE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1970
test_group_name.all
all
Definition: test_group_name.py:241
_gevent_test_main.result
result
Definition: _gevent_test_main.py:96
check_banned_filenames.bad
bad
Definition: check_banned_filenames.py:26
ASSERT_NE
#define ASSERT_NE(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2060
re2::RE2::CRadix
static Arg CRadix(short *x)
re2::RE2::FullMatch
static bool FullMatch(const StringPiece &text, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:367
re2::RE2::PartialMatchN
static bool PartialMatchN(const StringPiece &text, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:339
check_tracer_sanity.pattern
pattern
Definition: check_tracer_sanity.py:25
memset
return memset(p, 0, total)
NamedGroups
Span< const NamedGroup > NamedGroups()
Definition: ssl_key_share.cc:304
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
re2::RE2::CapturingGroupNames
const std::map< int, std::string > & CapturingGroupNames() const
Definition: bloaty/third_party/re2/re2/re2.cc:322
re2::RE2::FullMatchN
static bool FullMatchN(const StringPiece &text, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:334
re2::NeverTest
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1308
string.h
options
double_dict options[]
Definition: capstone_test.c:55
re2::Regexp
Definition: bloaty/third_party/re2/re2/regexp.h:274
re2::StringPiece::size
size_type size() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:80
re2::RE2::error_code
ErrorCode error_code() const
Definition: bloaty/third_party/re2/re2/re2.h:279
absl::cord_internal::Consume
void Consume(CordRep *rep, ConsumeFn consume_fn)
Definition: cord_rep_consume.cc:45
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
error
grpc_error_handle error
Definition: retry_filter.cc:499
INT64_C
#define INT64_C(val)
Definition: stdint-msvc2008.h:233
re2::RE2::ANCHOR_BOTH
@ ANCHOR_BOTH
Definition: bloaty/third_party/re2/re2/re2.h:475
re2::RE2::ProgramFanout
int ProgramFanout(std::map< int, int > *histogram) const
Definition: bloaty/third_party/re2/re2/re2.cc:295
re2::RE2::UNANCHORED
@ UNANCHORED
Definition: bloaty/third_party/re2/re2/re2.h:473
re2::RE2::ReverseProgramFanout
int ReverseProgramFanout(std::map< int, int > *histogram) const
Definition: bloaty/third_party/re2/re2/re2.cc:301
re2::RE2::Extract
static bool Extract(const StringPiece &text, const RE2 &re, const StringPiece &rewrite, std::string *out)
Definition: bloaty/third_party/re2/re2/re2.cc:457
re2::RE2::error
const std::string & error() const
Definition: bloaty/third_party/re2/re2/re2.h:275
re2::RE2::Options::set_never_nl
void set_never_nl(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:656
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::RE2::Consume
static bool Consume(StringPiece *input, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:383
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
re2::RE2::Options::set_case_sensitive
void set_case_sensitive(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:665
re2::ErrorTest
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1277
re2::NeverTest::match
const char * match
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1311
re2::UTF8
static std::string UTF8(Rune r)
Definition: bloaty/third_party/re2/re2/testing/exhaustive3_test.cc:35
re2::RE2::Replace
static bool Replace(std::string *str, const RE2 &re, const StringPiece &rewrite)
Definition: bloaty/third_party/re2/re2/re2.cc:366
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition: iomgr/time_averaged_stats_test.cc:27
re2::LazyRE2
Definition: bloaty/third_party/re2/re2/re2.h:918
re2::ErrorTest::error
const char * error
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1279
re2::RE2::FindAndConsume
static bool FindAndConsume(StringPiece *input, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:392
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
ASSERT_DECIMAL
#define ASSERT_DECIMAL(type, value)
ULL
#define ULL(x)
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/coded_stream_unittest.cc:57
re2::RE2::FindAndConsumeN
static bool FindAndConsumeN(StringPiece *input, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:355
re2::RE2::NamedCapturingGroups
const std::map< std::string, int > & NamedCapturingGroups() const
Definition: bloaty/third_party/re2/re2/re2.cc:311
int16_t
signed short int16_t
Definition: stdint-msvc2008.h:76
re2::RE2::Options::EncodingUTF8
@ EncodingUTF8
Definition: bloaty/third_party/re2/re2/re2.h:604
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
asyncio_get_stats.args
args
Definition: asyncio_get_stats.py:40
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
ToString
std::string ToString(const grpc::string_ref &r)
Definition: string_ref_helper.cc:24
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
python_utils.jobset.INFO
INFO
Definition: jobset.py:111
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::RE2::CheckRewriteString
bool CheckRewriteString(const StringPiece &rewrite, std::string *error) const
Definition: bloaty/third_party/re2/re2/re2.cc:856
re2::RE2::DefaultOptions
@ DefaultOptions
Definition: bloaty/third_party/re2/re2/re2.h:248
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
re2::RE2::Options::set_posix_syntax
void set_posix_syntax(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:641
re2::RE2::ProgramSize
int ProgramSize() const
Definition: bloaty/third_party/re2/re2/re2.cc:265
re2::RE2::Options::set_log_errors
void set_log_errors(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:647
uint64_t
unsigned __int64 uint64_t
Definition: stdint-msvc2008.h:90
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::RE2::ok
bool ok() const
Definition: bloaty/third_party/re2/re2/re2.h:266
re2::TestQuoteMeta
static void TestQuoteMeta(const std::string &unquoted, const RE2::Options &options=RE2::DefaultOptions)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:377
re2::TestCheckRewriteString
static void TestCheckRewriteString(const char *regexp, const char *rewrite, bool expect_ok)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:189
histogram
static grpc_histogram * histogram
Definition: test/core/fling/client.cc:34
grpc_ruby_generator::Replace
std::string Replace(std::string s, const std::string &from, const std::string &to)
Definition: ruby_generator_string-inl.h:52
re2::RE2::PartialMatch
static bool PartialMatch(const StringPiece &text, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:374
re2::error_tests
static struct re2::ErrorTest error_tests[]
re2::RE2::Options::set_max_mem
void set_max_mem(int64_t m)
Definition: bloaty/third_party/re2/re2/re2.h:650
x
int x
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3610
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
re2::RE2::Options::set_literal
void set_literal(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:653
re2::ErrorTest::regexp
const char * regexp
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1278
min
#define min(a, b)
Definition: qsort.h:83
re2::RE2::Quiet
@ Quiet
Definition: bloaty/third_party/re2/re2/re2.h:251
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::RE2::Latin1
@ Latin1
Definition: bloaty/third_party/re2/re2/re2.h:249
re2::RE2
Definition: bloaty/third_party/re2/re2/re2.h:211
re2::RE2::Options::set_never_capture
void set_never_capture(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:662
UINT64_C
#define UINT64_C(val)
Definition: stdint-msvc2008.h:238
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
stdint.h
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
re2::RE2::ConsumeN
static bool ConsumeN(StringPiece *input, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:344
google_benchmark.example.empty
def empty(state)
Definition: example.py:31
Match
static bool Match(const upb_msgdef *m, const char *name, const upb_fielddef **f, const upb_oneofdef **o, const char *prefix, const char *suffix)
Definition: protobuf/ruby/ext/google/protobuf_c/message.c:195
re2::RE2::Match
bool Match(const StringPiece &text, size_t startpos, size_t endpos, Anchor re_anchor, StringPiece *submatch, int nsubmatch) const
Definition: bloaty/third_party/re2/re2/re2.cc:572
re2::RE2::ReverseProgramSize
int ReverseProgramSize() const
Definition: bloaty/third_party/re2/re2/re2.cc:271
tests.unit._exit_scenarios.port
port
Definition: _exit_scenarios.py:179
re2::RE2::GlobalReplace
static int GlobalReplace(std::string *str, const RE2 &re, const StringPiece &rewrite)
Definition: bloaty/third_party/re2/re2/re2.cc:386
re2::RE2::QuoteMeta
static std::string QuoteMeta(const StringPiece &unquoted)
Definition: bloaty/third_party/re2/re2/re2.cc:473
re2::RE2::POSIX
@ POSIX
Definition: bloaty/third_party/re2/re2/re2.h:250
re2::StringPiece::data
const_pointer data() const
Definition: bloaty/third_party/re2/re2/stringpiece.h:85
UINT32_C
#define UINT32_C(val)
Definition: stdint-msvc2008.h:237
key
const char * key
Definition: hpack_parser_table.cc:164
re2::RE2::Options
Definition: bloaty/third_party/re2/re2/re2.h:548
upload.group
group
Definition: bloaty/third_party/googletest/googlemock/scripts/upload.py:397
bytes
uint8 bytes[10]
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/coded_stream_unittest.cc:153
tests
Definition: src/python/grpcio_tests/tests/__init__.py:1
ASSERT_HEX
#define ASSERT_HEX(type, value)
re2::RE2::Options::set_encoding
void set_encoding(Encoding encoding)
Definition: bloaty/third_party/re2/re2/re2.h:627
re2::RE2::NoError
@ NoError
Definition: bloaty/third_party/re2/re2/re2.h:221
re2::RE2::error_arg
const std::string & error_arg() const
Definition: bloaty/third_party/re2/re2/re2.h:283
re2::NeverTest::text
const char * text
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1310
re2::RE2::Options::set_longest_match
void set_longest_match(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:644
re2::never_tests
static struct re2::NeverTest never_tests[]
re2::RE2::pattern
const std::string & pattern() const
Definition: bloaty/third_party/re2/re2/re2.h:271
L
lua_State * L
Definition: upb/upb/bindings/lua/main.c:35
fix_build_deps.r
r
Definition: fix_build_deps.py:491
zeros
int zeros
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:459
re2::NegativeTestQuoteMeta
static void NegativeTestQuoteMeta(const std::string &unquoted, const std::string &should_not_match, const RE2::Options &options=RE2::DefaultOptions)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:387
ASSERT_TRUE
#define ASSERT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1973
ASSERT_FALSE
#define ASSERT_FALSE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1976
re2::RE2::Options::EncodingLatin1
@ EncodingLatin1
Definition: bloaty/third_party/re2/re2/re2.h:605
re2::NeverTest::regexp
const char * regexp
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1309
input
std::string input
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/tokenizer_unittest.cc:197
EXPECT_TRUE
#define EXPECT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1967
re2::FindAndConsume
void FindAndConsume(int iters, int nbytes)
Definition: bloaty/third_party/re2/re2/testing/regexp_benchmark.cc:292
re2::TestRecursion
static void TestRecursion(int size, const char *pattern)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:362
INT32_C
#define INT32_C(val)
Definition: stdint-msvc2008.h:232
ASSERT_OCTAL
#define ASSERT_OCTAL(type, value)
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition: bloaty/third_party/re2/re2/testing/charclass_test.cc:198
ASSERT_GT
#define ASSERT_GT(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2076
size
voidpf void uLong size
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
re2::RE2::NumberOfCapturingGroups
int NumberOfCapturingGroups() const
Definition: bloaty/third_party/re2/re2/re2.h:481
regress.m
m
Definition: regress/regress.py:25
int32_t
signed int int32_t
Definition: stdint-msvc2008.h:77
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
to_string
static bool to_string(zval *from)
Definition: protobuf/php/ext/google/protobuf/convert.c:333
setup.target
target
Definition: third_party/bloaty/third_party/protobuf/python/setup.py:179
errno.h
re2::RE2::Arg
Definition: bloaty/third_party/re2/re2/re2.h:786
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
LL
#define LL(x)
ASSERT_EQ
#define ASSERT_EQ(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2056
re2::RE2::Options::set_dot_nl
void set_dot_nl(bool b)
Definition: bloaty/third_party/re2/re2/re2.h:659


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:00