re2/re2/testing/re2_test.cc
Go to the documentation of this file.
1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 
6 // TODO: Test extractions for PartialMatch/Consume
7 
8 #include <errno.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <map>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
17 #include <sys/mman.h>
18 #include <unistd.h> /* for sysconf */
19 #endif
20 
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "re2/re2.h"
25 #include "re2/regexp.h"
26 
27 namespace re2 {
28 
29 TEST(RE2, HexTests) {
30 #define ASSERT_HEX(type, value) \
31  do { \
32  type v; \
33  ASSERT_TRUE( \
34  RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
35  ASSERT_EQ(v, 0x##value); \
36  ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
37  RE2::CRadix(&v))); \
38  ASSERT_EQ(v, 0x##value); \
39  } while (0)
40 
41  ASSERT_HEX(short, 2bad);
42  ASSERT_HEX(unsigned short, 2badU);
43  ASSERT_HEX(int, dead);
44  ASSERT_HEX(unsigned int, deadU);
45  ASSERT_HEX(long, 7eadbeefL);
46  ASSERT_HEX(unsigned long, deadbeefUL);
47  ASSERT_HEX(long long, 12345678deadbeefLL);
48  ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);
49 
50 #undef ASSERT_HEX
51 }
52 
53 TEST(RE2, OctalTests) {
54 #define ASSERT_OCTAL(type, value) \
55  do { \
56  type v; \
57  ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
58  ASSERT_EQ(v, 0##value); \
59  ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
60  RE2::CRadix(&v))); \
61  ASSERT_EQ(v, 0##value); \
62  } while (0)
63 
64  ASSERT_OCTAL(short, 77777);
65  ASSERT_OCTAL(unsigned short, 177777U);
66  ASSERT_OCTAL(int, 17777777777);
67  ASSERT_OCTAL(unsigned int, 37777777777U);
68  ASSERT_OCTAL(long, 17777777777L);
69  ASSERT_OCTAL(unsigned long, 37777777777UL);
70  ASSERT_OCTAL(long long, 777777777777777777777LL);
71  ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);
72 
73 #undef ASSERT_OCTAL
74 }
75 
76 TEST(RE2, DecimalTests) {
77 #define ASSERT_DECIMAL(type, value) \
78  do { \
79  type v; \
80  ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
81  ASSERT_EQ(v, value); \
82  ASSERT_TRUE( \
83  RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
84  ASSERT_EQ(v, value); \
85  } while (0)
86 
87  ASSERT_DECIMAL(short, -1);
88  ASSERT_DECIMAL(unsigned short, 9999);
89  ASSERT_DECIMAL(int, -1000);
90  ASSERT_DECIMAL(unsigned int, 12345U);
91  ASSERT_DECIMAL(long, -10000000L);
92  ASSERT_DECIMAL(unsigned long, 3083324652U);
93  ASSERT_DECIMAL(long long, -100000000000000LL);
94  ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);
95 
96 #undef ASSERT_DECIMAL
97 }
98 
99 TEST(RE2, Replace) {
100  struct ReplaceTest {
101  const char *regexp;
102  const char *rewrite;
103  const char *original;
104  const char *single;
105  const char *global;
106  int greplace_count;
107  };
108  static const ReplaceTest tests[] = {
109  { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
110  "\\2\\1ay",
111  "the quick brown fox jumps over the lazy dogs.",
112  "ethay quick brown fox jumps over the lazy dogs.",
113  "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
114  9 },
115  { "\\w+",
116  "\\0-NOSPAM",
117  "abcd.efghi@google.com",
118  "abcd-NOSPAM.efghi@google.com",
119  "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
120  4 },
121  { "^",
122  "(START)",
123  "foo",
124  "(START)foo",
125  "(START)foo",
126  1 },
127  { "^",
128  "(START)",
129  "",
130  "(START)",
131  "(START)",
132  1 },
133  { "$",
134  "(END)",
135  "",
136  "(END)",
137  "(END)",
138  1 },
139  { "b",
140  "bb",
141  "ababababab",
142  "abbabababab",
143  "abbabbabbabbabb",
144  5 },
145  { "b",
146  "bb",
147  "bbbbbb",
148  "bbbbbbb",
149  "bbbbbbbbbbbb",
150  6 },
151  { "b+",
152  "bb",
153  "bbbbbb",
154  "bb",
155  "bb",
156  1 },
157  { "b*",
158  "bb",
159  "bbbbbb",
160  "bb",
161  "bb",
162  1 },
163  { "b*",
164  "bb",
165  "aaaaa",
166  "bbaaaaa",
167  "bbabbabbabbabbabb",
168  6 },
169  // Check newline handling
170  { "a.*a",
171  "(\\0)",
172  "aba\naba",
173  "(aba)\naba",
174  "(aba)\n(aba)",
175  2 },
176  { "", NULL, NULL, NULL, NULL, 0 }
177  };
178 
179  for (const ReplaceTest* t = tests; t->original != NULL; t++) {
180  std::string one(t->original);
181  ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
182  ASSERT_EQ(one, t->single);
183  std::string all(t->original);
184  ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
185  << "Got: " << all;
186  ASSERT_EQ(all, t->global);
187  }
188 }
189 
190 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
191  bool expect_ok) {
193  RE2 exp(regexp);
194  bool actual_ok = exp.CheckRewriteString(rewrite, &error);
195  EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
196 }
197 
198 TEST(CheckRewriteString, all) {
199  TestCheckRewriteString("abc", "foo", true);
200  TestCheckRewriteString("abc", "foo\\", false);
201  TestCheckRewriteString("abc", "foo\\0bar", true);
202 
203  TestCheckRewriteString("a(b)c", "foo", true);
204  TestCheckRewriteString("a(b)c", "foo\\0bar", true);
205  TestCheckRewriteString("a(b)c", "foo\\1bar", true);
206  TestCheckRewriteString("a(b)c", "foo\\2bar", false);
207  TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
208 
209  TestCheckRewriteString("a(b)(c)", "foo\\12", true);
210  TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
211  TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
212 }
213 
214 TEST(RE2, Extract) {
215  std::string s;
216 
217  ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
218  ASSERT_EQ(s, "kremvax!boris");
219 
220  ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
221  ASSERT_EQ(s, "'foo'");
222  // check that false match doesn't overwrite
223  ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
224  ASSERT_EQ(s, "'foo'");
225 }
226 
227 TEST(RE2, MaxSubmatchTooLarge) {
228  std::string s;
229  ASSERT_FALSE(RE2::Extract("foo", "f(o+)", "\\1\\2", &s));
230  s = "foo";
231  ASSERT_FALSE(RE2::Replace(&s, "f(o+)", "\\1\\2"));
232  s = "foo";
233  ASSERT_FALSE(RE2::GlobalReplace(&s, "f(o+)", "\\1\\2"));
234 }
235 
236 TEST(RE2, Consume) {
237  RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
238  std::string word;
239 
240  std::string s(" aaa b!@#$@#$cccc");
241  StringPiece input(s);
242 
243  ASSERT_TRUE(RE2::Consume(&input, r, &word));
244  ASSERT_EQ(word, "aaa") << " input: " << input;
245  ASSERT_TRUE(RE2::Consume(&input, r, &word));
246  ASSERT_EQ(word, "b") << " input: " << input;
247  ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
248 }
249 
250 TEST(RE2, ConsumeN) {
251  const std::string s(" one two three 4");
252  StringPiece input(s);
253 
254  RE2::Arg argv[2];
255  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
256 
257  // 0 arg
258  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
259 
260  // 1 arg
261  std::string word;
262  argv[0] = &word;
263  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
264  EXPECT_EQ("two", word);
265 
266  // Multi-args
267  int n;
268  argv[1] = &n;
269  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
270  EXPECT_EQ("three", word);
271  EXPECT_EQ(4, n);
272 }
273 
274 TEST(RE2, FindAndConsume) {
275  RE2 r("(\\w+)"); // matches a word
276  std::string word;
277 
278  std::string s(" aaa b!@#$@#$cccc");
279  StringPiece input(s);
280 
282  ASSERT_EQ(word, "aaa");
284  ASSERT_EQ(word, "b");
286  ASSERT_EQ(word, "cccc");
288 
289  // Check that FindAndConsume works without any submatches.
290  // Earlier version used uninitialized data for
291  // length to consume.
292  input = "aaa";
294  ASSERT_EQ(input, "");
295 }
296 
297 TEST(RE2, FindAndConsumeN) {
298  const std::string s(" one two three 4");
299  StringPiece input(s);
300 
301  RE2::Arg argv[2];
302  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
303 
304  // 0 arg
305  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
306 
307  // 1 arg
308  std::string word;
309  argv[0] = &word;
310  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
311  EXPECT_EQ("two", word);
312 
313  // Multi-args
314  int n;
315  argv[1] = &n;
316  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
317  EXPECT_EQ("three", word);
318  EXPECT_EQ(4, n);
319 }
320 
321 TEST(RE2, MatchNumberPeculiarity) {
322  RE2 r("(foo)|(bar)|(baz)");
323  std::string word1;
324  std::string word2;
325  std::string word3;
326 
327  ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
328  ASSERT_EQ(word1, "foo");
329  ASSERT_EQ(word2, "");
330  ASSERT_EQ(word3, "");
331  ASSERT_TRUE(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
332  ASSERT_EQ(word1, "");
333  ASSERT_EQ(word2, "bar");
334  ASSERT_EQ(word3, "");
335  ASSERT_TRUE(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
336  ASSERT_EQ(word1, "");
337  ASSERT_EQ(word2, "");
338  ASSERT_EQ(word3, "baz");
339  ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
340 
341  std::string a;
342  ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
343  ASSERT_EQ(a, "");
344 }
345 
346 TEST(RE2, Match) {
347  RE2 re("((\\w+):([0-9]+))"); // extracts host and port
348  StringPiece group[4];
349 
350  // No match.
351  StringPiece s = "zyzzyva";
352  ASSERT_FALSE(
353  re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
354 
355  // Matches and extracts.
356  s = "a chrisr:9000 here";
357  ASSERT_TRUE(
358  re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
359  ASSERT_EQ(group[0], "chrisr:9000");
360  ASSERT_EQ(group[1], "chrisr:9000");
361  ASSERT_EQ(group[2], "chrisr");
362  ASSERT_EQ(group[3], "9000");
363 
364  std::string all, host;
365  int port;
366  ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
367  ASSERT_EQ(all, "chrisr:9000");
368  ASSERT_EQ(host, "chrisr");
369  ASSERT_EQ(port, 9000);
370 }
371 
372 static void TestRecursion(int size, const char* pattern) {
373  // Fill up a string repeating the pattern given
374  std::string domain;
375  domain.resize(size);
376  size_t patlen = strlen(pattern);
377  for (int i = 0; i < size; i++) {
378  domain[i] = pattern[i % patlen];
379  }
380  // Just make sure it doesn't crash due to too much recursion.
381  RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
382  RE2::FullMatch(domain, re);
383 }
384 
385 // A meta-quoted string, interpreted as a pattern, should always match
386 // the original unquoted string.
387 static void TestQuoteMeta(const std::string& unquoted,
389  std::string quoted = RE2::QuoteMeta(unquoted);
390  RE2 re(quoted, options);
391  EXPECT_TRUE(RE2::FullMatch(unquoted, re))
392  << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
393 }
394 
395 // A meta-quoted string, interpreted as a pattern, should always match
396 // the original unquoted string.
398  const std::string& unquoted, const std::string& should_not_match,
400  std::string quoted = RE2::QuoteMeta(unquoted);
401  RE2 re(quoted, options);
402  EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
403  << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
404 }
405 
406 // Tests that quoted meta characters match their original strings,
407 // and that a few things that shouldn't match indeed do not.
408 TEST(QuoteMeta, Simple) {
409  TestQuoteMeta("foo");
410  TestQuoteMeta("foo.bar");
411  TestQuoteMeta("foo\\.bar");
412  TestQuoteMeta("[1-9]");
413  TestQuoteMeta("1.5-2.0?");
414  TestQuoteMeta("\\d");
415  TestQuoteMeta("Who doesn't like ice cream?");
416  TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
417  TestQuoteMeta("((?!)xxx).*yyy");
418  TestQuoteMeta("([");
419 }
420 TEST(QuoteMeta, SimpleNegative) {
421  NegativeTestQuoteMeta("foo", "bar");
422  NegativeTestQuoteMeta("...", "bar");
423  NegativeTestQuoteMeta("\\.", ".");
424  NegativeTestQuoteMeta("\\.", "..");
425  NegativeTestQuoteMeta("(a)", "a");
426  NegativeTestQuoteMeta("(a|b)", "a");
427  NegativeTestQuoteMeta("(a|b)", "(a)");
428  NegativeTestQuoteMeta("(a|b)", "a|b");
429  NegativeTestQuoteMeta("[0-9]", "0");
430  NegativeTestQuoteMeta("[0-9]", "0-9");
431  NegativeTestQuoteMeta("[0-9]", "[9]");
432  NegativeTestQuoteMeta("((?!)xxx)", "xxx");
433 }
434 
435 TEST(QuoteMeta, Latin1) {
436  TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
437 }
438 
439 TEST(QuoteMeta, UTF8) {
440  TestQuoteMeta("Plácido Domingo");
441  TestQuoteMeta("xyz"); // No fancy utf8.
442  TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
443  TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
444  TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
445  TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
446  TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
447  // still work.
448  NegativeTestQuoteMeta("27\xc2\xb0",
449  "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
450 }
451 
452 TEST(QuoteMeta, HasNull) {
453  std::string has_null;
454 
455  // string with one null character
456  has_null += '\0';
457  TestQuoteMeta(has_null);
458  NegativeTestQuoteMeta(has_null, "");
459 
460  // Don't want null-followed-by-'1' to be interpreted as '\01'.
461  has_null += '1';
462  TestQuoteMeta(has_null);
463  NegativeTestQuoteMeta(has_null, "\1");
464 }
465 
466 TEST(ProgramSize, BigProgram) {
467  RE2 re_simple("simple regexp");
468  RE2 re_medium("medium.*regexp");
469  RE2 re_complex("complex.{1,128}regexp");
470 
471  ASSERT_GT(re_simple.ProgramSize(), 0);
472  ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
473  ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
474 
475  ASSERT_GT(re_simple.ReverseProgramSize(), 0);
476  ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
477  ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
478 }
479 
480 TEST(ProgramFanout, BigProgram) {
481  RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
482  RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
483  RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
484  RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
485 
486  std::vector<int> histogram;
487 
488  // 3 is the largest non-empty bucket and has 2 element.
489  ASSERT_EQ(3, re1.ProgramFanout(&histogram));
490  ASSERT_EQ(2, histogram[3]);
491 
492  // 6 is the largest non-empty bucket and has 11 elements.
493  ASSERT_EQ(6, re10.ProgramFanout(&histogram));
494  ASSERT_EQ(11, histogram[6]);
495 
496  // 9 is the largest non-empty bucket and has 101 elements.
497  ASSERT_EQ(9, re100.ProgramFanout(&histogram));
498  ASSERT_EQ(101, histogram[9]);
499 
500  // 13 is the largest non-empty bucket and has 1001 elements.
501  ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
502  ASSERT_EQ(1001, histogram[13]);
503 
504  // 2 is the largest non-empty bucket and has 2 element.
505  ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
506  ASSERT_EQ(2, histogram[2]);
507 
508  // 5 is the largest non-empty bucket and has 11 elements.
509  ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
510  ASSERT_EQ(11, histogram[5]);
511 
512  // 9 is the largest non-empty bucket and has 101 elements.
513  ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
514  ASSERT_EQ(101, histogram[9]);
515 
516  // 12 is the largest non-empty bucket and has 1001 elements.
517  ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
518  ASSERT_EQ(1001, histogram[12]);
519 }
520 
521 // Issue 956519: handling empty character sets was
522 // causing NULL dereference. This tests a few empty character sets.
523 // (The way to get an empty character set is to negate a full one.)
524 TEST(EmptyCharset, Fuzz) {
525  static const char *empties[] = {
526  "[^\\S\\s]",
527  "[^\\S[:space:]]",
528  "[^\\D\\d]",
529  "[^\\D[:digit:]]"
530  };
531  for (size_t i = 0; i < arraysize(empties); i++)
532  ASSERT_FALSE(RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
533 }
534 
535 // Bitstate assumes that kInstFail instructions in
536 // alternations or capture groups have been "compiled away".
537 TEST(EmptyCharset, BitstateAssumptions) {
538  // Captures trigger use of Bitstate.
539  static const char *nop_empties[] = {
540  "((((()))))" "[^\\S\\s]?",
541  "((((()))))" "([^\\S\\s])?",
542  "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
543  "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
544  };
545  StringPiece group[6];
546  for (size_t i = 0; i < arraysize(nop_empties); i++)
547  ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
548 }
549 
550 // Test that named groups work correctly.
551 TEST(Capture, NamedGroups) {
552  {
553  RE2 re("(hello world)");
554  ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
555  const std::map<std::string, int>& m = re.NamedCapturingGroups();
556  ASSERT_EQ(m.size(), 0);
557  }
558 
559  {
560  RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
561  ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
562  const std::map<std::string, int>& m = re.NamedCapturingGroups();
563  ASSERT_EQ(m.size(), 4);
564  ASSERT_EQ(m.find("A")->second, 1);
565  ASSERT_EQ(m.find("B")->second, 2);
566  ASSERT_EQ(m.find("C")->second, 3);
567  ASSERT_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
568  }
569 }
570 
571 TEST(RE2, CapturedGroupTest) {
572  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
573  int num_groups = re.NumberOfCapturingGroups();
574  EXPECT_EQ(2, num_groups);
575  std::string args[4];
576  RE2::Arg arg0(&args[0]);
577  RE2::Arg arg1(&args[1]);
578  RE2::Arg arg2(&args[2]);
579  RE2::Arg arg3(&args[3]);
580 
581  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
582  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
583  re, matches, num_groups));
584  const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
585  EXPECT_TRUE(named_groups.find("S") != named_groups.end());
586  EXPECT_TRUE(named_groups.find("D") != named_groups.end());
587 
588  // The named group index is 1-based.
589  int source_group_index = named_groups.find("S")->second;
590  int destination_group_index = named_groups.find("D")->second;
591  EXPECT_EQ(1, source_group_index);
592  EXPECT_EQ(2, destination_group_index);
593 
594  // The args is zero-based.
595  EXPECT_EQ("mountain view", args[source_group_index - 1]);
596  EXPECT_EQ("san jose", args[destination_group_index - 1]);
597 }
598 
599 TEST(RE2, FullMatchWithNoArgs) {
600  ASSERT_TRUE(RE2::FullMatch("h", "h"));
601  ASSERT_TRUE(RE2::FullMatch("hello", "hello"));
602  ASSERT_TRUE(RE2::FullMatch("hello", "h.*o"));
603  ASSERT_FALSE(RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
604  ASSERT_FALSE(RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
605 }
606 
607 TEST(RE2, PartialMatch) {
608  ASSERT_TRUE(RE2::PartialMatch("x", "x"));
609  ASSERT_TRUE(RE2::PartialMatch("hello", "h.*o"));
610  ASSERT_TRUE(RE2::PartialMatch("othello", "h.*o"));
611  ASSERT_TRUE(RE2::PartialMatch("hello!", "h.*o"));
612  ASSERT_TRUE(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
613 }
614 
615 TEST(RE2, PartialMatchN) {
616  RE2::Arg argv[2];
617  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
618 
619  // 0 arg
620  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
621  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
622 
623  // 1 arg
624  int i;
625  argv[0] = &i;
626  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
627  EXPECT_EQ(1001, i);
628  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
629 
630  // Multi-arg
631  std::string s;
632  argv[1] = &s;
633  EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
634  EXPECT_EQ(42, i);
635  EXPECT_EQ("life", s);
636  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
637 }
638 
639 TEST(RE2, FullMatchZeroArg) {
640  // Zero-arg
641  ASSERT_TRUE(RE2::FullMatch("1001", "\\d+"));
642 }
643 
644 TEST(RE2, FullMatchOneArg) {
645  int i;
646 
647  // Single-arg
648  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)", &i));
649  ASSERT_EQ(i, 1001);
650  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &i));
651  ASSERT_EQ(i, -123);
652  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &i));
653  ASSERT_FALSE(
654  RE2::FullMatch("1234567890123456789012345678901234567890", "(\\d+)", &i));
655 }
656 
657 TEST(RE2, FullMatchIntegerArg) {
658  int i;
659 
660  // Digits surrounding integer-arg
661  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &i));
662  ASSERT_EQ(i, 23);
663  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &i));
664  ASSERT_EQ(i, 1);
665  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
666  ASSERT_EQ(i, -1);
667  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &i));
668  ASSERT_EQ(i, 1);
669  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &i));
670  ASSERT_EQ(i, -1);
671 }
672 
673 TEST(RE2, FullMatchStringArg) {
674  std::string s;
675  // String-arg
676  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s));
677  ASSERT_EQ(s, std::string("ell"));
678 }
679 
680 TEST(RE2, FullMatchStringPieceArg) {
681  int i;
682  // StringPiece-arg
683  StringPiece sp;
684  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
685  ASSERT_EQ(sp.size(), 4);
686  ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
687  ASSERT_EQ(i, 1234);
688 }
689 
690 TEST(RE2, FullMatchMultiArg) {
691  int i;
692  std::string s;
693  // Multi-arg
694  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
695  ASSERT_EQ(s, std::string("ruby"));
696  ASSERT_EQ(i, 1234);
697 }
698 
699 TEST(RE2, FullMatchN) {
700  RE2::Arg argv[2];
701  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
702 
703  // 0 arg
704  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
705  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
706 
707  // 1 arg
708  int i;
709  argv[0] = &i;
710  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
711  EXPECT_EQ(1001, i);
712  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
713 
714  // Multi-arg
715  std::string s;
716  argv[1] = &s;
717  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
718  EXPECT_EQ(42, i);
719  EXPECT_EQ("life", s);
720  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
721 }
722 
723 TEST(RE2, FullMatchIgnoredArg) {
724  int i;
725  std::string s;
726 
727  // Old-school NULL should be ignored.
728  ASSERT_TRUE(
729  RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
730  ASSERT_EQ(s, std::string("ruby"));
731  ASSERT_EQ(i, 1234);
732 
733  // C++11 nullptr should also be ignored.
734  ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &s, nullptr, &i));
735  ASSERT_EQ(s, std::string("rubz"));
736  ASSERT_EQ(i, 1235);
737 }
738 
739 TEST(RE2, FullMatchTypedNullArg) {
740  std::string s;
741 
742  // Ignore non-void* NULL arg
743  ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
744  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
745  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
746  ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
747  ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
748  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
749  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
750 
751  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
752  ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
753  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (int*)NULL));
754  ASSERT_FALSE(RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
755  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (double*)NULL));
756  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (float*)NULL));
757 }
758 
759 // Check that numeric parsing code does not read past the end of
760 // the number being parsed.
761 // This implementation requires mmap(2) et al. and thus cannot
762 // be used unless they are available.
763 TEST(RE2, NULTerminated) {
764 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
765  char *v;
766  int x;
767  long pagesize = sysconf(_SC_PAGE_SIZE);
768 
769 #ifndef MAP_ANONYMOUS
770 #define MAP_ANONYMOUS MAP_ANON
771 #endif
772  v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
773  MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
774  ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
775  LOG(INFO) << "Memory at " << (void*)v;
776  ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
777  v[pagesize - 1] = '1';
778 
779  x = 0;
780  ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
781  ASSERT_EQ(x, 1);
782 #endif
783 }
784 
785 TEST(RE2, FullMatchTypeTests) {
786  // Type tests
787  std::string zeros(1000, '0');
788  {
789  char c;
790  ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
791  ASSERT_EQ(c, 'H');
792  }
793  {
794  unsigned char c;
795  ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
796  ASSERT_EQ(c, static_cast<unsigned char>('H'));
797  }
798  {
799  int16_t v;
800  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
801  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
802  ASSERT_TRUE(RE2::FullMatch("32767", "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
803  ASSERT_TRUE(RE2::FullMatch("-32768", "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
804  ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
805  ASSERT_FALSE(RE2::FullMatch("32768", "(-?\\d+)", &v));
806  }
807  {
808  uint16_t v;
809  ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
810  ASSERT_TRUE(RE2::FullMatch("32767", "(\\d+)", &v)); ASSERT_EQ(v, 32767);
811  ASSERT_TRUE(RE2::FullMatch("65535", "(\\d+)", &v)); ASSERT_EQ(v, 65535);
812  ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
813  }
814  {
815  int32_t v;
816  static const int32_t max = INT32_C(0x7fffffff);
817  static const int32_t min = -max - 1;
818  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
819  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
820  ASSERT_TRUE(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); ASSERT_EQ(v, max);
821  ASSERT_TRUE(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); ASSERT_EQ(v, min);
822  ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
823  ASSERT_FALSE(RE2::FullMatch("2147483648", "(-?\\d+)", &v));
824 
825  ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
826  ASSERT_EQ(v, max);
827  ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
828  ASSERT_EQ(v, min);
829 
830  ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
831  ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
832  ASSERT_EQ(v, max);
833  ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
834  }
835  {
836  uint32_t v;
837  static const uint32_t max = UINT32_C(0xffffffff);
838  ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
839  ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
840  ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
841  ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
842 
843  ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
844  }
845  {
846  int64_t v;
847  static const int64_t max = INT64_C(0x7fffffffffffffff);
848  static const int64_t min = -max - 1;
850 
851  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
852  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
853 
855  ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
856 
858  ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, min);
859 
861  ASSERT_NE(str.back(), '9');
862  str.back()++;
863  ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
864 
866  ASSERT_NE(str.back(), '9');
867  str.back()++;
868  ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
869  }
870  {
871  uint64_t v;
872  int64_t v2;
873  static const uint64_t max = UINT64_C(0xffffffffffffffff);
875 
876  ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
877  ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
878 
880  ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
881 
882  ASSERT_NE(str.back(), '9');
883  str.back()++;
884  ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
885  }
886 }
887 
888 TEST(RE2, FloatingPointFullMatchTypes) {
889  std::string zeros(1000, '0');
890  {
891  float v;
892  ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
893  ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
894  ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
895  ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
896 
897  ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
898  ASSERT_EQ(v, float(1e23));
899 
900  // 6700000000081920.1 is an edge case.
901  // 6700000000081920 is exactly halfway between
902  // two float32s, so the .1 should make it round up.
903  // However, the .1 is outside the precision possible with
904  // a float64: the nearest float64 is 6700000000081920.
905  // So if the code uses strtod and then converts to float32,
906  // round-to-even will make it round down instead of up.
907  // To pass the test, the parser must call strtof directly.
908  // This test case is carefully chosen to use only a 17-digit
909  // number, since C does not guarantee to get the correctly
910  // rounded answer for strtod and strtof unless the input is
911  // short.
912  //
913  // This is known to fail on Cygwin and MinGW due to a broken
914  // implementation of strtof(3). And apparently MSVC too. Sigh.
915 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
916  ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
917  ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
918  ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
919  ASSERT_EQ(v, 6700000000081920.1f)
920  << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
921 #endif
922  }
923  {
924  double v;
925  ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
926  ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
927  ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
928  ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
929  ASSERT_EQ(v, double(1e23));
930 
931  ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
932  ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
933  ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
934  ASSERT_EQ(v, 1.0000000596046448)
935  << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
936  }
937 }
938 
939 TEST(RE2, FullMatchAnchored) {
940  int i;
941  // Check that matching is fully anchored
942  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &i));
943  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &i));
944  ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &i)); ASSERT_EQ(i, 1001);
945  ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &i)); ASSERT_EQ(i, 1001);
946 }
947 
948 TEST(RE2, FullMatchBraces) {
949  // Braces
950  ASSERT_TRUE(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
951  ASSERT_TRUE(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
952  ASSERT_FALSE(RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
953 }
954 
955 TEST(RE2, Complicated) {
956  // Complicated RE2
957  ASSERT_TRUE(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
958  ASSERT_TRUE(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
959  ASSERT_TRUE(RE2::FullMatch("X", "foo|bar|[A-Z]"));
960  ASSERT_FALSE(RE2::FullMatch("XY", "foo|bar|[A-Z]"));
961 }
962 
963 TEST(RE2, FullMatchEnd) {
964  // Check full-match handling (needs '$' tacked on internally)
965  ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
966  ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));
967  ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
968  ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
969  ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
970  ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
971  ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));
972 
973  // Uncomment the following if we change the handling of '$' to
974  // prevent it from matching a trailing newline
975  if (false) {
976  // Check that we don't get bitten by pcre's special handling of a
977  // '\n' at the end of the string matching '$'
978  ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
979  }
980 }
981 
982 TEST(RE2, FullMatchArgCount) {
983  // Number of args
984  int a[16];
985  ASSERT_TRUE(RE2::FullMatch("", ""));
986 
987  memset(a, 0, sizeof(0));
988  ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
989  ASSERT_EQ(a[0], 1);
990 
991  memset(a, 0, sizeof(0));
992  ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
993  ASSERT_EQ(a[0], 1);
994  ASSERT_EQ(a[1], 2);
995 
996  memset(a, 0, sizeof(0));
997  ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
998  ASSERT_EQ(a[0], 1);
999  ASSERT_EQ(a[1], 2);
1000  ASSERT_EQ(a[2], 3);
1001 
1002  memset(a, 0, sizeof(0));
1003  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1004  &a[2], &a[3]));
1005  ASSERT_EQ(a[0], 1);
1006  ASSERT_EQ(a[1], 2);
1007  ASSERT_EQ(a[2], 3);
1008  ASSERT_EQ(a[3], 4);
1009 
1010  memset(a, 0, sizeof(0));
1011  ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1012  &a[2], &a[3], &a[4]));
1013  ASSERT_EQ(a[0], 1);
1014  ASSERT_EQ(a[1], 2);
1015  ASSERT_EQ(a[2], 3);
1016  ASSERT_EQ(a[3], 4);
1017  ASSERT_EQ(a[4], 5);
1018 
1019  memset(a, 0, sizeof(0));
1020  ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
1021  &a[1], &a[2], &a[3], &a[4], &a[5]));
1022  ASSERT_EQ(a[0], 1);
1023  ASSERT_EQ(a[1], 2);
1024  ASSERT_EQ(a[2], 3);
1025  ASSERT_EQ(a[3], 4);
1026  ASSERT_EQ(a[4], 5);
1027  ASSERT_EQ(a[5], 6);
1028 
1029  memset(a, 0, sizeof(0));
1030  ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1031  &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
1032  ASSERT_EQ(a[0], 1);
1033  ASSERT_EQ(a[1], 2);
1034  ASSERT_EQ(a[2], 3);
1035  ASSERT_EQ(a[3], 4);
1036  ASSERT_EQ(a[4], 5);
1037  ASSERT_EQ(a[5], 6);
1038  ASSERT_EQ(a[6], 7);
1039 
1040  memset(a, 0, sizeof(0));
1041  ASSERT_TRUE(RE2::FullMatch("1234567890123456",
1042  "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1043  "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1044  &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
1045  &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
1046  &a[13], &a[14], &a[15]));
1047  ASSERT_EQ(a[0], 1);
1048  ASSERT_EQ(a[1], 2);
1049  ASSERT_EQ(a[2], 3);
1050  ASSERT_EQ(a[3], 4);
1051  ASSERT_EQ(a[4], 5);
1052  ASSERT_EQ(a[5], 6);
1053  ASSERT_EQ(a[6], 7);
1054  ASSERT_EQ(a[7], 8);
1055  ASSERT_EQ(a[8], 9);
1056  ASSERT_EQ(a[9], 0);
1057  ASSERT_EQ(a[10], 1);
1058  ASSERT_EQ(a[11], 2);
1059  ASSERT_EQ(a[12], 3);
1060  ASSERT_EQ(a[13], 4);
1061  ASSERT_EQ(a[14], 5);
1062  ASSERT_EQ(a[15], 6);
1063 }
1064 
1065 TEST(RE2, Accessors) {
1066  // Check the pattern() accessor
1067  {
1068  const std::string kPattern = "http://([^/]+)/.*";
1069  const RE2 re(kPattern);
1070  ASSERT_EQ(kPattern, re.pattern());
1071  }
1072 
1073  // Check RE2 error field.
1074  {
1075  RE2 re("foo");
1076  ASSERT_TRUE(re.error().empty()); // Must have no error
1077  ASSERT_TRUE(re.ok());
1078  ASSERT_EQ(re.error_code(), RE2::NoError);
1079  }
1080 }
1081 
1082 TEST(RE2, UTF8) {
1083  // Check UTF-8 handling
1084  // Three Japanese characters (nihongo)
1085  const char utf8_string[] = {
1086  (char)0xe6, (char)0x97, (char)0xa5, // 65e5
1087  (char)0xe6, (char)0x9c, (char)0xac, // 627c
1088  (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
1089  0
1090  };
1091  const char utf8_pattern[] = {
1092  '.',
1093  (char)0xe6, (char)0x9c, (char)0xac, // 627c
1094  '.',
1095  0
1096  };
1097 
1098  // Both should match in either mode, bytes or UTF-8
1099  RE2 re_test1(".........", RE2::Latin1);
1100  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
1101  RE2 re_test2("...");
1102  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
1103 
1104  // Check that '.' matches one byte or UTF-8 character
1105  // according to the mode.
1106  std::string s;
1107  RE2 re_test3("(.)", RE2::Latin1);
1108  ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
1109  ASSERT_EQ(s, std::string("\xe6"));
1110  RE2 re_test4("(.)");
1111  ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
1112  ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
1113 
1114  // Check that string matches itself in either mode
1115  RE2 re_test5(utf8_string, RE2::Latin1);
1116  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
1117  RE2 re_test6(utf8_string);
1118  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
1119 
1120  // Check that pattern matches string only in UTF8 mode
1121  RE2 re_test7(utf8_pattern, RE2::Latin1);
1122  ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
1123  RE2 re_test8(utf8_pattern);
1124  ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
1125 }
1126 
1127 TEST(RE2, UngreedyUTF8) {
1128  // Check that ungreedy, UTF8 regular expressions don't match when they
1129  // oughtn't -- see bug 82246.
1130  {
1131  // This code always worked.
1132  const char* pattern = "\\w+X";
1133  const std::string target = "a aX";
1134  RE2 match_sentence(pattern, RE2::Latin1);
1135  RE2 match_sentence_re(pattern);
1136 
1137  ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1138  ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1139  }
1140  {
1141  const char* pattern = "(?U)\\w+X";
1142  const std::string target = "a aX";
1143  RE2 match_sentence(pattern, RE2::Latin1);
1144  ASSERT_EQ(match_sentence.error(), "");
1145  RE2 match_sentence_re(pattern);
1146 
1147  ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1148  ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1149  }
1150 }
1151 
1152 TEST(RE2, Rejects) {
1153  {
1154  RE2 re("a\\1", RE2::Quiet);
1155  ASSERT_FALSE(re.ok()); }
1156  {
1157  RE2 re("a[x", RE2::Quiet);
1158  ASSERT_FALSE(re.ok());
1159  }
1160  {
1161  RE2 re("a[z-a]", RE2::Quiet);
1162  ASSERT_FALSE(re.ok());
1163  }
1164  {
1165  RE2 re("a[[:foobar:]]", RE2::Quiet);
1166  ASSERT_FALSE(re.ok());
1167  }
1168  {
1169  RE2 re("a(b", RE2::Quiet);
1170  ASSERT_FALSE(re.ok());
1171  }
1172  {
1173  RE2 re("a\\", RE2::Quiet);
1174  ASSERT_FALSE(re.ok());
1175  }
1176 }
1177 
1178 TEST(RE2, NoCrash) {
1179  // Test that using a bad regexp doesn't crash.
1180  {
1181  RE2 re("a\\", RE2::Quiet);
1182  ASSERT_FALSE(re.ok());
1183  ASSERT_FALSE(RE2::PartialMatch("a\\b", re));
1184  }
1185 
1186  // Test that using an enormous regexp doesn't crash
1187  {
1188  RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
1189  ASSERT_FALSE(re.ok());
1190  ASSERT_FALSE(RE2::PartialMatch("aaa", re));
1191  }
1192 
1193  // Test that a crazy regexp still compiles and runs.
1194  {
1195  RE2 re(".{512}x", RE2::Quiet);
1196  ASSERT_TRUE(re.ok());
1197  std::string s;
1198  s.append(515, 'c');
1199  s.append("x");
1201  }
1202 }
1203 
1204 TEST(RE2, Recursion) {
1205  // Test that recursion is stopped.
1206  // This test is PCRE-legacy -- there's no recursion in RE2.
1207  int bytes = 15 * 1024; // enough to crash PCRE
1208  TestRecursion(bytes, ".");
1209  TestRecursion(bytes, "a");
1210  TestRecursion(bytes, "a.");
1211  TestRecursion(bytes, "ab.");
1212  TestRecursion(bytes, "abc.");
1213 }
1214 
1215 TEST(RE2, BigCountedRepetition) {
1216  // Test that counted repetition works, given tons of memory.
1217  RE2::Options opt;
1218  opt.set_max_mem(256<<20);
1219 
1220  RE2 re(".{512}x", opt);
1221  ASSERT_TRUE(re.ok());
1222  std::string s;
1223  s.append(515, 'c');
1224  s.append("x");
1226 }
1227 
1228 TEST(RE2, DeepRecursion) {
1229  // Test for deep stack recursion. This would fail with a
1230  // segmentation violation due to stack overflow before pcre was
1231  // patched.
1232  // Again, a PCRE legacy test. RE2 doesn't recurse.
1233  std::string comment("x*");
1234  std::string a(131072, 'a');
1235  comment += a;
1236  comment += "*x";
1237  RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1238  ASSERT_TRUE(RE2::FullMatch(comment, re));
1239 }
1240 
1241 // Suggested by Josh Hyman. Failed when SearchOnePass was
1242 // not implementing case-folding.
1243 TEST(CaseInsensitive, MatchAndConsume) {
1244  std::string text = "A fish named *Wanda*";
1245  StringPiece sp(text);
1246  StringPiece result;
1247  EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
1248  EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1249 }
1250 
1251 // RE2 should permit implicit conversions from string, StringPiece, const char*,
1252 // and C string literals.
1253 TEST(RE2, ImplicitConversions) {
1254  std::string re_string(".");
1255  StringPiece re_stringpiece(".");
1256  const char* re_cstring = ".";
1257  EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1258  EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1259  EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1260  EXPECT_TRUE(RE2::PartialMatch("e", "."));
1261 }
1262 
1263 // Bugs introduced by 8622304
1264 TEST(RE2, CL8622304) {
1265  // reported by ingow
1266  std::string dir;
1267  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
1268  EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
1269 
1270  // reported by jacobsa
1271  std::string key, val;
1272  EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1273  "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1274  &key,
1275  &val));
1276  EXPECT_EQ(key, "bar");
1277  EXPECT_EQ(val, "1,0x2F,030,4,5");
1278 }
1279 
1280 // Check that RE2 returns correct regexp pieces on error.
1281 // In particular, make sure it returns whole runes
1282 // and that it always reports invalid UTF-8.
1283 // Also check that Perl error flag piece is big enough.
1284 static struct ErrorTest {
1285  const char *regexp;
1287  const char *error_arg;
1288 } error_tests[] = {
1289  { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
1290  { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
1291  { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
1292  { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
1293  { "kl\\x", RE2::ErrorBadEscape, "\\x" },
1294  { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
1295  { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
1296  // used to return (?s but the error is X
1297  { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
1298  { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
1299  { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
1300  { "abc(def", RE2::ErrorMissingParen, "abc(def" },
1301  { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
1302 
1303  // no argument string returned for invalid UTF-8
1304  { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
1305  { "op\377qr", RE2::ErrorBadUTF8, "" },
1306  { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
1307  { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
1308  { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
1309  { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
1310 };
1311 TEST(RE2, ErrorCodeAndArg) {
1312  for (size_t i = 0; i < arraysize(error_tests); i++) {
1313  RE2 re(error_tests[i].regexp, RE2::Quiet);
1314  EXPECT_FALSE(re.ok());
1316  EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
1317  }
1318 }
1319 
1320 // Check that "never match \n" mode never matches \n.
1321 static struct NeverTest {
1322  const char* regexp;
1323  const char* text;
1324  const char* match;
1325 } never_tests[] = {
1326  { "(.*)", "abc\ndef\nghi\n", "abc" },
1327  { "(?s)(abc.*def)", "abc\ndef\n", NULL },
1328  { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
1329  { "(abc[^x]*def)", "abc\ndef\n", NULL },
1330  { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
1331 };
1332 TEST(RE2, NeverNewline) {
1333  RE2::Options opt;
1334  opt.set_never_nl(true);
1335  for (size_t i = 0; i < arraysize(never_tests); i++) {
1336  const NeverTest& t = never_tests[i];
1337  RE2 re(t.regexp, opt);
1338  if (t.match == NULL) {
1339  EXPECT_FALSE(re.PartialMatch(t.text, re));
1340  } else {
1341  StringPiece m;
1342  EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
1343  EXPECT_EQ(m, t.match);
1344  }
1345  }
1346 }
1347 
1348 // Check that dot_nl option works.
1349 TEST(RE2, DotNL) {
1350  RE2::Options opt;
1351  opt.set_dot_nl(true);
1352  EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1353  EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1354  opt.set_never_nl(true);
1355  EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1356 }
1357 
1358 // Check that there are no capturing groups in "never capture" mode.
1359 TEST(RE2, NeverCapture) {
1360  RE2::Options opt;
1361  opt.set_never_capture(true);
1362  RE2 re("(r)(e)", opt);
1363  EXPECT_EQ(0, re.NumberOfCapturingGroups());
1364 }
1365 
1366 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1367 // Triggered by a failed DFA search falling back to Bitstate when
1368 // using Match with a NULL submatch set. Bitstate tried to read
1369 // the submatch[0] entry even if nsubmatch was 0.
1370 TEST(RE2, BitstateCaptureBug) {
1371  RE2::Options opt;
1372  opt.set_max_mem(20000);
1373  RE2 re("(_________$)", opt);
1374  StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1375  EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1376 }
1377 
1378 // C++ version of bug 609710.
1379 TEST(RE2, UnicodeClasses) {
1380  const std::string str = "ABCDEFGHI譚永鋒";
1381  std::string a, b, c;
1382 
1383  EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1384  EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1385  EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1386  EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1387  EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1388  EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1389 
1390  EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1391  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1392  EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1393  EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1394  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1395  EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1396 
1397  EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1398  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1399  EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1400  EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1401  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1402  EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1403 
1404  EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1405  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1406  EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1407  EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1408  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1409  EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1410 
1411  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1412  EXPECT_EQ("A", a);
1413  EXPECT_EQ("B", b);
1414  EXPECT_EQ("C", c);
1415 
1416  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1417  EXPECT_EQ("A", a);
1418  EXPECT_EQ("B", b);
1419  EXPECT_EQ("C", c);
1420 
1421  EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1422 
1423  EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1424  EXPECT_EQ("A", a);
1425  EXPECT_EQ("B", b);
1426  EXPECT_EQ("C", c);
1427 
1428  EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1429 
1430  EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1431  EXPECT_EQ("譚", a);
1432  EXPECT_EQ("永", b);
1433  EXPECT_EQ("鋒", c);
1434 }
1435 
1436 TEST(RE2, LazyRE2) {
1437  // Test with and without options.
1438  static LazyRE2 a = {"a"};
1439  static LazyRE2 b = {"b", RE2::Latin1};
1440 
1441  EXPECT_EQ("a", a->pattern());
1442  EXPECT_EQ(RE2::Options::EncodingUTF8, a->options().encoding());
1443 
1444  EXPECT_EQ("b", b->pattern());
1445  EXPECT_EQ(RE2::Options::EncodingLatin1, b->options().encoding());
1446 }
1447 
1448 // Bug reported by saito. 2009/02/17
1449 TEST(RE2, NullVsEmptyString) {
1450  RE2 re(".*");
1451  EXPECT_TRUE(re.ok());
1452 
1453  StringPiece null;
1454  EXPECT_TRUE(RE2::FullMatch(null, re));
1455 
1456  StringPiece empty("");
1458 }
1459 
1460 // Similar to the previous test, check that the null string and the empty
1461 // string both match, but also that the null string can only provide null
1462 // submatches whereas the empty string can also provide empty submatches.
1463 TEST(RE2, NullVsEmptyStringSubmatches) {
1464  RE2 re("()|(foo)");
1465  EXPECT_TRUE(re.ok());
1466 
1467  // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
1468  StringPiece matches[4];
1469 
1470  for (size_t i = 0; i < arraysize(matches); i++)
1471  matches[i] = "bar";
1472 
1473  StringPiece null;
1474  EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
1475  matches, arraysize(matches)));
1476  for (size_t i = 0; i < arraysize(matches); i++) {
1477  EXPECT_TRUE(matches[i].data() == NULL); // always null
1478  EXPECT_TRUE(matches[i].empty());
1479  }
1480 
1481  for (size_t i = 0; i < arraysize(matches); i++)
1482  matches[i] = "bar";
1483 
1484  StringPiece empty("");
1485  EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
1486  matches, arraysize(matches)));
1487  EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
1488  EXPECT_TRUE(matches[0].empty());
1489  EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
1490  EXPECT_TRUE(matches[1].empty());
1491  EXPECT_TRUE(matches[2].data() == NULL);
1492  EXPECT_TRUE(matches[2].empty());
1493  EXPECT_TRUE(matches[3].data() == NULL);
1494  EXPECT_TRUE(matches[3].empty());
1495 }
1496 
1497 // Issue 1816809
1498 TEST(RE2, Bug1816809) {
1499  RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1500  StringPiece piece("llx-3;llx4");
1501  std::string x;
1502  EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1503 }
1504 
1505 // Issue 3061120
1506 TEST(RE2, Bug3061120) {
1507  RE2 re("(?i)\\W");
1508  EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
1509  EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
1510  EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
1511 }
1512 
1513 TEST(RE2, CapturingGroupNames) {
1514  // Opening parentheses annotated with group IDs:
1515  // 12 3 45 6 7
1516  RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1517  EXPECT_TRUE(re.ok());
1518  const std::map<int, std::string>& have = re.CapturingGroupNames();
1519  std::map<int, std::string> want;
1520  want[3] = "G2";
1521  want[6] = "G2";
1522  want[7] = "G1";
1523  EXPECT_EQ(want, have);
1524 }
1525 
1526 TEST(RE2, RegexpToStringLossOfAnchor) {
1527  EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1528  EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1529  EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1530  EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1531 }
1532 
1533 // Issue 10131674
1534 TEST(RE2, Bug10131674) {
1535  // Some of these escapes describe values that do not fit in a byte.
1536  RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1537  EXPECT_FALSE(re.ok());
1538  EXPECT_FALSE(RE2::FullMatch("hello world", re));
1539 }
1540 
1541 TEST(RE2, Bug18391750) {
1542  // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
1543  const char t[] = {
1544  (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
1545  (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
1546  (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
1547  (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
1548  (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
1549  (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
1550  };
1551  RE2::Options opt;
1552  opt.set_encoding(RE2::Options::EncodingLatin1);
1553  opt.set_longest_match(true);
1554  opt.set_dot_nl(true);
1555  opt.set_case_sensitive(false);
1556  RE2 re(t, opt);
1557  ASSERT_TRUE(re.ok());
1558  RE2::PartialMatch(t, re);
1559 }
1560 
1561 TEST(RE2, Bug18458852) {
1562  // Bug in parser accepting invalid (too large) rune,
1563  // causing compiler to fail in DCHECK in UTF-8
1564  // character class code.
1565  const char b[] = {
1566  (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
1567  (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
1568  (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
1569  };
1570  RE2 re(b);
1571  ASSERT_FALSE(re.ok());
1572 }
1573 
1574 TEST(RE2, Bug18523943) {
1575  // Bug in BitState: case kFailInst failed the match entirely.
1576 
1577  RE2::Options opt;
1578  const char a[] = {
1579  (char)0x29, (char)0x29, (char)0x24, (char)0x00,
1580  };
1581  const char b[] = {
1582  (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
1583  };
1584  opt.set_log_errors(false);
1585  opt.set_encoding(RE2::Options::EncodingLatin1);
1586  opt.set_posix_syntax(true);
1587  opt.set_longest_match(true);
1588  opt.set_literal(false);
1589  opt.set_never_nl(true);
1590 
1591  RE2 re((const char*)b, opt);
1592  ASSERT_TRUE(re.ok());
1593  std::string s1;
1594  ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1));
1595 }
1596 
1597 TEST(RE2, Bug21371806) {
1598  // Bug in parser accepting Unicode groups in Latin-1 mode,
1599  // causing compiler to fail in DCHECK in prog.cc.
1600 
1601  RE2::Options opt;
1602  opt.set_encoding(RE2::Options::EncodingLatin1);
1603 
1604  RE2 re("g\\p{Zl}]", opt);
1605  ASSERT_TRUE(re.ok());
1606 }
1607 
1608 TEST(RE2, Bug26356109) {
1609  // Bug in parser caused by factoring of common prefixes in alternations.
1610 
1611  // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
1612  // consume "ab" and then stop (when unanchored) whereas it should consume all
1613  // of "abc" as per first-match semantics.
1614  RE2 re("a\\C*?c|a\\C*?b");
1615  ASSERT_TRUE(re.ok());
1616 
1617  std::string s = "abc";
1618  StringPiece m;
1619 
1620  ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
1621  ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
1622 
1623  ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
1624  ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
1625 }
1626 
1627 TEST(RE2, Issue104) {
1628  // RE2::GlobalReplace always advanced by one byte when the empty string was
1629  // matched, which would clobber any rune that is longer than one byte.
1630 
1631  std::string s = "bc";
1632  ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
1633  ASSERT_EQ("dbdcd", s);
1634 
1635  s = "ąć";
1636  ASSERT_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ"));
1637  ASSERT_EQ("ĈąĈćĈ", s);
1638 
1639  s = "人类";
1640  ASSERT_EQ(3, RE2::GlobalReplace(&s, "大*", "小"));
1641  ASSERT_EQ("小人小类小", s);
1642 }
1643 
1644 TEST(RE2, Issue310) {
1645  // (?:|a)* matched more text than (?:|a)+ did.
1646 
1647  std::string s = "aaa";
1648  StringPiece m;
1649 
1650  RE2 star("(?:|a)*");
1651  ASSERT_TRUE(star.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
1652  ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
1653 
1654  RE2 plus("(?:|a)+");
1655  ASSERT_TRUE(plus.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
1656  ASSERT_EQ(m, "") << " got m='" << m << "', want ''";
1657 }
1658 
1659 } // namespace re2
xds_interop_client.str
str
Definition: xds_interop_client.py:487
EXPECT_FALSE
#define EXPECT_FALSE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1970
test_group_name.all
all
Definition: test_group_name.py:241
_gevent_test_main.result
result
Definition: _gevent_test_main.py:96
re2::RE2::ErrorBadUTF8
@ ErrorBadUTF8
Definition: bloaty/third_party/re2/re2/re2.h:237
check_banned_filenames.bad
bad
Definition: check_banned_filenames.py:26
ASSERT_NE
#define ASSERT_NE(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2060
re2::RE2::CRadix
static Arg CRadix(short *x)
re2::RE2::FullMatch
static bool FullMatch(const StringPiece &text, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:367
re2::RE2::PartialMatchN
static bool PartialMatchN(const StringPiece &text, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:339
check_tracer_sanity.pattern
pattern
Definition: check_tracer_sanity.py:25
memset
return memset(p, 0, total)
fix_build_deps.c
list c
Definition: fix_build_deps.py:490
NamedGroups
Span< const NamedGroup > NamedGroups()
Definition: ssl_key_share.cc:304
re2::RE2::ErrorBadEscape
@ ErrorBadEscape
Definition: bloaty/third_party/re2/re2/re2.h:227
uint16_t
unsigned short uint16_t
Definition: stdint-msvc2008.h:79
re2::ErrorTest::error_code
RE2::ErrorCode error_code
Definition: re2/re2/testing/re2_test.cc:1286
re2::RE2::FullMatchN
static bool FullMatchN(const StringPiece &text, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:334
string.h
options
double_dict options[]
Definition: capstone_test.c:55
re2::RE2::error_code
ErrorCode error_code() const
Definition: bloaty/third_party/re2/re2/re2.h:279
absl::cord_internal::Consume
void Consume(CordRep *rep, ConsumeFn consume_fn)
Definition: cord_rep_consume.cc:45
testing::internal::string
::std::string string
Definition: bloaty/third_party/protobuf/third_party/googletest/googletest/include/gtest/internal/gtest-port.h:881
error
grpc_error_handle error
Definition: retry_filter.cc:499
INT64_C
#define INT64_C(val)
Definition: stdint-msvc2008.h:233
re2::RE2::ANCHOR_BOTH
@ ANCHOR_BOTH
Definition: bloaty/third_party/re2/re2/re2.h:475
re2::RE2::UNANCHORED
@ UNANCHORED
Definition: bloaty/third_party/re2/re2/re2.h:473
re2::RE2::Extract
static bool Extract(const StringPiece &text, const RE2 &re, const StringPiece &rewrite, std::string *out)
Definition: bloaty/third_party/re2/re2/re2.cc:457
re2::RE2::error
const std::string & error() const
Definition: bloaty/third_party/re2/re2/re2.h:275
re2::RE2::ErrorBadPerlOp
@ ErrorBadPerlOp
Definition: bloaty/third_party/re2/re2/re2.h:236
re2
Definition: bloaty/third_party/re2/re2/bitmap256.h:17
re2::RE2::Consume
static bool Consume(StringPiece *input, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:383
absl::FormatConversionChar::s
@ s
a
int a
Definition: abseil-cpp/absl/container/internal/hash_policy_traits_test.cc:88
re2::RE2::ErrorMissingBracket
@ ErrorMissingBracket
Definition: bloaty/third_party/re2/re2/re2.h:230
Arg
Arg(64) -> Arg(128) ->Arg(256) ->Arg(512) ->Arg(1024) ->Arg(1536) ->Arg(2048) ->Arg(3072) ->Arg(4096) ->Arg(5120) ->Arg(6144) ->Arg(7168)
re2::NeverTest::match
const char * match
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1311
re2::UTF8
static std::string UTF8(Rune r)
Definition: bloaty/third_party/re2/re2/testing/exhaustive3_test.cc:35
re2::RE2::Replace
static bool Replace(std::string *str, const RE2 &re, const StringPiece &rewrite)
Definition: bloaty/third_party/re2/re2/re2.cc:366
EXPECT_EQ
#define EXPECT_EQ(a, b)
Definition: iomgr/time_averaged_stats_test.cc:27
re2::RE2::FindAndConsume
static bool FindAndConsume(StringPiece *input, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:392
uint32_t
unsigned int uint32_t
Definition: stdint-msvc2008.h:80
ULL
#define ULL(x)
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/coded_stream_unittest.cc:57
re2::RE2::ErrorUnexpectedParen
@ ErrorUnexpectedParen
Definition: re2/re2/re2.h:251
re2::RE2::FindAndConsumeN
static bool FindAndConsumeN(StringPiece *input, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:355
int16_t
signed short int16_t
Definition: stdint-msvc2008.h:76
re2::RE2::Options::EncodingUTF8
@ EncodingUTF8
Definition: bloaty/third_party/re2/re2/re2.h:604
gen_server_registered_method_bad_client_test_body.text
def text
Definition: gen_server_registered_method_bad_client_test_body.py:50
asyncio_get_stats.args
args
Definition: asyncio_get_stats.py:40
int64_t
signed __int64 int64_t
Definition: stdint-msvc2008.h:89
ToString
std::string ToString(const grpc::string_ref &r)
Definition: string_ref_helper.cc:24
max
int max
Definition: bloaty/third_party/zlib/examples/enough.c:170
python_utils.jobset.INFO
INFO
Definition: jobset.py:111
LOG
#define LOG(severity)
Definition: bloaty/third_party/re2/util/logging.h:53
re2::RE2::CheckRewriteString
bool CheckRewriteString(const StringPiece &rewrite, std::string *error) const
Definition: bloaty/third_party/re2/re2/re2.cc:856
re2::RE2::DefaultOptions
@ DefaultOptions
Definition: bloaty/third_party/re2/re2/re2.h:248
setup.v
v
Definition: third_party/bloaty/third_party/capstone/bindings/python/setup.py:42
ASSERT_DECIMAL
#define ASSERT_DECIMAL(type, value)
uint64_t
unsigned __int64 uint64_t
Definition: stdint-msvc2008.h:90
re2::StringPrintf
std::string StringPrintf(const char *format,...)
Definition: bloaty/third_party/re2/util/strutil.cc:140
re2::RE2::ok
bool ok() const
Definition: bloaty/third_party/re2/re2/re2.h:266
re2::TestQuoteMeta
static void TestQuoteMeta(const std::string &unquoted, const RE2::Options &options=RE2::DefaultOptions)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:377
re2::TestCheckRewriteString
static void TestCheckRewriteString(const char *regexp, const char *rewrite, bool expect_ok)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:189
histogram
static grpc_histogram * histogram
Definition: test/core/fling/client.cc:34
grpc_ruby_generator::Replace
std::string Replace(std::string s, const std::string &from, const std::string &to)
Definition: ruby_generator_string-inl.h:52
re2::RE2::PartialMatch
static bool PartialMatch(const StringPiece &text, const RE2 &re, A &&... a)
Definition: bloaty/third_party/re2/re2/re2.h:374
re2::error_tests
static struct re2::ErrorTest error_tests[]
ASSERT_HEX
#define ASSERT_HEX(type, value)
re2::RE2::ErrorCode
ErrorCode
Definition: bloaty/third_party/re2/re2/re2.h:220
x
int x
Definition: bloaty/third_party/googletest/googlemock/test/gmock-matchers_test.cc:3610
data
char data[kBufferLength]
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:1006
re2::ErrorTest::regexp
const char * regexp
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1278
min
#define min(a, b)
Definition: qsort.h:83
re2::RE2::Quiet
@ Quiet
Definition: bloaty/third_party/re2/re2/re2.h:251
b
uint64_t b
Definition: abseil-cpp/absl/container/internal/layout_test.cc:53
re2::RE2::Latin1
@ Latin1
Definition: bloaty/third_party/re2/re2/re2.h:249
re2::RE2
Definition: bloaty/third_party/re2/re2/re2.h:211
UINT64_C
#define UINT64_C(val)
Definition: stdint-msvc2008.h:238
n
int n
Definition: abseil-cpp/absl/container/btree_test.cc:1080
stdint.h
arraysize
#define arraysize(array)
Definition: benchmark/src/arraysize.h:28
re2::RE2::ConsumeN
static bool ConsumeN(StringPiece *input, const RE2 &re, const Arg *const args[], int n)
Definition: bloaty/third_party/re2/re2/re2.cc:344
google_benchmark.example.empty
def empty(state)
Definition: example.py:31
Match
static bool Match(const upb_msgdef *m, const char *name, const upb_fielddef **f, const upb_oneofdef **o, const char *prefix, const char *suffix)
Definition: protobuf/ruby/ext/google/protobuf_c/message.c:195
re2::RE2::Match
bool Match(const StringPiece &text, size_t startpos, size_t endpos, Anchor re_anchor, StringPiece *submatch, int nsubmatch) const
Definition: bloaty/third_party/re2/re2/re2.cc:572
tests.unit._exit_scenarios.port
port
Definition: _exit_scenarios.py:179
re2::RE2::GlobalReplace
static int GlobalReplace(std::string *str, const RE2 &re, const StringPiece &rewrite)
Definition: bloaty/third_party/re2/re2/re2.cc:386
re2::RE2::QuoteMeta
static std::string QuoteMeta(const StringPiece &unquoted)
Definition: bloaty/third_party/re2/re2/re2.cc:473
re2::RE2::POSIX
@ POSIX
Definition: bloaty/third_party/re2/re2/re2.h:250
UINT32_C
#define UINT32_C(val)
Definition: stdint-msvc2008.h:237
key
const char * key
Definition: hpack_parser_table.cc:164
re2::RE2::Options
Definition: bloaty/third_party/re2/re2/re2.h:548
upload.group
group
Definition: bloaty/third_party/googletest/googlemock/scripts/upload.py:397
bytes
uint8 bytes[10]
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/coded_stream_unittest.cc:153
tests
Definition: src/python/grpcio_tests/tests/__init__.py:1
re2::RE2::NoError
@ NoError
Definition: bloaty/third_party/re2/re2/re2.h:221
re2::RE2::error_arg
const std::string & error_arg() const
Definition: bloaty/third_party/re2/re2/re2.h:283
absl::str_format_internal::LengthMod::t
@ t
re2::NeverTest::text
const char * text
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1310
re2::never_tests
static struct re2::NeverTest never_tests[]
fix_build_deps.r
r
Definition: fix_build_deps.py:491
zeros
int zeros
Definition: abseil-cpp/absl/strings/internal/str_format/float_conversion.cc:459
re2::NegativeTestQuoteMeta
static void NegativeTestQuoteMeta(const std::string &unquoted, const std::string &should_not_match, const RE2::Options &options=RE2::DefaultOptions)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:387
ASSERT_OCTAL
#define ASSERT_OCTAL(type, value)
ASSERT_TRUE
#define ASSERT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1973
ASSERT_FALSE
#define ASSERT_FALSE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1976
re2::RE2::Options::EncodingLatin1
@ EncodingLatin1
Definition: bloaty/third_party/re2/re2/re2.h:605
re2::RE2::ErrorMissingParen
@ ErrorMissingParen
Definition: bloaty/third_party/re2/re2/re2.h:231
re2::NeverTest::regexp
const char * regexp
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:1309
input
std::string input
Definition: bloaty/third_party/protobuf/src/google/protobuf/io/tokenizer_unittest.cc:197
EXPECT_TRUE
#define EXPECT_TRUE(condition)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:1967
re2::FindAndConsume
void FindAndConsume(int iters, int nbytes)
Definition: bloaty/third_party/re2/re2/testing/regexp_benchmark.cc:292
re2::TestRecursion
static void TestRecursion(int size, const char *pattern)
Definition: bloaty/third_party/re2/re2/testing/re2_test.cc:362
INT32_C
#define INT32_C(val)
Definition: stdint-msvc2008.h:232
re2::TEST
TEST(TestCharClassBuilder, Adds)
Definition: bloaty/third_party/re2/re2/testing/charclass_test.cc:198
ASSERT_GT
#define ASSERT_GT(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2076
size
voidpf void uLong size
Definition: bloaty/third_party/zlib/contrib/minizip/ioapi.h:136
re2::ErrorTest::error_arg
const char * error_arg
Definition: re2/re2/testing/re2_test.cc:1287
regress.m
m
Definition: regress/regress.py:25
int32_t
signed int int32_t
Definition: stdint-msvc2008.h:77
re2::StringPiece
Definition: bloaty/third_party/re2/re2/stringpiece.h:39
to_string
static bool to_string(zval *from)
Definition: protobuf/php/ext/google/protobuf/convert.c:333
setup.target
target
Definition: third_party/bloaty/third_party/protobuf/python/setup.py:179
errno.h
re2::RE2::ErrorBadCharRange
@ ErrorBadCharRange
Definition: bloaty/third_party/re2/re2/re2.h:229
i
uint64_t i
Definition: abseil-cpp/absl/container/btree_benchmark.cc:230
LL
#define LL(x)
ASSERT_EQ
#define ASSERT_EQ(val1, val2)
Definition: bloaty/third_party/googletest/googletest/include/gtest/gtest.h:2056


grpc
Author(s):
autogenerated on Fri May 16 2025 03:00:00