10 #include "util/test.h"
11 #include "util/logging.h"
12 #include "re2/regexp.h"
// POSIX bracket classes. In each row the left string is the input pattern and
// the right string is the explicit ASCII range set expected for it.
// NOTE(review): rows appear to be { regexp, simplified } pairs (the field names
// used by the test body); the struct itself is declared outside this excerpt.
37 {
"[[:alnum:]]",
"[0-9A-Za-z]" },
38 {
"[[:alpha:]]",
"[A-Za-z]" },
39 {
"[[:blank:]]",
"[\\t ]" },
// Control characters: 0x00-0x1f plus DEL (0x7f).
40 {
"[[:cntrl:]]",
"[\\x00-\\x1f\\x7f]" },
41 {
"[[:digit:]]",
"[0-9]" },
// graph is '!'..'~'; print (below) is the same range extended down to space.
42 {
"[[:graph:]]",
"[!-~]" },
43 {
"[[:lower:]]",
"[a-z]" },
44 {
"[[:print:]]",
"[ -~]" },
// Four ranges covering printable ASCII other than letters, digits and space.
45 {
"[[:punct:]]",
"[!-/:-@\\[-`{-~]" },
// The single range \t-\r spans 0x09-0x0d, so vertical tab (0x0b) is included.
46 {
"[[:space:]]" ,
"[\\t-\\r ]" },
47 {
"[[:upper:]]",
"[A-Z]" },
48 {
"[[:xdigit:]]",
"[0-9A-Fa-f]" },
// Perl shorthand classes, written bare. Left string is the input pattern,
// right string is the explicit character class expected for it.
// The \s set is \t-\n, \f-\r and space: vertical tab (0x0b) is not part of it.
52 {
"\\s",
"[\\t-\\n\\f-\\r ]" },
53 {
"\\w",
"[0-9A-Z_a-z]" },
// Upper-case shorthands map to the negated form of the same set.
55 {
"\\S",
"[^\\t-\\n\\f-\\r ]" },
56 {
"\\W",
"[^0-9A-Z_a-z]" },
// The same shorthands wrapped in brackets: the expected output is identical
// to the bare form, with no extra nesting.
58 {
"[\\s]",
"[\\t-\\n\\f-\\r ]" },
59 {
"[\\w]",
"[0-9A-Z_a-z]" },
60 {
"[\\D]",
"[^0-9]" },
61 {
"[\\S]",
"[^\\t-\\n\\f-\\r ]" },
62 {
"[\\W]",
"[^0-9A-Z_a-z]" },
// Counted repetitions {n,m}. Left string is the input pattern, right string
// is the expected expansion into explicit copies and nested optional groups.
// With a capturing operand, every copy stays a capturing (a); the grouping
// added around the optional tail is written with non-capturing (?:...).
72 {
"(a){0,2}",
"(?:(a)(a)?)?" },
73 {
"(a){0,4}",
"(?:(a)(?:(a)(?:(a)(a)?)?)?)?" },
// Minimum of 2 becomes two mandatory copies; the remaining 4 are nested optionals.
74 {
"(a){2,6}",
"(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" },
// Same three bounds with a plain literal operand.
75 {
"a{0,2}",
"(?:aa?)?" },
76 {
"a{0,4}",
"(?:a(?:a(?:aa?)?)?)?" },
77 {
"a{2,6}",
"aa(?:a(?:a(?:aa?)?)?)?" },
// Open-ended {5,}: four literal copies followed by a+.
81 {
"a{5,}",
"aaaaa+" },
// A counted repeat {1,} placed underneath other operators. In every expected
// string (right) the inner {1,} appears rewritten as +, whatever encloses it.
// {1,} of {1,} collapses to a single +.
85 {
"(?:a{1,}){1,}",
"a+" },
// Inside a capturing group and inside an alternation.
86 {
"(a{1,}b{1,})",
"(a+b+)" },
87 {
"a{1,}|b{1,}",
"a+|b+" },
// Under * and ? the non-capturing group is kept in the expected output;
// under + it collapses to a+.
88 {
"(?:a{1,})*",
"(?:a+)*" },
89 {
"(?:a{1,})+",
"a+" },
90 {
"(?:a{1,})?",
"(?:a+)?" },
// Character-class range merging. Left string is the input class, right string
// is the expected class with duplicate, adjacent and overlapping ranges merged.
// Exact duplicates of a range.
95 {
"[a-za-za-z]",
"[a-z]" },
96 {
"[A-Za-zA-Za-z]",
"[A-Za-z]" },
// Consecutive single characters and abutting ranges join into one range.
97 {
"[ABCDEFGH]",
"[A-H]" },
98 {
"[AB-CD-EF-GH]",
"[A-H]" },
// Overlapping ranges listed in descending order.
99 {
"[W-ZP-XE-R]",
"[E-Z]" },
// Ranges that share endpoints, and ranges nested inside one another
// (listed shortest-first and longest-first).
100 {
"[a-ee-gg-m]",
"[a-m]" },
101 {
"[a-ea-ha-m]",
"[a-m]" },
102 {
"[a-ma-ha-e]",
"[a-m]" },
// The range space..'~' already contains all letters and digits.
103 {
"[a-zA-Z0-9 -~]",
"[ -~]" },
// A class unioned with its own negation covers every code point.
// Negated, the expected string is a negated full range 0x00-0x10ffff,
// i.e. a class with no members.
106 {
"[^[:cntrl:][:^cntrl:]]",
"[^\\x00-\\x{10ffff}]" },
// Un-negated, the expected string is "." rather than an explicit range.
109 {
"[[:cntrl:][:^cntrl:]]",
"." },
// Case-insensitive (?i) patterns. Left string is the input, right string is
// the expected explicit class after case folding.
// K, k and U+212A (KELVIN SIGN) all yield the same three-member class.
114 {
"(?i)K",
"[Kk\\x{212a}]" },
115 {
"(?i)k",
"[Kk\\x{212a}]" },
116 {
"(?i)\\x{212a}",
"[Kk\\x{212a}]" },
// Folding [a-z] adds A-Z plus U+017F (LATIN SMALL LETTER LONG S) and U+212A.
117 {
"(?i)[a-z]",
"[A-Za-z\\x{17f}\\x{212a}]" },
// Wide ranges: the expected text uses lower-case hex digits, and the range
// reaching 0x10ffff is expected as "." instead of an explicit class.
118 {
"(?i)[\\x00-\\x{FFFD}]",
"[\\x00-\\x{fffd}]" },
119 {
"(?i)[\\x00-\\x{10ffff}]",
"." },
// Empty alternatives and empty groups. Left string is the input pattern,
// right string is the expected output.
// An empty alternation branch is expected as an explicit (?:); the enclosing
// capturing group is kept. Single-character branches a|b merge into [a-b].
127 {
"(a|b|)",
"([a-b]|(?:))" },
128 {
"(|)",
"((?:)|(?:))" },
// Empty capturing groups are expected unchanged (left and right are equal).
// NOTE(review): the test body has a strcmp branch for rows whose two strings
// match; what it asserts is not visible in this excerpt.
130 {
"(()|())",
"(()|())" },
131 {
"(a|)",
"(a|(?:))" },
132 {
"ab()cd()",
"ab()cd()" },
// A counted repeat of an empty group expands like any other {0,2}.
140 {
"(){0,2}",
"(?:()()?)?" },
// Two adjacent repeats of the same literal `a`. Left string is the input,
// right string is the expected single combined repetition, itself expanded.
// The rows pair the operators *, +, ?, {2}, {2,} and {2,3}.
// First operator is *: it absorbs the upper bound of the second repeat.
148 {
"a*a{2,}",
"aa+" },
149 {
"a*a{2,3}",
"aa+" },
// First operator is +: one more mandatory copy than the * rows above.
153 {
"a+a{2}",
"aaa+" },
154 {
"a+a{2,}",
"aaa+" },
155 {
"a+a{2,3}",
"aaa+" },
// First operator is ?: the result stays bounded unless the second is {2,}.
158 {
"a?a?",
"(?:aa?)?" },
159 {
"a?a{2}",
"aaa?" },
160 {
"a?a{2,}",
"aa+" },
161 {
"a?a{2,3}",
"aa(?:aa?)?" },
// First operator is the exact count {2}.
163 {
"a{2}a+",
"aaa+" },
164 {
"a{2}a?",
"aaa?" },
165 {
"a{2}a{2}",
"aaaa" },
166 {
"a{2}a{2,}",
"aaaa+" },
167 {
"a{2}a{2,3}",
"aaaaa?" },
// First operator is the open-ended {2,}: every result is open-ended.
168 {
"a{2,}a*",
"aa+" },
169 {
"a{2,}a+",
"aaa+" },
170 {
"a{2,}a?",
"aa+" },
171 {
"a{2,}a{2}",
"aaaa+" },
172 {
"a{2,}a{2,}",
"aaaa+" },
173 {
"a{2,}a{2,3}",
"aaaa+" },
// First operator is the bounded {2,3}: minimums and maximums add.
174 {
"a{2,3}a*",
"aa+" },
175 {
"a{2,3}a+",
"aaa+" },
176 {
"a{2,3}a?",
"aa(?:aa?)?" },
177 {
"a{2,3}a{2}",
"aaaaa?" },
178 {
"a{2,3}a{2,}",
"aaaa+" },
179 {
"a{2,3}a{2,3}",
"aaaa(?:aa?)?" },
// Adjacent repeats whose operand is not a plain literal. Left string is the
// input pattern, right string is the expected output.
// A character class (\d) and the any-byte operator (\C) combine like a literal.
181 {
"\\d*\\d*",
"[0-9]*" },
183 {
"\\C*\\C*",
"\\C*" },
// Case folding: both repeats under (?i) combine into one.
185 {
"(?i)A*a*",
"[Aa]*" },
186 {
"(?i)a+A+",
"[Aa][Aa]+" },
// When (?-i) turns folding off for the second repeat, the two stay separate.
187 {
"(?i)A*(?-i)a*",
"[Aa]*a*" },
188 {
"(?i)a+(?-i)A+",
"[Aa]+A+" },
// Non-greedy: two non-greedy repeats combine and stay non-greedy.
191 {
"a+?a+?",
"aa+?" },
// A greedy repeat next to a non-greedy one is expected unchanged.
192 {
"a*?a*",
"a*?a*" },
193 {
"a+a+?",
"a+a+?" },
// A repeat followed by one more unrepeated copy of its operand, or by a
// literal string that starts with it. Left string is the input pattern,
// right string is the expected output.
// Single trailing element: character class and any-byte operand.
196 {
"\\d*\\d",
"[0-9]+" },
198 {
"\\C*\\C",
"\\C+" },
// Single trailing literal under (?i): absorbed into the repeat.
200 {
"(?i)A*a",
"[Aa]+" },
201 {
"(?i)a+A",
"[Aa][Aa]+" },
// With (?-i) on the trailing literal, nothing is absorbed.
202 {
"(?i)A*(?-i)a",
"[Aa]*a" },
203 {
"(?i)a+(?-i)A",
"[Aa]+A" },
// Trailing literal string under (?i): its leading run of the same letter is
// absorbed and the rest of the string ([Bb]) follows.
208 {
"(?i)a*aa",
"[Aa][Aa]+" },
209 {
"(?i)a*aab",
"[Aa][Aa]+[Bb]" },
// Case-sensitive trailing string after a folded repeat: left separate.
210 {
"(?i)a*(?-i)aa",
"[Aa]*aa" },
211 {
"(?i)a*(?-i)aab",
"[Aa]*aab" },
// Rows where the neighbouring pieces are expected to stay separate. Left
// string is the input pattern, right string is the expected output.
// Different operands (\d versus \D): only the shorthand classes are spelled out.
214 {
"\\d*\\D*",
"[0-9]*[^0-9]*" },
216 {
"\\d+\\D",
"[0-9]+[^0-9]" },
// A capturing group on either side: expected output equals the input.
219 {
"(a*)a*",
"(a*)a*" },
220 {
"a+(a)",
"a+(a)" },
221 {
"(a?)(aa)",
"(a?)(aa)" },
// A long chain mixing literals with *, +, ?, {2}, {2,} and {2,3} on the same
// letter: expected as one run of literal a's ending in a+.
223 {
"aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a",
"aaaaaaaaaaaaaaaa+" },
// Coalescing underneath an exact {2} repeat: the combined form aa+b is
// expected twice in the output.
228 {
"(?:a*aab){2}",
"aa+baa+b" },
// Coalescing underneath a capturing group: the parentheses are expected to
// remain around the combined form.
233 {
"(a*aab)",
"(aa+b)" },
// Directly nested repeats written as {0,}, {1,} and {0,1} (the counted
// spellings of *, + and ?). Left string is the input pattern, right string
// is the single operator expected in its place.
// Same operator inside and outside: that operator is kept.
236 {
"(?:(?:a){0,}){0,}",
"a*" },
237 {
"(?:(?:a){1,}){1,}",
"a+" },
238 {
"(?:(?:a){0,1}){0,1}",
"a?" },
// Every mixed inner/outer combination is expected as a*.
239 {
"(?:(?:a){0,}){1,}",
"a*" },
240 {
"(?:(?:a){0,}){0,1}",
"a*" },
241 {
"(?:(?:a){1,}){0,}",
"a*" },
242 {
"(?:(?:a){1,}){0,1}",
"a*" },
243 {
"(?:(?:a){0,1}){0,}",
"a*" },
244 {
"(?:(?:a){0,1}){1,}",
"a*" },
247 TEST(TestSimplify, SimpleRegexps) {
256 Regexp* sre = re->Simplify();
260 if (strcmp(
tests[i].regexp,
tests[i].simplified) == 0) {
262 <<
" " << re->ToString() <<
" " << sre->ToString();
266 <<
" " <<
tests[
i].regexp <<
" " << sre->Dump();