1// Copyright 2012 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28
29#include <stdlib.h>
30
31#include "src/v8.h"
32
33#include "src/ast.h"
34#include "src/char-predicates-inl.h"
35#include "src/jsregexp.h"
36#include "src/ostreams.h"
37#include "src/parser.h"
38#include "src/regexp-macro-assembler.h"
39#include "src/regexp-macro-assembler-irregexp.h"
40#include "src/string-stream.h"
41#include "src/zone-inl.h"
42#ifdef V8_INTERPRETED_REGEXP
43#include "src/interpreter-irregexp.h"
44#else  // V8_INTERPRETED_REGEXP
45#include "src/macro-assembler.h"
46#if V8_TARGET_ARCH_ARM
47#include "src/arm/assembler-arm.h"  // NOLINT
48#include "src/arm/macro-assembler-arm.h"
49#include "src/arm/regexp-macro-assembler-arm.h"
50#endif
51#if V8_TARGET_ARCH_ARM64
52#include "src/arm64/assembler-arm64.h"
53#include "src/arm64/macro-assembler-arm64.h"
54#include "src/arm64/regexp-macro-assembler-arm64.h"
55#endif
56#if V8_TARGET_ARCH_MIPS
57#include "src/mips/assembler-mips.h"
58#include "src/mips/macro-assembler-mips.h"
59#include "src/mips/regexp-macro-assembler-mips.h"
60#endif
61#if V8_TARGET_ARCH_MIPS64
62#include "src/mips64/assembler-mips64.h"
63#include "src/mips64/macro-assembler-mips64.h"
64#include "src/mips64/regexp-macro-assembler-mips64.h"
65#endif
66#if V8_TARGET_ARCH_X64
67#include "src/x64/assembler-x64.h"
68#include "src/x64/macro-assembler-x64.h"
69#include "src/x64/regexp-macro-assembler-x64.h"
70#endif
71#if V8_TARGET_ARCH_IA32
72#include "src/ia32/assembler-ia32.h"
73#include "src/ia32/macro-assembler-ia32.h"
74#include "src/ia32/regexp-macro-assembler-ia32.h"
75#endif
76#if V8_TARGET_ARCH_X87
77#include "src/x87/assembler-x87.h"
78#include "src/x87/macro-assembler-x87.h"
79#include "src/x87/regexp-macro-assembler-x87.h"
80#endif
81#endif  // V8_INTERPRETED_REGEXP
82#include "test/cctest/cctest.h"
83
84using namespace v8::internal;
85
86
87static bool CheckParse(const char* input) {
88  v8::HandleScope scope(CcTest::isolate());
89  Zone zone(CcTest::i_isolate());
90  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
91  RegExpCompileData result;
92  return v8::internal::RegExpParser::ParseRegExp(
93      &reader, false, &result, &zone);
94}
95
96
97static void CheckParseEq(const char* input, const char* expected) {
98  v8::HandleScope scope(CcTest::isolate());
99  Zone zone(CcTest::i_isolate());
100  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
101  RegExpCompileData result;
102  CHECK(v8::internal::RegExpParser::ParseRegExp(
103      &reader, false, &result, &zone));
104  CHECK(result.tree != NULL);
105  CHECK(result.error.is_null());
106  OStringStream os;
107  result.tree->Print(os, &zone);
108  CHECK_EQ(expected, os.c_str());
109}
110
111
112static bool CheckSimple(const char* input) {
113  v8::HandleScope scope(CcTest::isolate());
114  Zone zone(CcTest::i_isolate());
115  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
116  RegExpCompileData result;
117  CHECK(v8::internal::RegExpParser::ParseRegExp(
118      &reader, false, &result, &zone));
119  CHECK(result.tree != NULL);
120  CHECK(result.error.is_null());
121  return result.simple;
122}
123
124struct MinMaxPair {
125  int min_match;
126  int max_match;
127};
128
129
130static MinMaxPair CheckMinMaxMatch(const char* input) {
131  v8::HandleScope scope(CcTest::isolate());
132  Zone zone(CcTest::i_isolate());
133  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
134  RegExpCompileData result;
135  CHECK(v8::internal::RegExpParser::ParseRegExp(
136      &reader, false, &result, &zone));
137  CHECK(result.tree != NULL);
138  CHECK(result.error.is_null());
139  int min_match = result.tree->min_match();
140  int max_match = result.tree->max_match();
141  MinMaxPair pair = { min_match, max_match };
142  return pair;
143}
144
145
146#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
147#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
148#define CHECK_MIN_MAX(input, min, max)                                         \
149  { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
150    CHECK_EQ(min, min_max.min_match);                                          \
151    CHECK_EQ(max, min_max.max_match);                                          \
152  }
153
154TEST(Parser) {
155  CHECK_PARSE_ERROR("?");
156
157  CheckParseEq("abc", "'abc'");
158  CheckParseEq("", "%");
159  CheckParseEq("abc|def", "(| 'abc' 'def')");
160  CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
161  CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
162  CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
163  CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
164  CheckParseEq("a*", "(# 0 - g 'a')");
165  CheckParseEq("a*?", "(# 0 - n 'a')");
166  CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
167  CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
168  CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
169  CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
170  CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
171  CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
172  CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
173  CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
174  CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
175  CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
176  CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
177  CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
178  CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
179  CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
180  CheckParseEq("(?:foo)", "'foo'");
181  CheckParseEq("(?: foo )", "' foo '");
182  CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
183  CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
184  CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
185  CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
186  CheckParseEq("()", "(^ %)");
187  CheckParseEq("(?=)", "(-> + %)");
188  CheckParseEq("[]", "^[\\x00-\\uffff]");  // Doesn't compile on windows
189  CheckParseEq("[^]", "[\\x00-\\uffff]");  // \uffff isn't in codepage 1252
190  CheckParseEq("[x]", "[x]");
191  CheckParseEq("[xyz]", "[x y z]");
192  CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
193  CheckParseEq("[-123]", "[- 1 2 3]");
194  CheckParseEq("[^123]", "^[1 2 3]");
195  CheckParseEq("]", "']'");
196  CheckParseEq("}", "'}'");
197  CheckParseEq("[a-b-c]", "[a-b - c]");
198  CheckParseEq("[\\d]", "[0-9]");
199  CheckParseEq("[x\\dz]", "[x 0-9 z]");
200  CheckParseEq("[\\d-z]", "[0-9 - z]");
201  CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
202  CheckParseEq("[z-\\d]", "[z - 0-9]");
203  // Control character outside character class.
204  CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
205  CheckParseEq("\\c!", "'\\c!'");
206  CheckParseEq("\\c_", "'\\c_'");
207  CheckParseEq("\\c~", "'\\c~'");
208  CheckParseEq("\\c1", "'\\c1'");
209  // Control character inside character class.
210  CheckParseEq("[\\c!]", "[\\ c !]");
211  CheckParseEq("[\\c_]", "[\\x1f]");
212  CheckParseEq("[\\c~]", "[\\ c ~]");
213  CheckParseEq("[\\ca]", "[\\x01]");
214  CheckParseEq("[\\cz]", "[\\x1a]");
215  CheckParseEq("[\\cA]", "[\\x01]");
216  CheckParseEq("[\\cZ]", "[\\x1a]");
217  CheckParseEq("[\\c1]", "[\\x11]");
218
219  CheckParseEq("[a\\]c]", "[a ] c]");
220  CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
221  CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
222  CheckParseEq("\\0", "'\\x00'");
223  CheckParseEq("\\8", "'8'");
224  CheckParseEq("\\9", "'9'");
225  CheckParseEq("\\11", "'\\x09'");
226  CheckParseEq("\\11a", "'\\x09a'");
227  CheckParseEq("\\011", "'\\x09'");
228  CheckParseEq("\\00011", "'\\x0011'");
229  CheckParseEq("\\118", "'\\x098'");
230  CheckParseEq("\\111", "'I'");
231  CheckParseEq("\\1111", "'I1'");
232  CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
233  CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
234  CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
235  CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
236  CheckParseEq("(x)(x)(x)\\1*",
237               "(: (^ 'x') (^ 'x') (^ 'x')"
238               " (# 0 - g (<- 1)))");
239  CheckParseEq("(x)(x)(x)\\2*",
240               "(: (^ 'x') (^ 'x') (^ 'x')"
241               " (# 0 - g (<- 2)))");
242  CheckParseEq("(x)(x)(x)\\3*",
243               "(: (^ 'x') (^ 'x') (^ 'x')"
244               " (# 0 - g (<- 3)))");
245  CheckParseEq("(x)(x)(x)\\4*",
246               "(: (^ 'x') (^ 'x') (^ 'x')"
247               " (# 0 - g '\\x04'))");
248  CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
249               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
250               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
251  CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
252               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
253               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
254  CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
255  CheckParseEq("(a\\1)", "(^ 'a')");
256  CheckParseEq("(\\1a)", "(^ 'a')");
257  CheckParseEq("(?=a)?a", "'a'");
258  CheckParseEq("(?=a){0,10}a", "'a'");
259  CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
260  CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
261  CheckParseEq("(?!a)?a", "'a'");
262  CheckParseEq("\\1(a)", "(^ 'a')");
263  CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
264  CheckParseEq("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
265  CheckParseEq("[\\0]", "[\\x00]");
266  CheckParseEq("[\\11]", "[\\x09]");
267  CheckParseEq("[\\11a]", "[\\x09 a]");
268  CheckParseEq("[\\011]", "[\\x09]");
269  CheckParseEq("[\\00011]", "[\\x00 1 1]");
270  CheckParseEq("[\\118]", "[\\x09 8]");
271  CheckParseEq("[\\111]", "[I]");
272  CheckParseEq("[\\1111]", "[I 1]");
273  CheckParseEq("\\x34", "'\x34'");
274  CheckParseEq("\\x60", "'\x60'");
275  CheckParseEq("\\x3z", "'x3z'");
276  CheckParseEq("\\c", "'\\c'");
277  CheckParseEq("\\u0034", "'\x34'");
278  CheckParseEq("\\u003z", "'u003z'");
279  CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
280
281  CHECK_SIMPLE("", false);
282  CHECK_SIMPLE("a", true);
283  CHECK_SIMPLE("a|b", false);
284  CHECK_SIMPLE("a\\n", false);
285  CHECK_SIMPLE("^a", false);
286  CHECK_SIMPLE("a$", false);
287  CHECK_SIMPLE("a\\b!", false);
288  CHECK_SIMPLE("a\\Bb", false);
289  CHECK_SIMPLE("a*", false);
290  CHECK_SIMPLE("a*?", false);
291  CHECK_SIMPLE("a?", false);
292  CHECK_SIMPLE("a??", false);
293  CHECK_SIMPLE("a{0,1}?", false);
294  CHECK_SIMPLE("a{1,1}?", false);
295  CHECK_SIMPLE("a{1,2}?", false);
296  CHECK_SIMPLE("a+?", false);
297  CHECK_SIMPLE("(a)", false);
298  CHECK_SIMPLE("(a)\\1", false);
299  CHECK_SIMPLE("(\\1a)", false);
300  CHECK_SIMPLE("\\1(a)", false);
301  CHECK_SIMPLE("a\\s", false);
302  CHECK_SIMPLE("a\\S", false);
303  CHECK_SIMPLE("a\\d", false);
304  CHECK_SIMPLE("a\\D", false);
305  CHECK_SIMPLE("a\\w", false);
306  CHECK_SIMPLE("a\\W", false);
307  CHECK_SIMPLE("a.", false);
308  CHECK_SIMPLE("a\\q", false);
309  CHECK_SIMPLE("a[a]", false);
310  CHECK_SIMPLE("a[^a]", false);
311  CHECK_SIMPLE("a[a-z]", false);
312  CHECK_SIMPLE("a[\\q]", false);
313  CHECK_SIMPLE("a(?:b)", false);
314  CHECK_SIMPLE("a(?=b)", false);
315  CHECK_SIMPLE("a(?!b)", false);
316  CHECK_SIMPLE("\\x60", false);
317  CHECK_SIMPLE("\\u0060", false);
318  CHECK_SIMPLE("\\cA", false);
319  CHECK_SIMPLE("\\q", false);
320  CHECK_SIMPLE("\\1112", false);
321  CHECK_SIMPLE("\\0", false);
322  CHECK_SIMPLE("(a)\\1", false);
323  CHECK_SIMPLE("(?=a)?a", false);
324  CHECK_SIMPLE("(?!a)?a\\1", false);
325  CHECK_SIMPLE("(?:(?=a))a\\1", false);
326
327  CheckParseEq("a{}", "'a{}'");
328  CheckParseEq("a{,}", "'a{,}'");
329  CheckParseEq("a{", "'a{'");
330  CheckParseEq("a{z}", "'a{z}'");
331  CheckParseEq("a{1z}", "'a{1z}'");
332  CheckParseEq("a{12z}", "'a{12z}'");
333  CheckParseEq("a{12,", "'a{12,'");
334  CheckParseEq("a{12,3b", "'a{12,3b'");
335  CheckParseEq("{}", "'{}'");
336  CheckParseEq("{,}", "'{,}'");
337  CheckParseEq("{", "'{'");
338  CheckParseEq("{z}", "'{z}'");
339  CheckParseEq("{1z}", "'{1z}'");
340  CheckParseEq("{12z}", "'{12z}'");
341  CheckParseEq("{12,", "'{12,'");
342  CheckParseEq("{12,3b", "'{12,3b'");
343
344  CHECK_MIN_MAX("a", 1, 1);
345  CHECK_MIN_MAX("abc", 3, 3);
346  CHECK_MIN_MAX("a[bc]d", 3, 3);
347  CHECK_MIN_MAX("a|bc", 1, 2);
348  CHECK_MIN_MAX("ab|c", 1, 2);
349  CHECK_MIN_MAX("a||bc", 0, 2);
350  CHECK_MIN_MAX("|", 0, 0);
351  CHECK_MIN_MAX("(?:ab)", 2, 2);
352  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
353  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
354  CHECK_MIN_MAX("(ab)", 2, 2);
355  CHECK_MIN_MAX("(ab|cde)", 2, 3);
356  CHECK_MIN_MAX("(ab)\\1", 2, 4);
357  CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
358  CHECK_MIN_MAX("(?:ab)?", 0, 2);
359  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
360  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
361  CHECK_MIN_MAX("a?", 0, 1);
362  CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
363  CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
364  CHECK_MIN_MAX("a??", 0, 1);
365  CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
366  CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
367  CHECK_MIN_MAX("(?:a?)?", 0, 1);
368  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
369  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
370  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
371  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
372  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
373  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
374  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
375  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
376  CHECK_MIN_MAX("a{0}", 0, 0);
377  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
378  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
379  CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
380  CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
381  CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
382  CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
383  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
384  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
385  CHECK_MIN_MAX("a\\bc", 2, 2);
386  CHECK_MIN_MAX("a\\Bc", 2, 2);
387  CHECK_MIN_MAX("a\\sc", 3, 3);
388  CHECK_MIN_MAX("a\\Sc", 3, 3);
389  CHECK_MIN_MAX("a(?=b)c", 2, 2);
390  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
391  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
392}
393
394
395TEST(ParserRegression) {
396  CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
397  CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
398  CheckParseEq("{", "'{'");
399  CheckParseEq("a|", "(| 'a' %)");
400}
401
402static void ExpectError(const char* input,
403                        const char* expected) {
404  v8::HandleScope scope(CcTest::isolate());
405  Zone zone(CcTest::i_isolate());
406  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
407  RegExpCompileData result;
408  CHECK(!v8::internal::RegExpParser::ParseRegExp(
409      &reader, false, &result, &zone));
410  CHECK(result.tree == NULL);
411  CHECK(!result.error.is_null());
412  SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
413  CHECK_EQ(expected, str.get());
414}
415
416
417TEST(Errors) {
418  const char* kEndBackslash = "\\ at end of pattern";
419  ExpectError("\\", kEndBackslash);
420  const char* kUnterminatedGroup = "Unterminated group";
421  ExpectError("(foo", kUnterminatedGroup);
422  const char* kInvalidGroup = "Invalid group";
423  ExpectError("(?", kInvalidGroup);
424  const char* kUnterminatedCharacterClass = "Unterminated character class";
425  ExpectError("[", kUnterminatedCharacterClass);
426  ExpectError("[a-", kUnterminatedCharacterClass);
427  const char* kNothingToRepeat = "Nothing to repeat";
428  ExpectError("*", kNothingToRepeat);
429  ExpectError("?", kNothingToRepeat);
430  ExpectError("+", kNothingToRepeat);
431  ExpectError("{1}", kNothingToRepeat);
432  ExpectError("{1,2}", kNothingToRepeat);
433  ExpectError("{1,}", kNothingToRepeat);
434
435  // Check that we don't allow more than kMaxCapture captures
436  const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
437  const char* kTooManyCaptures = "Too many captures";
438  OStringStream os;
439  for (int i = 0; i <= kMaxCaptures; i++) {
440    os << "()";
441  }
442  ExpectError(os.c_str(), kTooManyCaptures);
443}
444
445
446static bool IsDigit(uc16 c) {
447  return ('0' <= c && c <= '9');
448}
449
450
451static bool NotDigit(uc16 c) {
452  return !IsDigit(c);
453}
454
455
456static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
457  // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
458  // WhiteSpace (7.2) and LineTerminator (7.3) values.
459  return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
460}
461
462
463static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
464  return !IsWhiteSpaceOrLineTerminator(c);
465}
466
467
468static bool NotWord(uc16 c) {
469  return !IsRegExpWord(c);
470}
471
472
473static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
474  Zone zone(CcTest::i_isolate());
475  ZoneList<CharacterRange>* ranges =
476      new(&zone) ZoneList<CharacterRange>(2, &zone);
477  CharacterRange::AddClassEscape(c, ranges, &zone);
478  for (unsigned i = 0; i < (1 << 16); i++) {
479    bool in_class = false;
480    for (int j = 0; !in_class && j < ranges->length(); j++) {
481      CharacterRange& range = ranges->at(j);
482      in_class = (range.from() <= i && i <= range.to());
483    }
484    CHECK_EQ(pred(i), in_class);
485  }
486}
487
488
489TEST(CharacterClassEscapes) {
490  TestCharacterClassEscapes('.', IsRegExpNewline);
491  TestCharacterClassEscapes('d', IsDigit);
492  TestCharacterClassEscapes('D', NotDigit);
493  TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
494  TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
495  TestCharacterClassEscapes('w', IsRegExpWord);
496  TestCharacterClassEscapes('W', NotWord);
497}
498
499
500static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
501                           Zone* zone) {
502  Isolate* isolate = CcTest::i_isolate();
503  FlatStringReader reader(isolate, CStrVector(input));
504  RegExpCompileData compile_data;
505  if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
506                                               &compile_data, zone))
507    return NULL;
508  Handle<String> pattern = isolate->factory()->
509      NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
510  Handle<String> sample_subject =
511      isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
512  RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
513                        sample_subject, is_one_byte, zone);
514  return compile_data.node;
515}
516
517
518static void Execute(const char* input, bool multiline, bool is_one_byte,
519                    bool dot_output = false) {
520  v8::HandleScope scope(CcTest::isolate());
521  Zone zone(CcTest::i_isolate());
522  RegExpNode* node = Compile(input, multiline, is_one_byte, &zone);
523  USE(node);
524#ifdef DEBUG
525  if (dot_output) {
526    RegExpEngine::DotPrint(input, node, false);
527  }
528#endif  // DEBUG
529}
530
531
532class TestConfig {
533 public:
534  typedef int Key;
535  typedef int Value;
536  static const int kNoKey;
537  static int NoValue() { return 0; }
538  static inline int Compare(int a, int b) {
539    if (a < b)
540      return -1;
541    else if (a > b)
542      return 1;
543    else
544      return 0;
545  }
546};
547
548
549const int TestConfig::kNoKey = 0;
550
551
552static unsigned PseudoRandom(int i, int j) {
553  return ~(~((i * 781) ^ (j * 329)));
554}
555
556
557TEST(SplayTreeSimple) {
558  static const unsigned kLimit = 1000;
559  Zone zone(CcTest::i_isolate());
560  ZoneSplayTree<TestConfig> tree(&zone);
561  bool seen[kLimit];
562  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
563#define CHECK_MAPS_EQUAL() do {                                      \
564    for (unsigned k = 0; k < kLimit; k++)                            \
565      CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
566  } while (false)
567  for (int i = 0; i < 50; i++) {
568    for (int j = 0; j < 50; j++) {
569      unsigned next = PseudoRandom(i, j) % kLimit;
570      if (seen[next]) {
571        // We've already seen this one.  Check the value and remove
572        // it.
573        ZoneSplayTree<TestConfig>::Locator loc;
574        CHECK(tree.Find(next, &loc));
575        CHECK_EQ(next, loc.key());
576        CHECK_EQ(3 * next, loc.value());
577        tree.Remove(next);
578        seen[next] = false;
579        CHECK_MAPS_EQUAL();
580      } else {
581        // Check that it wasn't there already and then add it.
582        ZoneSplayTree<TestConfig>::Locator loc;
583        CHECK(!tree.Find(next, &loc));
584        CHECK(tree.Insert(next, &loc));
585        CHECK_EQ(next, loc.key());
586        loc.set_value(3 * next);
587        seen[next] = true;
588        CHECK_MAPS_EQUAL();
589      }
590      int val = PseudoRandom(j, i) % kLimit;
591      if (seen[val]) {
592        ZoneSplayTree<TestConfig>::Locator loc;
593        CHECK(tree.FindGreatestLessThan(val, &loc));
594        CHECK_EQ(loc.key(), val);
595        break;
596      }
597      val = PseudoRandom(i + j, i - j) % kLimit;
598      if (seen[val]) {
599        ZoneSplayTree<TestConfig>::Locator loc;
600        CHECK(tree.FindLeastGreaterThan(val, &loc));
601        CHECK_EQ(loc.key(), val);
602        break;
603      }
604    }
605  }
606}
607
608
609TEST(DispatchTableConstruction) {
610  // Initialize test data.
611  static const int kLimit = 1000;
612  static const int kRangeCount = 8;
613  static const int kRangeSize = 16;
614  uc16 ranges[kRangeCount][2 * kRangeSize];
615  for (int i = 0; i < kRangeCount; i++) {
616    Vector<uc16> range(ranges[i], 2 * kRangeSize);
617    for (int j = 0; j < 2 * kRangeSize; j++) {
618      range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
619    }
620    range.Sort();
621    for (int j = 1; j < 2 * kRangeSize; j++) {
622      CHECK(range[j-1] <= range[j]);
623    }
624  }
625  // Enter test data into dispatch table.
626  Zone zone(CcTest::i_isolate());
627  DispatchTable table(&zone);
628  for (int i = 0; i < kRangeCount; i++) {
629    uc16* range = ranges[i];
630    for (int j = 0; j < 2 * kRangeSize; j += 2)
631      table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone);
632  }
633  // Check that the table looks as we would expect
634  for (int p = 0; p < kLimit; p++) {
635    OutSet* outs = table.Get(p);
636    for (int j = 0; j < kRangeCount; j++) {
637      uc16* range = ranges[j];
638      bool is_on = false;
639      for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
640        is_on = (range[k] <= p && p <= range[k + 1]);
641      CHECK_EQ(is_on, outs->Get(j));
642    }
643  }
644}
645
646
647// Test of debug-only syntax.
648#ifdef DEBUG
649
650TEST(ParsePossessiveRepetition) {
651  bool old_flag_value = FLAG_regexp_possessive_quantifier;
652
653  // Enable possessive quantifier syntax.
654  FLAG_regexp_possessive_quantifier = true;
655
656  CheckParseEq("a*+", "(# 0 - p 'a')");
657  CheckParseEq("a++", "(# 1 - p 'a')");
658  CheckParseEq("a?+", "(# 0 1 p 'a')");
659  CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
660  CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
661
662  // Disable possessive quantifier syntax.
663  FLAG_regexp_possessive_quantifier = false;
664
665  CHECK_PARSE_ERROR("a*+");
666  CHECK_PARSE_ERROR("a++");
667  CHECK_PARSE_ERROR("a?+");
668  CHECK_PARSE_ERROR("a{10,20}+");
669  CHECK_PARSE_ERROR("a{10,20}+b");
670
671  FLAG_regexp_possessive_quantifier = old_flag_value;
672}
673
674#endif
675
676// Tests of interpreter.
677
678
679#ifndef V8_INTERPRETED_REGEXP
680
681#if V8_TARGET_ARCH_IA32
682typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
683#elif V8_TARGET_ARCH_X64
684typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
685#elif V8_TARGET_ARCH_ARM
686typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
687#elif V8_TARGET_ARCH_ARM64
688typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
689#elif V8_TARGET_ARCH_MIPS
690typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
691#elif V8_TARGET_ARCH_MIPS64
692typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
693#elif V8_TARGET_ARCH_X87
694typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
695#endif
696
697class ContextInitializer {
698 public:
699  ContextInitializer()
700      : scope_(CcTest::isolate()),
701        env_(v8::Context::New(CcTest::isolate())) {
702    env_->Enter();
703  }
704  ~ContextInitializer() {
705    env_->Exit();
706  }
707 private:
708  v8::HandleScope scope_;
709  v8::Handle<v8::Context> env_;
710};
711
712
713static ArchRegExpMacroAssembler::Result Execute(Code* code,
714                                                String* input,
715                                                int start_offset,
716                                                const byte* input_start,
717                                                const byte* input_end,
718                                                int* captures) {
719  return NativeRegExpMacroAssembler::Execute(
720      code,
721      input,
722      start_offset,
723      input_start,
724      input_end,
725      captures,
726      0,
727      CcTest::i_isolate());
728}
729
730
731TEST(MacroAssemblerNativeSuccess) {
732  v8::V8::Initialize();
733  ContextInitializer initializer;
734  Isolate* isolate = CcTest::i_isolate();
735  Factory* factory = isolate->factory();
736  Zone zone(isolate);
737
738  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
739
740  m.Succeed();
741
742  Handle<String> source = factory->NewStringFromStaticChars("");
743  Handle<Object> code_object = m.GetCode(source);
744  Handle<Code> code = Handle<Code>::cast(code_object);
745
746  int captures[4] = {42, 37, 87, 117};
747  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
748  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
749  const byte* start_adr =
750      reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
751
752  NativeRegExpMacroAssembler::Result result =
753      Execute(*code,
754              *input,
755              0,
756              start_adr,
757              start_adr + seq_input->length(),
758              captures);
759
760  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
761  CHECK_EQ(-1, captures[0]);
762  CHECK_EQ(-1, captures[1]);
763  CHECK_EQ(-1, captures[2]);
764  CHECK_EQ(-1, captures[3]);
765}
766
767
768TEST(MacroAssemblerNativeSimple) {
769  v8::V8::Initialize();
770  ContextInitializer initializer;
771  Isolate* isolate = CcTest::i_isolate();
772  Factory* factory = isolate->factory();
773  Zone zone(isolate);
774
775  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
776
777  Label fail, backtrack;
778  m.PushBacktrack(&fail);
779  m.CheckNotAtStart(NULL);
780  m.LoadCurrentCharacter(2, NULL);
781  m.CheckNotCharacter('o', NULL);
782  m.LoadCurrentCharacter(1, NULL, false);
783  m.CheckNotCharacter('o', NULL);
784  m.LoadCurrentCharacter(0, NULL, false);
785  m.CheckNotCharacter('f', NULL);
786  m.WriteCurrentPositionToRegister(0, 0);
787  m.WriteCurrentPositionToRegister(1, 3);
788  m.AdvanceCurrentPosition(3);
789  m.PushBacktrack(&backtrack);
790  m.Succeed();
791  m.Bind(&backtrack);
792  m.Backtrack();
793  m.Bind(&fail);
794  m.Fail();
795
796  Handle<String> source = factory->NewStringFromStaticChars("^foo");
797  Handle<Object> code_object = m.GetCode(source);
798  Handle<Code> code = Handle<Code>::cast(code_object);
799
800  int captures[4] = {42, 37, 87, 117};
801  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
802  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
803  Address start_adr = seq_input->GetCharsAddress();
804
805  NativeRegExpMacroAssembler::Result result =
806      Execute(*code,
807              *input,
808              0,
809              start_adr,
810              start_adr + input->length(),
811              captures);
812
813  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
814  CHECK_EQ(0, captures[0]);
815  CHECK_EQ(3, captures[1]);
816  CHECK_EQ(-1, captures[2]);
817  CHECK_EQ(-1, captures[3]);
818
819  input = factory->NewStringFromStaticChars("barbarbar");
820  seq_input = Handle<SeqOneByteString>::cast(input);
821  start_adr = seq_input->GetCharsAddress();
822
823  result = Execute(*code,
824                   *input,
825                   0,
826                   start_adr,
827                   start_adr + input->length(),
828                   captures);
829
830  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
831}
832
833
834TEST(MacroAssemblerNativeSimpleUC16) {
835  v8::V8::Initialize();
836  ContextInitializer initializer;
837  Isolate* isolate = CcTest::i_isolate();
838  Factory* factory = isolate->factory();
839  Zone zone(isolate);
840
841  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
842
843  Label fail, backtrack;
844  m.PushBacktrack(&fail);
845  m.CheckNotAtStart(NULL);
846  m.LoadCurrentCharacter(2, NULL);
847  m.CheckNotCharacter('o', NULL);
848  m.LoadCurrentCharacter(1, NULL, false);
849  m.CheckNotCharacter('o', NULL);
850  m.LoadCurrentCharacter(0, NULL, false);
851  m.CheckNotCharacter('f', NULL);
852  m.WriteCurrentPositionToRegister(0, 0);
853  m.WriteCurrentPositionToRegister(1, 3);
854  m.AdvanceCurrentPosition(3);
855  m.PushBacktrack(&backtrack);
856  m.Succeed();
857  m.Bind(&backtrack);
858  m.Backtrack();
859  m.Bind(&fail);
860  m.Fail();
861
862  Handle<String> source = factory->NewStringFromStaticChars("^foo");
863  Handle<Object> code_object = m.GetCode(source);
864  Handle<Code> code = Handle<Code>::cast(code_object);
865
866  int captures[4] = {42, 37, 87, 117};
867  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
868                              static_cast<uc16>(0x2603)};
869  Handle<String> input = factory->NewStringFromTwoByte(
870      Vector<const uc16>(input_data, 6)).ToHandleChecked();
871  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
872  Address start_adr = seq_input->GetCharsAddress();
873
874  NativeRegExpMacroAssembler::Result result =
875      Execute(*code,
876              *input,
877              0,
878              start_adr,
879              start_adr + input->length(),
880              captures);
881
882  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
883  CHECK_EQ(0, captures[0]);
884  CHECK_EQ(3, captures[1]);
885  CHECK_EQ(-1, captures[2]);
886  CHECK_EQ(-1, captures[3]);
887
888  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
889                               static_cast<uc16>(0x2603)};
890  input = factory->NewStringFromTwoByte(
891      Vector<const uc16>(input_data2, 9)).ToHandleChecked();
892  seq_input = Handle<SeqTwoByteString>::cast(input);
893  start_adr = seq_input->GetCharsAddress();
894
895  result = Execute(*code,
896                   *input,
897                   0,
898                   start_adr,
899                   start_adr + input->length() * 2,
900                   captures);
901
902  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
903}
904
905
906TEST(MacroAssemblerNativeBacktrack) {
907  v8::V8::Initialize();
908  ContextInitializer initializer;
909  Isolate* isolate = CcTest::i_isolate();
910  Factory* factory = isolate->factory();
911  Zone zone(isolate);
912
913  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
914
915  Label fail;
916  Label backtrack;
917  m.LoadCurrentCharacter(10, &fail);
918  m.Succeed();
919  m.Bind(&fail);
920  m.PushBacktrack(&backtrack);
921  m.LoadCurrentCharacter(10, NULL);
922  m.Succeed();
923  m.Bind(&backtrack);
924  m.Fail();
925
926  Handle<String> source = factory->NewStringFromStaticChars("..........");
927  Handle<Object> code_object = m.GetCode(source);
928  Handle<Code> code = Handle<Code>::cast(code_object);
929
930  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
931  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
932  Address start_adr = seq_input->GetCharsAddress();
933
934  NativeRegExpMacroAssembler::Result result =
935      Execute(*code,
936              *input,
937              0,
938              start_adr,
939              start_adr + input->length(),
940              NULL);
941
942  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
943}
944
945
946TEST(MacroAssemblerNativeBackReferenceLATIN1) {
947  v8::V8::Initialize();
948  ContextInitializer initializer;
949  Isolate* isolate = CcTest::i_isolate();
950  Factory* factory = isolate->factory();
951  Zone zone(isolate);
952
953  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
954
955  m.WriteCurrentPositionToRegister(0, 0);
956  m.AdvanceCurrentPosition(2);
957  m.WriteCurrentPositionToRegister(1, 0);
958  Label nomatch;
959  m.CheckNotBackReference(0, &nomatch);
960  m.Fail();
961  m.Bind(&nomatch);
962  m.AdvanceCurrentPosition(2);
963  Label missing_match;
964  m.CheckNotBackReference(0, &missing_match);
965  m.WriteCurrentPositionToRegister(2, 0);
966  m.Succeed();
967  m.Bind(&missing_match);
968  m.Fail();
969
970  Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
971  Handle<Object> code_object = m.GetCode(source);
972  Handle<Code> code = Handle<Code>::cast(code_object);
973
974  Handle<String> input = factory->NewStringFromStaticChars("fooofo");
975  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
976  Address start_adr = seq_input->GetCharsAddress();
977
978  int output[4];
979  NativeRegExpMacroAssembler::Result result =
980      Execute(*code,
981              *input,
982              0,
983              start_adr,
984              start_adr + input->length(),
985              output);
986
987  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
988  CHECK_EQ(0, output[0]);
989  CHECK_EQ(2, output[1]);
990  CHECK_EQ(6, output[2]);
991  CHECK_EQ(-1, output[3]);
992}
993
994
995TEST(MacroAssemblerNativeBackReferenceUC16) {
996  v8::V8::Initialize();
997  ContextInitializer initializer;
998  Isolate* isolate = CcTest::i_isolate();
999  Factory* factory = isolate->factory();
1000  Zone zone(isolate);
1001
1002  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
1003
1004  m.WriteCurrentPositionToRegister(0, 0);
1005  m.AdvanceCurrentPosition(2);
1006  m.WriteCurrentPositionToRegister(1, 0);
1007  Label nomatch;
1008  m.CheckNotBackReference(0, &nomatch);
1009  m.Fail();
1010  m.Bind(&nomatch);
1011  m.AdvanceCurrentPosition(2);
1012  Label missing_match;
1013  m.CheckNotBackReference(0, &missing_match);
1014  m.WriteCurrentPositionToRegister(2, 0);
1015  m.Succeed();
1016  m.Bind(&missing_match);
1017  m.Fail();
1018
1019  Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
1020  Handle<Object> code_object = m.GetCode(source);
1021  Handle<Code> code = Handle<Code>::cast(code_object);
1022
1023  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
1024  Handle<String> input = factory->NewStringFromTwoByte(
1025      Vector<const uc16>(input_data, 6)).ToHandleChecked();
1026  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
1027  Address start_adr = seq_input->GetCharsAddress();
1028
1029  int output[4];
1030  NativeRegExpMacroAssembler::Result result =
1031      Execute(*code,
1032              *input,
1033              0,
1034              start_adr,
1035              start_adr + input->length() * 2,
1036              output);
1037
1038  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1039  CHECK_EQ(0, output[0]);
1040  CHECK_EQ(2, output[1]);
1041  CHECK_EQ(6, output[2]);
1042  CHECK_EQ(-1, output[3]);
1043}
1044
1045
1046
1047TEST(MacroAssemblernativeAtStart) {
1048  v8::V8::Initialize();
1049  ContextInitializer initializer;
1050  Isolate* isolate = CcTest::i_isolate();
1051  Factory* factory = isolate->factory();
1052  Zone zone(isolate);
1053
1054  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
1055
1056  Label not_at_start, newline, fail;
1057  m.CheckNotAtStart(&not_at_start);
1058  // Check that prevchar = '\n' and current = 'f'.
1059  m.CheckCharacter('\n', &newline);
1060  m.Bind(&fail);
1061  m.Fail();
1062  m.Bind(&newline);
1063  m.LoadCurrentCharacter(0, &fail);
1064  m.CheckNotCharacter('f', &fail);
1065  m.Succeed();
1066
1067  m.Bind(&not_at_start);
1068  // Check that prevchar = 'o' and current = 'b'.
1069  Label prevo;
1070  m.CheckCharacter('o', &prevo);
1071  m.Fail();
1072  m.Bind(&prevo);
1073  m.LoadCurrentCharacter(0, &fail);
1074  m.CheckNotCharacter('b', &fail);
1075  m.Succeed();
1076
1077  Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
1078  Handle<Object> code_object = m.GetCode(source);
1079  Handle<Code> code = Handle<Code>::cast(code_object);
1080
1081  Handle<String> input = factory->NewStringFromStaticChars("foobar");
1082  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1083  Address start_adr = seq_input->GetCharsAddress();
1084
1085  NativeRegExpMacroAssembler::Result result =
1086      Execute(*code,
1087              *input,
1088              0,
1089              start_adr,
1090              start_adr + input->length(),
1091              NULL);
1092
1093  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1094
1095  result = Execute(*code,
1096                   *input,
1097                   3,
1098                   start_adr + 3,
1099                   start_adr + input->length(),
1100                   NULL);
1101
1102  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1103}
1104
1105
1106TEST(MacroAssemblerNativeBackRefNoCase) {
1107  v8::V8::Initialize();
1108  ContextInitializer initializer;
1109  Isolate* isolate = CcTest::i_isolate();
1110  Factory* factory = isolate->factory();
1111  Zone zone(isolate);
1112
1113  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
1114
1115  Label fail, succ;
1116
1117  m.WriteCurrentPositionToRegister(0, 0);
1118  m.WriteCurrentPositionToRegister(2, 0);
1119  m.AdvanceCurrentPosition(3);
1120  m.WriteCurrentPositionToRegister(3, 0);
1121  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "AbC".
1122  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "ABC".
1123  Label expected_fail;
1124  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1125  m.Bind(&fail);
1126  m.Fail();
1127
1128  m.Bind(&expected_fail);
1129  m.AdvanceCurrentPosition(3);  // Skip "xYz"
1130  m.CheckNotBackReferenceIgnoreCase(2, &succ);
1131  m.Fail();
1132
1133  m.Bind(&succ);
1134  m.WriteCurrentPositionToRegister(1, 0);
1135  m.Succeed();
1136
1137  Handle<String> source =
1138      factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
1139  Handle<Object> code_object = m.GetCode(source);
1140  Handle<Code> code = Handle<Code>::cast(code_object);
1141
1142  Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
1143  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1144  Address start_adr = seq_input->GetCharsAddress();
1145
1146  int output[4];
1147  NativeRegExpMacroAssembler::Result result =
1148      Execute(*code,
1149              *input,
1150              0,
1151              start_adr,
1152              start_adr + input->length(),
1153              output);
1154
1155  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1156  CHECK_EQ(0, output[0]);
1157  CHECK_EQ(12, output[1]);
1158  CHECK_EQ(0, output[2]);
1159  CHECK_EQ(3, output[3]);
1160}
1161
1162
1163
1164TEST(MacroAssemblerNativeRegisters) {
1165  v8::V8::Initialize();
1166  ContextInitializer initializer;
1167  Isolate* isolate = CcTest::i_isolate();
1168  Factory* factory = isolate->factory();
1169  Zone zone(isolate);
1170
1171  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 6, &zone);
1172
1173  uc16 foo_chars[3] = {'f', 'o', 'o'};
1174  Vector<const uc16> foo(foo_chars, 3);
1175
1176  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1177  Label fail;
1178  Label backtrack;
1179  m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
1180  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1181  m.PushBacktrack(&backtrack);
1182  m.WriteStackPointerToRegister(sp);
1183  // Fill stack and registers
1184  m.AdvanceCurrentPosition(2);
1185  m.WriteCurrentPositionToRegister(out1, 0);
1186  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1187  m.PushBacktrack(&fail);
1188  // Drop backtrack stack frames.
1189  m.ReadStackPointerFromRegister(sp);
1190  // And take the first backtrack (to &backtrack)
1191  m.Backtrack();
1192
1193  m.PushCurrentPosition();
1194  m.AdvanceCurrentPosition(2);
1195  m.PopCurrentPosition();
1196
1197  m.Bind(&backtrack);
1198  m.PopRegister(out1);
1199  m.ReadCurrentPositionFromRegister(out1);
1200  m.AdvanceCurrentPosition(3);
1201  m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
1202
1203  Label loop;
1204  m.SetRegister(loop_cnt, 0);  // loop counter
1205  m.Bind(&loop);
1206  m.AdvanceRegister(loop_cnt, 1);
1207  m.AdvanceCurrentPosition(1);
1208  m.IfRegisterLT(loop_cnt, 3, &loop);
1209  m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
1210
1211  Label loop2;
1212  m.SetRegister(loop_cnt, 2);  // loop counter
1213  m.Bind(&loop2);
1214  m.AdvanceRegister(loop_cnt, -1);
1215  m.AdvanceCurrentPosition(1);
1216  m.IfRegisterGE(loop_cnt, 0, &loop2);
1217  m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
1218
1219  Label loop3;
1220  Label exit_loop3;
1221  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1222  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1223  m.ReadCurrentPositionFromRegister(out3);
1224  m.Bind(&loop3);
1225  m.AdvanceCurrentPosition(1);
1226  m.CheckGreedyLoop(&exit_loop3);
1227  m.GoTo(&loop3);
1228  m.Bind(&exit_loop3);
1229  m.PopCurrentPosition();
1230  m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
1231
1232  m.Succeed();
1233
1234  m.Bind(&fail);
1235  m.Fail();
1236
1237  Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
1238  Handle<Object> code_object = m.GetCode(source);
1239  Handle<Code> code = Handle<Code>::cast(code_object);
1240
1241  // String long enough for test (content doesn't matter).
1242  Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
1243  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1244  Address start_adr = seq_input->GetCharsAddress();
1245
1246  int output[6];
1247  NativeRegExpMacroAssembler::Result result =
1248      Execute(*code,
1249              *input,
1250              0,
1251              start_adr,
1252              start_adr + input->length(),
1253              output);
1254
1255  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1256  CHECK_EQ(0, output[0]);
1257  CHECK_EQ(3, output[1]);
1258  CHECK_EQ(6, output[2]);
1259  CHECK_EQ(9, output[3]);
1260  CHECK_EQ(9, output[4]);
1261  CHECK_EQ(-1, output[5]);
1262}
1263
1264
1265TEST(MacroAssemblerStackOverflow) {
1266  v8::V8::Initialize();
1267  ContextInitializer initializer;
1268  Isolate* isolate = CcTest::i_isolate();
1269  Factory* factory = isolate->factory();
1270  Zone zone(isolate);
1271
1272  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
1273
1274  Label loop;
1275  m.Bind(&loop);
1276  m.PushBacktrack(&loop);
1277  m.GoTo(&loop);
1278
1279  Handle<String> source =
1280      factory->NewStringFromStaticChars("<stack overflow test>");
1281  Handle<Object> code_object = m.GetCode(source);
1282  Handle<Code> code = Handle<Code>::cast(code_object);
1283
1284  // String long enough for test (content doesn't matter).
1285  Handle<String> input = factory->NewStringFromStaticChars("dummy");
1286  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1287  Address start_adr = seq_input->GetCharsAddress();
1288
1289  NativeRegExpMacroAssembler::Result result =
1290      Execute(*code,
1291              *input,
1292              0,
1293              start_adr,
1294              start_adr + input->length(),
1295              NULL);
1296
1297  CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
1298  CHECK(isolate->has_pending_exception());
1299  isolate->clear_pending_exception();
1300}
1301
1302
1303TEST(MacroAssemblerNativeLotsOfRegisters) {
1304  v8::V8::Initialize();
1305  ContextInitializer initializer;
1306  Isolate* isolate = CcTest::i_isolate();
1307  Factory* factory = isolate->factory();
1308  Zone zone(isolate);
1309
1310  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 2, &zone);
1311
1312  // At least 2048, to ensure the allocated space for registers
1313  // span one full page.
1314  const int large_number = 8000;
1315  m.WriteCurrentPositionToRegister(large_number, 42);
1316  m.WriteCurrentPositionToRegister(0, 0);
1317  m.WriteCurrentPositionToRegister(1, 1);
1318  Label done;
1319  m.CheckNotBackReference(0, &done);  // Performs a system-stack push.
1320  m.Bind(&done);
1321  m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1322  m.PopRegister(1);
1323  m.Succeed();
1324
1325  Handle<String> source =
1326      factory->NewStringFromStaticChars("<huge register space test>");
1327  Handle<Object> code_object = m.GetCode(source);
1328  Handle<Code> code = Handle<Code>::cast(code_object);
1329
1330  // String long enough for test (content doesn't matter).
1331  Handle<String> input = factory->NewStringFromStaticChars("sample text");
1332  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1333  Address start_adr = seq_input->GetCharsAddress();
1334
1335  int captures[2];
1336  NativeRegExpMacroAssembler::Result result =
1337      Execute(*code,
1338              *input,
1339              0,
1340              start_adr,
1341              start_adr + input->length(),
1342              captures);
1343
1344  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1345  CHECK_EQ(0, captures[0]);
1346  CHECK_EQ(42, captures[1]);
1347
1348  isolate->clear_pending_exception();
1349}
1350
1351#else  // V8_INTERPRETED_REGEXP
1352
1353TEST(MacroAssembler) {
1354  byte codes[1024];
1355  Zone zone(CcTest::i_isolate());
1356  RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone);
1357  // ^f(o)o.
1358  Label start, fail, backtrack;
1359
1360  m.SetRegister(4, 42);
1361  m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1362  m.AdvanceRegister(4, 42);
1363  m.GoTo(&start);
1364  m.Fail();
1365  m.Bind(&start);
1366  m.PushBacktrack(&fail);
1367  m.CheckNotAtStart(NULL);
1368  m.LoadCurrentCharacter(0, NULL);
1369  m.CheckNotCharacter('f', NULL);
1370  m.LoadCurrentCharacter(1, NULL);
1371  m.CheckNotCharacter('o', NULL);
1372  m.LoadCurrentCharacter(2, NULL);
1373  m.CheckNotCharacter('o', NULL);
1374  m.WriteCurrentPositionToRegister(0, 0);
1375  m.WriteCurrentPositionToRegister(1, 3);
1376  m.WriteCurrentPositionToRegister(2, 1);
1377  m.WriteCurrentPositionToRegister(3, 2);
1378  m.AdvanceCurrentPosition(3);
1379  m.PushBacktrack(&backtrack);
1380  m.Succeed();
1381  m.Bind(&backtrack);
1382  m.ClearRegisters(2, 3);
1383  m.Backtrack();
1384  m.Bind(&fail);
1385  m.PopRegister(0);
1386  m.Fail();
1387
1388  Isolate* isolate = CcTest::i_isolate();
1389  Factory* factory = isolate->factory();
1390  HandleScope scope(isolate);
1391
1392  Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
1393  Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1394  int captures[5];
1395
1396  const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1397  Handle<String> f1_16 = factory->NewStringFromTwoByte(
1398      Vector<const uc16>(str1, 6)).ToHandleChecked();
1399
1400  CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
1401  CHECK_EQ(0, captures[0]);
1402  CHECK_EQ(3, captures[1]);
1403  CHECK_EQ(1, captures[2]);
1404  CHECK_EQ(2, captures[3]);
1405  CHECK_EQ(84, captures[4]);
1406
1407  const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1408  Handle<String> f2_16 = factory->NewStringFromTwoByte(
1409      Vector<const uc16>(str2, 6)).ToHandleChecked();
1410
1411  CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
1412  CHECK_EQ(42, captures[0]);
1413}
1414
1415#endif  // V8_INTERPRETED_REGEXP
1416
1417
1418TEST(AddInverseToTable) {
1419  static const int kLimit = 1000;
1420  static const int kRangeCount = 16;
1421  for (int t = 0; t < 10; t++) {
1422    Zone zone(CcTest::i_isolate());
1423    ZoneList<CharacterRange>* ranges =
1424        new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
1425    for (int i = 0; i < kRangeCount; i++) {
1426      int from = PseudoRandom(t + 87, i + 25) % kLimit;
1427      int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1428      if (to > kLimit) to = kLimit;
1429      ranges->Add(CharacterRange(from, to), &zone);
1430    }
1431    DispatchTable table(&zone);
1432    DispatchTableConstructor cons(&table, false, &zone);
1433    cons.set_choice_index(0);
1434    cons.AddInverse(ranges);
1435    for (int i = 0; i < kLimit; i++) {
1436      bool is_on = false;
1437      for (int j = 0; !is_on && j < kRangeCount; j++)
1438        is_on = ranges->at(j).Contains(i);
1439      OutSet* set = table.Get(i);
1440      CHECK_EQ(is_on, set->Get(0) == false);
1441    }
1442  }
1443  Zone zone(CcTest::i_isolate());
1444  ZoneList<CharacterRange>* ranges =
1445      new(&zone) ZoneList<CharacterRange>(1, &zone);
1446  ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
1447  DispatchTable table(&zone);
1448  DispatchTableConstructor cons(&table, false, &zone);
1449  cons.set_choice_index(0);
1450  cons.AddInverse(ranges);
1451  CHECK(!table.Get(0xFFFE)->Get(0));
1452  CHECK(table.Get(0xFFFF)->Get(0));
1453}
1454
1455
1456static uc32 canonicalize(uc32 c) {
1457  unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1458  int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1459  if (count == 0) {
1460    return c;
1461  } else {
1462    CHECK_EQ(1, count);
1463    return canon[0];
1464  }
1465}
1466
1467
1468TEST(LatinCanonicalize) {
1469  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1470  for (char lower = 'a'; lower <= 'z'; lower++) {
1471    char upper = lower + ('A' - 'a');
1472    CHECK_EQ(canonicalize(lower), canonicalize(upper));
1473    unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1474    int length = un_canonicalize.get(lower, '\0', uncanon);
1475    CHECK_EQ(2, length);
1476    CHECK_EQ(upper, uncanon[0]);
1477    CHECK_EQ(lower, uncanon[1]);
1478  }
1479  for (uc32 c = 128; c < (1 << 21); c++)
1480    CHECK_GE(canonicalize(c), 128);
1481  unibrow::Mapping<unibrow::ToUppercase> to_upper;
1482  // Canonicalization is only defined for the Basic Multilingual Plane.
1483  for (uc32 c = 0; c < (1 << 16); c++) {
1484    unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1485    int length = to_upper.get(c, '\0', upper);
1486    if (length == 0) {
1487      length = 1;
1488      upper[0] = c;
1489    }
1490    uc32 u = upper[0];
1491    if (length > 1 || (c >= 128 && u < 128))
1492      u = c;
1493    CHECK_EQ(u, canonicalize(c));
1494  }
1495}
1496
1497
1498static uc32 CanonRangeEnd(uc32 c) {
1499  unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1500  int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1501  if (count == 0) {
1502    return c;
1503  } else {
1504    CHECK_EQ(1, count);
1505    return canon[0];
1506  }
1507}
1508
1509
1510TEST(RangeCanonicalization) {
1511  // Check that we arrive at the same result when using the basic
1512  // range canonicalization primitives as when using immediate
1513  // canonicalization.
1514  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1515  int block_start = 0;
1516  while (block_start <= 0xFFFF) {
1517    uc32 block_end = CanonRangeEnd(block_start);
1518    unsigned block_length = block_end - block_start + 1;
1519    if (block_length > 1) {
1520      unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1521      int first_length = un_canonicalize.get(block_start, '\0', first);
1522      for (unsigned i = 1; i < block_length; i++) {
1523        unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1524        int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1525        CHECK_EQ(first_length, succ_length);
1526        for (int j = 0; j < succ_length; j++) {
1527          int calc = first[j] + i;
1528          int found = succ[j];
1529          CHECK_EQ(calc, found);
1530        }
1531      }
1532    }
1533    block_start = block_start + block_length;
1534  }
1535}
1536
1537
1538TEST(UncanonicalizeEquivalence) {
1539  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1540  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1541  for (int i = 0; i < (1 << 16); i++) {
1542    int length = un_canonicalize.get(i, '\0', chars);
1543    for (int j = 0; j < length; j++) {
1544      unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1545      int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1546      CHECK_EQ(length, length2);
1547      for (int k = 0; k < length; k++)
1548        CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1549    }
1550  }
1551}
1552
1553
1554static void TestRangeCaseIndependence(CharacterRange input,
1555                                      Vector<CharacterRange> expected) {
1556  Zone zone(CcTest::i_isolate());
1557  int count = expected.length();
1558  ZoneList<CharacterRange>* list =
1559      new(&zone) ZoneList<CharacterRange>(count, &zone);
1560  input.AddCaseEquivalents(list, false, &zone);
1561  CHECK_EQ(count, list->length());
1562  for (int i = 0; i < list->length(); i++) {
1563    CHECK_EQ(expected[i].from(), list->at(i).from());
1564    CHECK_EQ(expected[i].to(), list->at(i).to());
1565  }
1566}
1567
1568
1569static void TestSimpleRangeCaseIndependence(CharacterRange input,
1570                                            CharacterRange expected) {
1571  EmbeddedVector<CharacterRange, 1> vector;
1572  vector[0] = expected;
1573  TestRangeCaseIndependence(input, vector);
1574}
1575
1576
1577TEST(CharacterRangeCaseIndependence) {
1578  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1579                                  CharacterRange::Singleton('A'));
1580  TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1581                                  CharacterRange::Singleton('Z'));
1582  TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1583                                  CharacterRange('A', 'Z'));
1584  TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1585                                  CharacterRange('C', 'F'));
1586  TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1587                                  CharacterRange('A', 'B'));
1588  TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1589                                  CharacterRange('Y', 'Z'));
1590  TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1591                                  CharacterRange('A', 'Z'));
1592  TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1593                                  CharacterRange('a', 'z'));
1594  TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1595                                  CharacterRange('c', 'f'));
1596  TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1597                                  CharacterRange('a', 'z'));
1598  // Here we need to add [l-z] to complete the case independence of
1599  // [A-Za-z] but we expect [a-z] to be added since we always add a
1600  // whole block at a time.
1601  TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1602                                  CharacterRange('a', 'z'));
1603}
1604
1605
1606static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1607  if (ranges == NULL)
1608    return false;
1609  for (int i = 0; i < ranges->length(); i++) {
1610    CharacterRange range = ranges->at(i);
1611    if (range.from() <= c && c <= range.to())
1612      return true;
1613  }
1614  return false;
1615}
1616
1617
1618TEST(CharClassDifference) {
1619  Zone zone(CcTest::i_isolate());
1620  ZoneList<CharacterRange>* base =
1621      new(&zone) ZoneList<CharacterRange>(1, &zone);
1622  base->Add(CharacterRange::Everything(), &zone);
1623  Vector<const int> overlay = CharacterRange::GetWordBounds();
1624  ZoneList<CharacterRange>* included = NULL;
1625  ZoneList<CharacterRange>* excluded = NULL;
1626  CharacterRange::Split(base, overlay, &included, &excluded, &zone);
1627  for (int i = 0; i < (1 << 16); i++) {
1628    bool in_base = InClass(i, base);
1629    if (in_base) {
1630      bool in_overlay = false;
1631      for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
1632        if (overlay[j] <= i && i < overlay[j+1])
1633          in_overlay = true;
1634      }
1635      CHECK_EQ(in_overlay, InClass(i, included));
1636      CHECK_EQ(!in_overlay, InClass(i, excluded));
1637    } else {
1638      CHECK(!InClass(i, included));
1639      CHECK(!InClass(i, excluded));
1640    }
1641  }
1642}
1643
1644
1645TEST(CanonicalizeCharacterSets) {
1646  Zone zone(CcTest::i_isolate());
1647  ZoneList<CharacterRange>* list =
1648      new(&zone) ZoneList<CharacterRange>(4, &zone);
1649  CharacterSet set(list);
1650
1651  list->Add(CharacterRange(10, 20), &zone);
1652  list->Add(CharacterRange(30, 40), &zone);
1653  list->Add(CharacterRange(50, 60), &zone);
1654  set.Canonicalize();
1655  DCHECK_EQ(3, list->length());
1656  DCHECK_EQ(10, list->at(0).from());
1657  DCHECK_EQ(20, list->at(0).to());
1658  DCHECK_EQ(30, list->at(1).from());
1659  DCHECK_EQ(40, list->at(1).to());
1660  DCHECK_EQ(50, list->at(2).from());
1661  DCHECK_EQ(60, list->at(2).to());
1662
1663  list->Rewind(0);
1664  list->Add(CharacterRange(10, 20), &zone);
1665  list->Add(CharacterRange(50, 60), &zone);
1666  list->Add(CharacterRange(30, 40), &zone);
1667  set.Canonicalize();
1668  DCHECK_EQ(3, list->length());
1669  DCHECK_EQ(10, list->at(0).from());
1670  DCHECK_EQ(20, list->at(0).to());
1671  DCHECK_EQ(30, list->at(1).from());
1672  DCHECK_EQ(40, list->at(1).to());
1673  DCHECK_EQ(50, list->at(2).from());
1674  DCHECK_EQ(60, list->at(2).to());
1675
1676  list->Rewind(0);
1677  list->Add(CharacterRange(30, 40), &zone);
1678  list->Add(CharacterRange(10, 20), &zone);
1679  list->Add(CharacterRange(25, 25), &zone);
1680  list->Add(CharacterRange(100, 100), &zone);
1681  list->Add(CharacterRange(1, 1), &zone);
1682  set.Canonicalize();
1683  DCHECK_EQ(5, list->length());
1684  DCHECK_EQ(1, list->at(0).from());
1685  DCHECK_EQ(1, list->at(0).to());
1686  DCHECK_EQ(10, list->at(1).from());
1687  DCHECK_EQ(20, list->at(1).to());
1688  DCHECK_EQ(25, list->at(2).from());
1689  DCHECK_EQ(25, list->at(2).to());
1690  DCHECK_EQ(30, list->at(3).from());
1691  DCHECK_EQ(40, list->at(3).to());
1692  DCHECK_EQ(100, list->at(4).from());
1693  DCHECK_EQ(100, list->at(4).to());
1694
1695  list->Rewind(0);
1696  list->Add(CharacterRange(10, 19), &zone);
1697  list->Add(CharacterRange(21, 30), &zone);
1698  list->Add(CharacterRange(20, 20), &zone);
1699  set.Canonicalize();
1700  DCHECK_EQ(1, list->length());
1701  DCHECK_EQ(10, list->at(0).from());
1702  DCHECK_EQ(30, list->at(0).to());
1703}
1704
1705
1706TEST(CharacterRangeMerge) {
1707  Zone zone(CcTest::i_isolate());
1708  ZoneList<CharacterRange> l1(4, &zone);
1709  ZoneList<CharacterRange> l2(4, &zone);
1710  // Create all combinations of intersections of ranges, both singletons and
1711  // longer.
1712
1713  int offset = 0;
1714
1715  // The five kinds of singleton intersections:
1716  //     X
1717  //   Y      - outside before
1718  //    Y     - outside touching start
1719  //     Y    - overlap
1720  //      Y   - outside touching end
1721  //       Y  - outside after
1722
1723  for (int i = 0; i < 5; i++) {
1724    l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1725    l2.Add(CharacterRange::Singleton(offset + i), &zone);
1726    offset += 6;
1727  }
1728
1729  // The seven kinds of singleton/non-singleton intersections:
1730  //    XXX
1731  //  Y        - outside before
1732  //   Y       - outside touching start
1733  //    Y      - inside touching start
1734  //     Y     - entirely inside
1735  //      Y    - inside touching end
1736  //       Y   - outside touching end
1737  //        Y  - disjoint after
1738
1739  for (int i = 0; i < 7; i++) {
1740    l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1741    l2.Add(CharacterRange::Singleton(offset + i), &zone);
1742    offset += 8;
1743  }
1744
1745  // The eleven kinds of non-singleton intersections:
1746  //
1747  //       XXXXXXXX
1748  // YYYY                  - outside before.
1749  //   YYYY                - outside touching start.
1750  //     YYYY              - overlapping start
1751  //       YYYY            - inside touching start
1752  //         YYYY          - entirely inside
1753  //           YYYY        - inside touching end
1754  //             YYYY      - overlapping end
1755  //               YYYY    - outside touching end
1756  //                 YYYY  - outside after
1757  //       YYYYYYYY        - identical
1758  //     YYYYYYYYYYYY      - containing entirely.
1759
1760  for (int i = 0; i < 9; i++) {
1761    l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);  // Length 8.
1762    l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
1763    offset += 22;
1764  }
1765  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1766  l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1767  offset += 22;
1768  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1769  l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
1770  offset += 22;
1771
1772  // Different kinds of multi-range overlap:
1773  // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
1774  //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
1775
1776  l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1777  l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
1778  for (int i = 0; i < 6; i++) {
1779    l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1780    l2.Add(CharacterRange::Singleton(offset + 8), &zone);
1781    offset += 9;
1782  }
1783
1784  DCHECK(CharacterRange::IsCanonical(&l1));
1785  DCHECK(CharacterRange::IsCanonical(&l2));
1786
1787  ZoneList<CharacterRange> first_only(4, &zone);
1788  ZoneList<CharacterRange> second_only(4, &zone);
1789  ZoneList<CharacterRange> both(4, &zone);
1790}
1791
1792
1793TEST(Graph) {
1794  Execute("\\b\\w+\\b", false, true, true);
1795}
1796