// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 29#include <stdlib.h> 30 31#include "v8.h" 32 33#include "ast.h" 34#include "char-predicates-inl.h" 35#include "cctest.h" 36#include "jsregexp.h" 37#include "parser.h" 38#include "regexp-macro-assembler.h" 39#include "regexp-macro-assembler-irregexp.h" 40#include "string-stream.h" 41#include "zone-inl.h" 42#ifdef V8_INTERPRETED_REGEXP 43#include "interpreter-irregexp.h" 44#else // V8_INTERPRETED_REGEXP 45#include "macro-assembler.h" 46#include "code.h" 47#if V8_TARGET_ARCH_ARM 48#include "arm/assembler-arm.h" 49#include "arm/macro-assembler-arm.h" 50#include "arm/regexp-macro-assembler-arm.h" 51#endif 52#if V8_TARGET_ARCH_MIPS 53#include "mips/assembler-mips.h" 54#include "mips/macro-assembler-mips.h" 55#include "mips/regexp-macro-assembler-mips.h" 56#endif 57#if V8_TARGET_ARCH_X64 58#include "x64/assembler-x64.h" 59#include "x64/macro-assembler-x64.h" 60#include "x64/regexp-macro-assembler-x64.h" 61#endif 62#if V8_TARGET_ARCH_IA32 63#include "ia32/assembler-ia32.h" 64#include "ia32/macro-assembler-ia32.h" 65#include "ia32/regexp-macro-assembler-ia32.h" 66#endif 67#endif // V8_INTERPRETED_REGEXP 68 69using namespace v8::internal; 70 71 72static bool CheckParse(const char* input) { 73 V8::Initialize(NULL); 74 v8::HandleScope scope(CcTest::isolate()); 75 Zone zone(CcTest::i_isolate()); 76 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 77 RegExpCompileData result; 78 return v8::internal::RegExpParser::ParseRegExp( 79 &reader, false, &result, &zone); 80} 81 82 83static SmartArrayPointer<const char> Parse(const char* input) { 84 V8::Initialize(NULL); 85 v8::HandleScope scope(CcTest::isolate()); 86 Zone zone(CcTest::i_isolate()); 87 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 88 RegExpCompileData result; 89 CHECK(v8::internal::RegExpParser::ParseRegExp( 90 &reader, false, &result, &zone)); 91 CHECK(result.tree != NULL); 92 CHECK(result.error.is_null()); 93 SmartArrayPointer<const char> output = result.tree->ToString(&zone); 94 
return output; 95} 96 97 98static bool CheckSimple(const char* input) { 99 V8::Initialize(NULL); 100 v8::HandleScope scope(CcTest::isolate()); 101 Zone zone(CcTest::i_isolate()); 102 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 103 RegExpCompileData result; 104 CHECK(v8::internal::RegExpParser::ParseRegExp( 105 &reader, false, &result, &zone)); 106 CHECK(result.tree != NULL); 107 CHECK(result.error.is_null()); 108 return result.simple; 109} 110 111struct MinMaxPair { 112 int min_match; 113 int max_match; 114}; 115 116 117static MinMaxPair CheckMinMaxMatch(const char* input) { 118 V8::Initialize(NULL); 119 v8::HandleScope scope(CcTest::isolate()); 120 Zone zone(CcTest::i_isolate()); 121 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 122 RegExpCompileData result; 123 CHECK(v8::internal::RegExpParser::ParseRegExp( 124 &reader, false, &result, &zone)); 125 CHECK(result.tree != NULL); 126 CHECK(result.error.is_null()); 127 int min_match = result.tree->min_match(); 128 int max_match = result.tree->max_match(); 129 MinMaxPair pair = { min_match, max_match }; 130 return pair; 131} 132 133 134#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input)) 135#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input)) 136#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input)); 137#define CHECK_MIN_MAX(input, min, max) \ 138 { MinMaxPair min_max = CheckMinMaxMatch(input); \ 139 CHECK_EQ(min, min_max.min_match); \ 140 CHECK_EQ(max, min_max.max_match); \ 141 } 142 143TEST(Parser) { 144 V8::Initialize(NULL); 145 146 CHECK_PARSE_ERROR("?"); 147 148 CHECK_PARSE_EQ("abc", "'abc'"); 149 CHECK_PARSE_EQ("", "%"); 150 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')"); 151 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')"); 152 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)"); 153 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')"); 154 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])"); 155 CHECK_PARSE_EQ("a*", "(# 
0 - g 'a')"); 156 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')"); 157 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))"); 158 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))"); 159 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))"); 160 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))"); 161 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))"); 162 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))"); 163 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))"); 164 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))"); 165 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))"); 166 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))"); 167 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))"); 168 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))"); 169 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'"); 170 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')"); 171 CHECK_PARSE_EQ("(?:foo)", "'foo'"); 172 CHECK_PARSE_EQ("(?: foo )", "' foo '"); 173 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))"); 174 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')"); 175 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')"); 176 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')"); 177 CHECK_PARSE_EQ("()", "(^ %)"); 178 CHECK_PARSE_EQ("(?=)", "(-> + %)"); 179 CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows 180 CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252 181 CHECK_PARSE_EQ("[x]", "[x]"); 182 CHECK_PARSE_EQ("[xyz]", "[x y z]"); 183 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]"); 184 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]"); 185 CHECK_PARSE_EQ("[^123]", "^[1 2 3]"); 186 CHECK_PARSE_EQ("]", "']'"); 187 CHECK_PARSE_EQ("}", "'}'"); 188 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]"); 189 CHECK_PARSE_EQ("[\\d]", "[0-9]"); 190 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]"); 191 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]"); 192 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]"); 193 
CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]"); 194 // Control character outside character class. 195 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK", 196 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'"); 197 CHECK_PARSE_EQ("\\c!", "'\\c!'"); 198 CHECK_PARSE_EQ("\\c_", "'\\c_'"); 199 CHECK_PARSE_EQ("\\c~", "'\\c~'"); 200 CHECK_PARSE_EQ("\\c1", "'\\c1'"); 201 // Control character inside character class. 202 CHECK_PARSE_EQ("[\\c!]", "[\\ c !]"); 203 CHECK_PARSE_EQ("[\\c_]", "[\\x1f]"); 204 CHECK_PARSE_EQ("[\\c~]", "[\\ c ~]"); 205 CHECK_PARSE_EQ("[\\ca]", "[\\x01]"); 206 CHECK_PARSE_EQ("[\\cz]", "[\\x1a]"); 207 CHECK_PARSE_EQ("[\\cA]", "[\\x01]"); 208 CHECK_PARSE_EQ("[\\cZ]", "[\\x1a]"); 209 CHECK_PARSE_EQ("[\\c1]", "[\\x11]"); 210 211 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]"); 212 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '"); 213 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]"); 214 CHECK_PARSE_EQ("\\0", "'\\x00'"); 215 CHECK_PARSE_EQ("\\8", "'8'"); 216 CHECK_PARSE_EQ("\\9", "'9'"); 217 CHECK_PARSE_EQ("\\11", "'\\x09'"); 218 CHECK_PARSE_EQ("\\11a", "'\\x09a'"); 219 CHECK_PARSE_EQ("\\011", "'\\x09'"); 220 CHECK_PARSE_EQ("\\00011", "'\\x0011'"); 221 CHECK_PARSE_EQ("\\118", "'\\x098'"); 222 CHECK_PARSE_EQ("\\111", "'I'"); 223 CHECK_PARSE_EQ("\\1111", "'I1'"); 224 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))"); 225 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))"); 226 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))"); 227 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')"); 228 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')" 229 " (# 0 - g (<- 1)))"); 230 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')" 231 " (# 0 - g (<- 2)))"); 232 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')" 233 " (# 0 - g (<- 3)))"); 234 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')" 235 " (# 0 - g '\\x04'))"); 236 
CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10", 237 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 238 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))"); 239 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", 240 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')" 241 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')"); 242 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))"); 243 CHECK_PARSE_EQ("(a\\1)", "(^ 'a')"); 244 CHECK_PARSE_EQ("(\\1a)", "(^ 'a')"); 245 CHECK_PARSE_EQ("(?=a)?a", "'a'"); 246 CHECK_PARSE_EQ("(?=a){0,10}a", "'a'"); 247 CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')"); 248 CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')"); 249 CHECK_PARSE_EQ("(?!a)?a", "'a'"); 250 CHECK_PARSE_EQ("\\1(a)", "(^ 'a')"); 251 CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))"); 252 CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))"); 253 CHECK_PARSE_EQ("[\\0]", "[\\x00]"); 254 CHECK_PARSE_EQ("[\\11]", "[\\x09]"); 255 CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]"); 256 CHECK_PARSE_EQ("[\\011]", "[\\x09]"); 257 CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]"); 258 CHECK_PARSE_EQ("[\\118]", "[\\x09 8]"); 259 CHECK_PARSE_EQ("[\\111]", "[I]"); 260 CHECK_PARSE_EQ("[\\1111]", "[I 1]"); 261 CHECK_PARSE_EQ("\\x34", "'\x34'"); 262 CHECK_PARSE_EQ("\\x60", "'\x60'"); 263 CHECK_PARSE_EQ("\\x3z", "'x3z'"); 264 CHECK_PARSE_EQ("\\c", "'\\c'"); 265 CHECK_PARSE_EQ("\\u0034", "'\x34'"); 266 CHECK_PARSE_EQ("\\u003z", "'u003z'"); 267 CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))"); 268 269 CHECK_SIMPLE("", false); 270 CHECK_SIMPLE("a", true); 271 CHECK_SIMPLE("a|b", false); 272 CHECK_SIMPLE("a\\n", false); 273 CHECK_SIMPLE("^a", false); 274 CHECK_SIMPLE("a$", false); 275 CHECK_SIMPLE("a\\b!", false); 276 CHECK_SIMPLE("a\\Bb", false); 277 CHECK_SIMPLE("a*", false); 278 CHECK_SIMPLE("a*?", false); 279 CHECK_SIMPLE("a?", false); 280 CHECK_SIMPLE("a??", false); 281 CHECK_SIMPLE("a{0,1}?", false); 282 CHECK_SIMPLE("a{1,1}?", false); 283 CHECK_SIMPLE("a{1,2}?", 
false); 284 CHECK_SIMPLE("a+?", false); 285 CHECK_SIMPLE("(a)", false); 286 CHECK_SIMPLE("(a)\\1", false); 287 CHECK_SIMPLE("(\\1a)", false); 288 CHECK_SIMPLE("\\1(a)", false); 289 CHECK_SIMPLE("a\\s", false); 290 CHECK_SIMPLE("a\\S", false); 291 CHECK_SIMPLE("a\\d", false); 292 CHECK_SIMPLE("a\\D", false); 293 CHECK_SIMPLE("a\\w", false); 294 CHECK_SIMPLE("a\\W", false); 295 CHECK_SIMPLE("a.", false); 296 CHECK_SIMPLE("a\\q", false); 297 CHECK_SIMPLE("a[a]", false); 298 CHECK_SIMPLE("a[^a]", false); 299 CHECK_SIMPLE("a[a-z]", false); 300 CHECK_SIMPLE("a[\\q]", false); 301 CHECK_SIMPLE("a(?:b)", false); 302 CHECK_SIMPLE("a(?=b)", false); 303 CHECK_SIMPLE("a(?!b)", false); 304 CHECK_SIMPLE("\\x60", false); 305 CHECK_SIMPLE("\\u0060", false); 306 CHECK_SIMPLE("\\cA", false); 307 CHECK_SIMPLE("\\q", false); 308 CHECK_SIMPLE("\\1112", false); 309 CHECK_SIMPLE("\\0", false); 310 CHECK_SIMPLE("(a)\\1", false); 311 CHECK_SIMPLE("(?=a)?a", false); 312 CHECK_SIMPLE("(?!a)?a\\1", false); 313 CHECK_SIMPLE("(?:(?=a))a\\1", false); 314 315 CHECK_PARSE_EQ("a{}", "'a{}'"); 316 CHECK_PARSE_EQ("a{,}", "'a{,}'"); 317 CHECK_PARSE_EQ("a{", "'a{'"); 318 CHECK_PARSE_EQ("a{z}", "'a{z}'"); 319 CHECK_PARSE_EQ("a{1z}", "'a{1z}'"); 320 CHECK_PARSE_EQ("a{12z}", "'a{12z}'"); 321 CHECK_PARSE_EQ("a{12,", "'a{12,'"); 322 CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'"); 323 CHECK_PARSE_EQ("{}", "'{}'"); 324 CHECK_PARSE_EQ("{,}", "'{,}'"); 325 CHECK_PARSE_EQ("{", "'{'"); 326 CHECK_PARSE_EQ("{z}", "'{z}'"); 327 CHECK_PARSE_EQ("{1z}", "'{1z}'"); 328 CHECK_PARSE_EQ("{12z}", "'{12z}'"); 329 CHECK_PARSE_EQ("{12,", "'{12,'"); 330 CHECK_PARSE_EQ("{12,3b", "'{12,3b'"); 331 332 CHECK_MIN_MAX("a", 1, 1); 333 CHECK_MIN_MAX("abc", 3, 3); 334 CHECK_MIN_MAX("a[bc]d", 3, 3); 335 CHECK_MIN_MAX("a|bc", 1, 2); 336 CHECK_MIN_MAX("ab|c", 1, 2); 337 CHECK_MIN_MAX("a||bc", 0, 2); 338 CHECK_MIN_MAX("|", 0, 0); 339 CHECK_MIN_MAX("(?:ab)", 2, 2); 340 CHECK_MIN_MAX("(?:ab|cde)", 2, 3); 341 CHECK_MIN_MAX("(?:ab)|cde", 2, 3); 342 
CHECK_MIN_MAX("(ab)", 2, 2); 343 CHECK_MIN_MAX("(ab|cde)", 2, 3); 344 CHECK_MIN_MAX("(ab)\\1", 2, 4); 345 CHECK_MIN_MAX("(ab|cde)\\1", 2, 6); 346 CHECK_MIN_MAX("(?:ab)?", 0, 2); 347 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity); 348 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity); 349 CHECK_MIN_MAX("a?", 0, 1); 350 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity); 351 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity); 352 CHECK_MIN_MAX("a??", 0, 1); 353 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity); 354 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity); 355 CHECK_MIN_MAX("(?:a?)?", 0, 1); 356 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity); 357 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity); 358 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity); 359 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity); 360 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity); 361 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity); 362 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity); 363 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity); 364 CHECK_MIN_MAX("a{0}", 0, 0); 365 CHECK_MIN_MAX("(?:a+){0}", 0, 0); 366 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0); 367 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity); 368 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity); 369 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity); 370 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity); 371 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity); 372 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14); 373 CHECK_MIN_MAX("a\\bc", 2, 2); 374 CHECK_MIN_MAX("a\\Bc", 2, 2); 375 CHECK_MIN_MAX("a\\sc", 3, 3); 376 CHECK_MIN_MAX("a\\Sc", 3, 3); 377 CHECK_MIN_MAX("a(?=b)c", 2, 2); 378 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2); 379 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2); 380} 381 382 383TEST(ParserRegression) { 384 CHECK_PARSE_EQ("[A-Z$-][x]", "(! 
[A-Z $ -] [x])"); 385 CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"); 386 CHECK_PARSE_EQ("{", "'{'"); 387 CHECK_PARSE_EQ("a|", "(| 'a' %)"); 388} 389 390static void ExpectError(const char* input, 391 const char* expected) { 392 V8::Initialize(NULL); 393 v8::HandleScope scope(CcTest::isolate()); 394 Zone zone(CcTest::i_isolate()); 395 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 396 RegExpCompileData result; 397 CHECK(!v8::internal::RegExpParser::ParseRegExp( 398 &reader, false, &result, &zone)); 399 CHECK(result.tree == NULL); 400 CHECK(!result.error.is_null()); 401 SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS); 402 CHECK_EQ(expected, *str); 403} 404 405 406TEST(Errors) { 407 const char* kEndBackslash = "\\ at end of pattern"; 408 ExpectError("\\", kEndBackslash); 409 const char* kUnterminatedGroup = "Unterminated group"; 410 ExpectError("(foo", kUnterminatedGroup); 411 const char* kInvalidGroup = "Invalid group"; 412 ExpectError("(?", kInvalidGroup); 413 const char* kUnterminatedCharacterClass = "Unterminated character class"; 414 ExpectError("[", kUnterminatedCharacterClass); 415 ExpectError("[a-", kUnterminatedCharacterClass); 416 const char* kNothingToRepeat = "Nothing to repeat"; 417 ExpectError("*", kNothingToRepeat); 418 ExpectError("?", kNothingToRepeat); 419 ExpectError("+", kNothingToRepeat); 420 ExpectError("{1}", kNothingToRepeat); 421 ExpectError("{1,2}", kNothingToRepeat); 422 ExpectError("{1,}", kNothingToRepeat); 423 424 // Check that we don't allow more than kMaxCapture captures 425 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures. 
426 const char* kTooManyCaptures = "Too many captures"; 427 HeapStringAllocator allocator; 428 StringStream accumulator(&allocator); 429 for (int i = 0; i <= kMaxCaptures; i++) { 430 accumulator.Add("()"); 431 } 432 SmartArrayPointer<const char> many_captures(accumulator.ToCString()); 433 ExpectError(*many_captures, kTooManyCaptures); 434} 435 436 437static bool IsDigit(uc16 c) { 438 return ('0' <= c && c <= '9'); 439} 440 441 442static bool NotDigit(uc16 c) { 443 return !IsDigit(c); 444} 445 446 447static bool IsWhiteSpace(uc16 c) { 448 switch (c) { 449 case 0x09: 450 case 0x0A: 451 case 0x0B: 452 case 0x0C: 453 case 0x0d: 454 case 0x20: 455 case 0xA0: 456 case 0x2028: 457 case 0x2029: 458 case 0xFEFF: 459 return true; 460 default: 461 return unibrow::Space::Is(c); 462 } 463} 464 465 466static bool NotWhiteSpace(uc16 c) { 467 return !IsWhiteSpace(c); 468} 469 470 471static bool NotWord(uc16 c) { 472 return !IsRegExpWord(c); 473} 474 475 476static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { 477 Zone zone(CcTest::i_isolate()); 478 ZoneList<CharacterRange>* ranges = 479 new(&zone) ZoneList<CharacterRange>(2, &zone); 480 CharacterRange::AddClassEscape(c, ranges, &zone); 481 for (unsigned i = 0; i < (1 << 16); i++) { 482 bool in_class = false; 483 for (int j = 0; !in_class && j < ranges->length(); j++) { 484 CharacterRange& range = ranges->at(j); 485 in_class = (range.from() <= i && i <= range.to()); 486 } 487 CHECK_EQ(pred(i), in_class); 488 } 489} 490 491 492TEST(CharacterClassEscapes) { 493 v8::internal::V8::Initialize(NULL); 494 TestCharacterClassEscapes('.', IsRegExpNewline); 495 TestCharacterClassEscapes('d', IsDigit); 496 TestCharacterClassEscapes('D', NotDigit); 497 TestCharacterClassEscapes('s', IsWhiteSpace); 498 TestCharacterClassEscapes('S', NotWhiteSpace); 499 TestCharacterClassEscapes('w', IsRegExpWord); 500 TestCharacterClassEscapes('W', NotWord); 501} 502 503 504static RegExpNode* Compile(const char* input, 505 bool multiline, 506 bool 
is_ascii, 507 Zone* zone) { 508 V8::Initialize(NULL); 509 Isolate* isolate = CcTest::i_isolate(); 510 FlatStringReader reader(isolate, CStrVector(input)); 511 RegExpCompileData compile_data; 512 if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline, 513 &compile_data, zone)) 514 return NULL; 515 Handle<String> pattern = isolate->factory()-> 516 NewStringFromUtf8(CStrVector(input)); 517 Handle<String> sample_subject = 518 isolate->factory()->NewStringFromUtf8(CStrVector("")); 519 RegExpEngine::Compile(&compile_data, 520 false, 521 false, 522 multiline, 523 pattern, 524 sample_subject, 525 is_ascii, 526 zone); 527 return compile_data.node; 528} 529 530 531static void Execute(const char* input, 532 bool multiline, 533 bool is_ascii, 534 bool dot_output = false) { 535 v8::HandleScope scope(CcTest::isolate()); 536 Zone zone(CcTest::i_isolate()); 537 RegExpNode* node = Compile(input, multiline, is_ascii, &zone); 538 USE(node); 539#ifdef DEBUG 540 if (dot_output) { 541 RegExpEngine::DotPrint(input, node, false); 542 exit(0); 543 } 544#endif // DEBUG 545} 546 547 548class TestConfig { 549 public: 550 typedef int Key; 551 typedef int Value; 552 static const int kNoKey; 553 static int NoValue() { return 0; } 554 static inline int Compare(int a, int b) { 555 if (a < b) 556 return -1; 557 else if (a > b) 558 return 1; 559 else 560 return 0; 561 } 562}; 563 564 565const int TestConfig::kNoKey = 0; 566 567 568static unsigned PseudoRandom(int i, int j) { 569 return ~(~((i * 781) ^ (j * 329))); 570} 571 572 573TEST(SplayTreeSimple) { 574 v8::internal::V8::Initialize(NULL); 575 static const unsigned kLimit = 1000; 576 Zone zone(CcTest::i_isolate()); 577 ZoneSplayTree<TestConfig> tree(&zone); 578 bool seen[kLimit]; 579 for (unsigned i = 0; i < kLimit; i++) seen[i] = false; 580#define CHECK_MAPS_EQUAL() do { \ 581 for (unsigned k = 0; k < kLimit; k++) \ 582 CHECK_EQ(seen[k], tree.Find(k, &loc)); \ 583 } while (false) 584 for (int i = 0; i < 50; i++) { 585 for (int j = 0; j 
< 50; j++) { 586 unsigned next = PseudoRandom(i, j) % kLimit; 587 if (seen[next]) { 588 // We've already seen this one. Check the value and remove 589 // it. 590 ZoneSplayTree<TestConfig>::Locator loc; 591 CHECK(tree.Find(next, &loc)); 592 CHECK_EQ(next, loc.key()); 593 CHECK_EQ(3 * next, loc.value()); 594 tree.Remove(next); 595 seen[next] = false; 596 CHECK_MAPS_EQUAL(); 597 } else { 598 // Check that it wasn't there already and then add it. 599 ZoneSplayTree<TestConfig>::Locator loc; 600 CHECK(!tree.Find(next, &loc)); 601 CHECK(tree.Insert(next, &loc)); 602 CHECK_EQ(next, loc.key()); 603 loc.set_value(3 * next); 604 seen[next] = true; 605 CHECK_MAPS_EQUAL(); 606 } 607 int val = PseudoRandom(j, i) % kLimit; 608 if (seen[val]) { 609 ZoneSplayTree<TestConfig>::Locator loc; 610 CHECK(tree.FindGreatestLessThan(val, &loc)); 611 CHECK_EQ(loc.key(), val); 612 break; 613 } 614 val = PseudoRandom(i + j, i - j) % kLimit; 615 if (seen[val]) { 616 ZoneSplayTree<TestConfig>::Locator loc; 617 CHECK(tree.FindLeastGreaterThan(val, &loc)); 618 CHECK_EQ(loc.key(), val); 619 break; 620 } 621 } 622 } 623} 624 625 626TEST(DispatchTableConstruction) { 627 v8::internal::V8::Initialize(NULL); 628 // Initialize test data. 629 static const int kLimit = 1000; 630 static const int kRangeCount = 8; 631 static const int kRangeSize = 16; 632 uc16 ranges[kRangeCount][2 * kRangeSize]; 633 for (int i = 0; i < kRangeCount; i++) { 634 Vector<uc16> range(ranges[i], 2 * kRangeSize); 635 for (int j = 0; j < 2 * kRangeSize; j++) { 636 range[j] = PseudoRandom(i + 25, j + 87) % kLimit; 637 } 638 range.Sort(); 639 for (int j = 1; j < 2 * kRangeSize; j++) { 640 CHECK(range[j-1] <= range[j]); 641 } 642 } 643 // Enter test data into dispatch table. 
644 Zone zone(CcTest::i_isolate()); 645 DispatchTable table(&zone); 646 for (int i = 0; i < kRangeCount; i++) { 647 uc16* range = ranges[i]; 648 for (int j = 0; j < 2 * kRangeSize; j += 2) 649 table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone); 650 } 651 // Check that the table looks as we would expect 652 for (int p = 0; p < kLimit; p++) { 653 OutSet* outs = table.Get(p); 654 for (int j = 0; j < kRangeCount; j++) { 655 uc16* range = ranges[j]; 656 bool is_on = false; 657 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2) 658 is_on = (range[k] <= p && p <= range[k + 1]); 659 CHECK_EQ(is_on, outs->Get(j)); 660 } 661 } 662} 663 664 665// Test of debug-only syntax. 666#ifdef DEBUG 667 668TEST(ParsePossessiveRepetition) { 669 bool old_flag_value = FLAG_regexp_possessive_quantifier; 670 671 // Enable possessive quantifier syntax. 672 FLAG_regexp_possessive_quantifier = true; 673 674 CHECK_PARSE_EQ("a*+", "(# 0 - p 'a')"); 675 CHECK_PARSE_EQ("a++", "(# 1 - p 'a')"); 676 CHECK_PARSE_EQ("a?+", "(# 0 1 p 'a')"); 677 CHECK_PARSE_EQ("a{10,20}+", "(# 10 20 p 'a')"); 678 CHECK_PARSE_EQ("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')"); 679 680 // Disable possessive quantifier syntax. 681 FLAG_regexp_possessive_quantifier = false; 682 683 CHECK_PARSE_ERROR("a*+"); 684 CHECK_PARSE_ERROR("a++"); 685 CHECK_PARSE_ERROR("a?+"); 686 CHECK_PARSE_ERROR("a{10,20}+"); 687 CHECK_PARSE_ERROR("a{10,20}+b"); 688 689 FLAG_regexp_possessive_quantifier = old_flag_value; 690} 691 692#endif 693 694// Tests of interpreter. 
695 696 697#ifndef V8_INTERPRETED_REGEXP 698 699#if V8_TARGET_ARCH_IA32 700typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler; 701#elif V8_TARGET_ARCH_X64 702typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler; 703#elif V8_TARGET_ARCH_ARM 704typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler; 705#elif V8_TARGET_ARCH_MIPS 706typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler; 707#endif 708 709class ContextInitializer { 710 public: 711 ContextInitializer() 712 : scope_(CcTest::isolate()), 713 env_(v8::Context::New(CcTest::isolate())) { 714 env_->Enter(); 715 } 716 ~ContextInitializer() { 717 env_->Exit(); 718 } 719 private: 720 v8::HandleScope scope_; 721 v8::Handle<v8::Context> env_; 722}; 723 724 725static ArchRegExpMacroAssembler::Result Execute(Code* code, 726 String* input, 727 int start_offset, 728 const byte* input_start, 729 const byte* input_end, 730 int* captures) { 731 return NativeRegExpMacroAssembler::Execute( 732 code, 733 input, 734 start_offset, 735 input_start, 736 input_end, 737 captures, 738 0, 739 CcTest::i_isolate()); 740} 741 742 743TEST(MacroAssemblerNativeSuccess) { 744 v8::V8::Initialize(); 745 ContextInitializer initializer; 746 Isolate* isolate = CcTest::i_isolate(); 747 Factory* factory = isolate->factory(); 748 Zone zone(isolate); 749 750 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 751 752 m.Succeed(); 753 754 Handle<String> source = factory->NewStringFromAscii(CStrVector("")); 755 Handle<Object> code_object = m.GetCode(source); 756 Handle<Code> code = Handle<Code>::cast(code_object); 757 758 int captures[4] = {42, 37, 87, 117}; 759 Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo")); 760 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 761 const byte* start_adr = 762 reinterpret_cast<const byte*>(seq_input->GetCharsAddress()); 763 764 NativeRegExpMacroAssembler::Result result = 765 Execute(*code, 766 *input, 767 0, 768 start_adr, 769 
start_adr + seq_input->length(), 770 captures); 771 772 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 773 CHECK_EQ(-1, captures[0]); 774 CHECK_EQ(-1, captures[1]); 775 CHECK_EQ(-1, captures[2]); 776 CHECK_EQ(-1, captures[3]); 777} 778 779 780TEST(MacroAssemblerNativeSimple) { 781 v8::V8::Initialize(); 782 ContextInitializer initializer; 783 Isolate* isolate = CcTest::i_isolate(); 784 Factory* factory = isolate->factory(); 785 Zone zone(isolate); 786 787 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 788 789 Label fail, backtrack; 790 m.PushBacktrack(&fail); 791 m.CheckNotAtStart(NULL); 792 m.LoadCurrentCharacter(2, NULL); 793 m.CheckNotCharacter('o', NULL); 794 m.LoadCurrentCharacter(1, NULL, false); 795 m.CheckNotCharacter('o', NULL); 796 m.LoadCurrentCharacter(0, NULL, false); 797 m.CheckNotCharacter('f', NULL); 798 m.WriteCurrentPositionToRegister(0, 0); 799 m.WriteCurrentPositionToRegister(1, 3); 800 m.AdvanceCurrentPosition(3); 801 m.PushBacktrack(&backtrack); 802 m.Succeed(); 803 m.Bind(&backtrack); 804 m.Backtrack(); 805 m.Bind(&fail); 806 m.Fail(); 807 808 Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo")); 809 Handle<Object> code_object = m.GetCode(source); 810 Handle<Code> code = Handle<Code>::cast(code_object); 811 812 int captures[4] = {42, 37, 87, 117}; 813 Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo")); 814 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 815 Address start_adr = seq_input->GetCharsAddress(); 816 817 NativeRegExpMacroAssembler::Result result = 818 Execute(*code, 819 *input, 820 0, 821 start_adr, 822 start_adr + input->length(), 823 captures); 824 825 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 826 CHECK_EQ(0, captures[0]); 827 CHECK_EQ(3, captures[1]); 828 CHECK_EQ(-1, captures[2]); 829 CHECK_EQ(-1, captures[3]); 830 831 input = factory->NewStringFromAscii(CStrVector("barbarbar")); 832 seq_input = 
Handle<SeqOneByteString>::cast(input); 833 start_adr = seq_input->GetCharsAddress(); 834 835 result = Execute(*code, 836 *input, 837 0, 838 start_adr, 839 start_adr + input->length(), 840 captures); 841 842 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 843} 844 845 846TEST(MacroAssemblerNativeSimpleUC16) { 847 v8::V8::Initialize(); 848 ContextInitializer initializer; 849 Isolate* isolate = CcTest::i_isolate(); 850 Factory* factory = isolate->factory(); 851 Zone zone(isolate); 852 853 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone); 854 855 Label fail, backtrack; 856 m.PushBacktrack(&fail); 857 m.CheckNotAtStart(NULL); 858 m.LoadCurrentCharacter(2, NULL); 859 m.CheckNotCharacter('o', NULL); 860 m.LoadCurrentCharacter(1, NULL, false); 861 m.CheckNotCharacter('o', NULL); 862 m.LoadCurrentCharacter(0, NULL, false); 863 m.CheckNotCharacter('f', NULL); 864 m.WriteCurrentPositionToRegister(0, 0); 865 m.WriteCurrentPositionToRegister(1, 3); 866 m.AdvanceCurrentPosition(3); 867 m.PushBacktrack(&backtrack); 868 m.Succeed(); 869 m.Bind(&backtrack); 870 m.Backtrack(); 871 m.Bind(&fail); 872 m.Fail(); 873 874 Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo")); 875 Handle<Object> code_object = m.GetCode(source); 876 Handle<Code> code = Handle<Code>::cast(code_object); 877 878 int captures[4] = {42, 37, 87, 117}; 879 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', 880 static_cast<uc16>(0x2603)}; 881 Handle<String> input = 882 factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6)); 883 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 884 Address start_adr = seq_input->GetCharsAddress(); 885 886 NativeRegExpMacroAssembler::Result result = 887 Execute(*code, 888 *input, 889 0, 890 start_adr, 891 start_adr + input->length(), 892 captures); 893 894 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 895 CHECK_EQ(0, captures[0]); 896 CHECK_EQ(3, captures[1]); 897 CHECK_EQ(-1, captures[2]); 
898 CHECK_EQ(-1, captures[3]); 899 900 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 901 static_cast<uc16>(0x2603)}; 902 input = factory->NewStringFromTwoByte(Vector<const uc16>(input_data2, 9)); 903 seq_input = Handle<SeqTwoByteString>::cast(input); 904 start_adr = seq_input->GetCharsAddress(); 905 906 result = Execute(*code, 907 *input, 908 0, 909 start_adr, 910 start_adr + input->length() * 2, 911 captures); 912 913 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 914} 915 916 917TEST(MacroAssemblerNativeBacktrack) { 918 v8::V8::Initialize(); 919 ContextInitializer initializer; 920 Isolate* isolate = CcTest::i_isolate(); 921 Factory* factory = isolate->factory(); 922 Zone zone(isolate); 923 924 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone); 925 926 Label fail; 927 Label backtrack; 928 m.LoadCurrentCharacter(10, &fail); 929 m.Succeed(); 930 m.Bind(&fail); 931 m.PushBacktrack(&backtrack); 932 m.LoadCurrentCharacter(10, NULL); 933 m.Succeed(); 934 m.Bind(&backtrack); 935 m.Fail(); 936 937 Handle<String> source = factory->NewStringFromAscii(CStrVector("..........")); 938 Handle<Object> code_object = m.GetCode(source); 939 Handle<Code> code = Handle<Code>::cast(code_object); 940 941 Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo")); 942 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 943 Address start_adr = seq_input->GetCharsAddress(); 944 945 NativeRegExpMacroAssembler::Result result = 946 Execute(*code, 947 *input, 948 0, 949 start_adr, 950 start_adr + input->length(), 951 NULL); 952 953 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 954} 955 956 957TEST(MacroAssemblerNativeBackReferenceASCII) { 958 v8::V8::Initialize(); 959 ContextInitializer initializer; 960 Isolate* isolate = CcTest::i_isolate(); 961 Factory* factory = isolate->factory(); 962 Zone zone(isolate); 963 964 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 965 966 
m.WriteCurrentPositionToRegister(0, 0); 967 m.AdvanceCurrentPosition(2); 968 m.WriteCurrentPositionToRegister(1, 0); 969 Label nomatch; 970 m.CheckNotBackReference(0, &nomatch); 971 m.Fail(); 972 m.Bind(&nomatch); 973 m.AdvanceCurrentPosition(2); 974 Label missing_match; 975 m.CheckNotBackReference(0, &missing_match); 976 m.WriteCurrentPositionToRegister(2, 0); 977 m.Succeed(); 978 m.Bind(&missing_match); 979 m.Fail(); 980 981 Handle<String> source = factory->NewStringFromAscii(CStrVector("^(..)..\1")); 982 Handle<Object> code_object = m.GetCode(source); 983 Handle<Code> code = Handle<Code>::cast(code_object); 984 985 Handle<String> input = factory->NewStringFromAscii(CStrVector("fooofo")); 986 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 987 Address start_adr = seq_input->GetCharsAddress(); 988 989 int output[4]; 990 NativeRegExpMacroAssembler::Result result = 991 Execute(*code, 992 *input, 993 0, 994 start_adr, 995 start_adr + input->length(), 996 output); 997 998 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 999 CHECK_EQ(0, output[0]); 1000 CHECK_EQ(2, output[1]); 1001 CHECK_EQ(6, output[2]); 1002 CHECK_EQ(-1, output[3]); 1003} 1004 1005 1006TEST(MacroAssemblerNativeBackReferenceUC16) { 1007 v8::V8::Initialize(); 1008 ContextInitializer initializer; 1009 Isolate* isolate = CcTest::i_isolate(); 1010 Factory* factory = isolate->factory(); 1011 Zone zone(isolate); 1012 1013 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone); 1014 1015 m.WriteCurrentPositionToRegister(0, 0); 1016 m.AdvanceCurrentPosition(2); 1017 m.WriteCurrentPositionToRegister(1, 0); 1018 Label nomatch; 1019 m.CheckNotBackReference(0, &nomatch); 1020 m.Fail(); 1021 m.Bind(&nomatch); 1022 m.AdvanceCurrentPosition(2); 1023 Label missing_match; 1024 m.CheckNotBackReference(0, &missing_match); 1025 m.WriteCurrentPositionToRegister(2, 0); 1026 m.Succeed(); 1027 m.Bind(&missing_match); 1028 m.Fail(); 1029 1030 Handle<String> source = 
factory->NewStringFromAscii(CStrVector("^(..)..\1")); 1031 Handle<Object> code_object = m.GetCode(source); 1032 Handle<Code> code = Handle<Code>::cast(code_object); 1033 1034 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028}; 1035 Handle<String> input = 1036 factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6)); 1037 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 1038 Address start_adr = seq_input->GetCharsAddress(); 1039 1040 int output[4]; 1041 NativeRegExpMacroAssembler::Result result = 1042 Execute(*code, 1043 *input, 1044 0, 1045 start_adr, 1046 start_adr + input->length() * 2, 1047 output); 1048 1049 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1050 CHECK_EQ(0, output[0]); 1051 CHECK_EQ(2, output[1]); 1052 CHECK_EQ(6, output[2]); 1053 CHECK_EQ(-1, output[3]); 1054} 1055 1056 1057 1058TEST(MacroAssemblernativeAtStart) { 1059 v8::V8::Initialize(); 1060 ContextInitializer initializer; 1061 Isolate* isolate = CcTest::i_isolate(); 1062 Factory* factory = isolate->factory(); 1063 Zone zone(isolate); 1064 1065 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone); 1066 1067 Label not_at_start, newline, fail; 1068 m.CheckNotAtStart(¬_at_start); 1069 // Check that prevchar = '\n' and current = 'f'. 1070 m.CheckCharacter('\n', &newline); 1071 m.Bind(&fail); 1072 m.Fail(); 1073 m.Bind(&newline); 1074 m.LoadCurrentCharacter(0, &fail); 1075 m.CheckNotCharacter('f', &fail); 1076 m.Succeed(); 1077 1078 m.Bind(¬_at_start); 1079 // Check that prevchar = 'o' and current = 'b'. 
1080 Label prevo; 1081 m.CheckCharacter('o', &prevo); 1082 m.Fail(); 1083 m.Bind(&prevo); 1084 m.LoadCurrentCharacter(0, &fail); 1085 m.CheckNotCharacter('b', &fail); 1086 m.Succeed(); 1087 1088 Handle<String> source = factory->NewStringFromAscii(CStrVector("(^f|ob)")); 1089 Handle<Object> code_object = m.GetCode(source); 1090 Handle<Code> code = Handle<Code>::cast(code_object); 1091 1092 Handle<String> input = factory->NewStringFromAscii(CStrVector("foobar")); 1093 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1094 Address start_adr = seq_input->GetCharsAddress(); 1095 1096 NativeRegExpMacroAssembler::Result result = 1097 Execute(*code, 1098 *input, 1099 0, 1100 start_adr, 1101 start_adr + input->length(), 1102 NULL); 1103 1104 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1105 1106 result = Execute(*code, 1107 *input, 1108 3, 1109 start_adr + 3, 1110 start_adr + input->length(), 1111 NULL); 1112 1113 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1114} 1115 1116 1117TEST(MacroAssemblerNativeBackRefNoCase) { 1118 v8::V8::Initialize(); 1119 ContextInitializer initializer; 1120 Isolate* isolate = CcTest::i_isolate(); 1121 Factory* factory = isolate->factory(); 1122 Zone zone(isolate); 1123 1124 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone); 1125 1126 Label fail, succ; 1127 1128 m.WriteCurrentPositionToRegister(0, 0); 1129 m.WriteCurrentPositionToRegister(2, 0); 1130 m.AdvanceCurrentPosition(3); 1131 m.WriteCurrentPositionToRegister(3, 0); 1132 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC". 1133 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC". 
1134 Label expected_fail; 1135 m.CheckNotBackReferenceIgnoreCase(2, &expected_fail); 1136 m.Bind(&fail); 1137 m.Fail(); 1138 1139 m.Bind(&expected_fail); 1140 m.AdvanceCurrentPosition(3); // Skip "xYz" 1141 m.CheckNotBackReferenceIgnoreCase(2, &succ); 1142 m.Fail(); 1143 1144 m.Bind(&succ); 1145 m.WriteCurrentPositionToRegister(1, 0); 1146 m.Succeed(); 1147 1148 Handle<String> source = 1149 factory->NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)")); 1150 Handle<Object> code_object = m.GetCode(source); 1151 Handle<Code> code = Handle<Code>::cast(code_object); 1152 1153 Handle<String> input = 1154 factory->NewStringFromAscii(CStrVector("aBcAbCABCxYzab")); 1155 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1156 Address start_adr = seq_input->GetCharsAddress(); 1157 1158 int output[4]; 1159 NativeRegExpMacroAssembler::Result result = 1160 Execute(*code, 1161 *input, 1162 0, 1163 start_adr, 1164 start_adr + input->length(), 1165 output); 1166 1167 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1168 CHECK_EQ(0, output[0]); 1169 CHECK_EQ(12, output[1]); 1170 CHECK_EQ(0, output[2]); 1171 CHECK_EQ(3, output[3]); 1172} 1173 1174 1175 1176TEST(MacroAssemblerNativeRegisters) { 1177 v8::V8::Initialize(); 1178 ContextInitializer initializer; 1179 Isolate* isolate = CcTest::i_isolate(); 1180 Factory* factory = isolate->factory(); 1181 Zone zone(isolate); 1182 1183 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6, &zone); 1184 1185 uc16 foo_chars[3] = {'f', 'o', 'o'}; 1186 Vector<const uc16> foo(foo_chars, 3); 1187 1188 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt }; 1189 Label fail; 1190 Label backtrack; 1191 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0] 1192 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1193 m.PushBacktrack(&backtrack); 1194 m.WriteStackPointerToRegister(sp); 1195 // Fill stack and registers 1196 m.AdvanceCurrentPosition(2); 1197 
m.WriteCurrentPositionToRegister(out1, 0); 1198 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1199 m.PushBacktrack(&fail); 1200 // Drop backtrack stack frames. 1201 m.ReadStackPointerFromRegister(sp); 1202 // And take the first backtrack (to &backtrack) 1203 m.Backtrack(); 1204 1205 m.PushCurrentPosition(); 1206 m.AdvanceCurrentPosition(2); 1207 m.PopCurrentPosition(); 1208 1209 m.Bind(&backtrack); 1210 m.PopRegister(out1); 1211 m.ReadCurrentPositionFromRegister(out1); 1212 m.AdvanceCurrentPosition(3); 1213 m.WriteCurrentPositionToRegister(out2, 0); // [0,3] 1214 1215 Label loop; 1216 m.SetRegister(loop_cnt, 0); // loop counter 1217 m.Bind(&loop); 1218 m.AdvanceRegister(loop_cnt, 1); 1219 m.AdvanceCurrentPosition(1); 1220 m.IfRegisterLT(loop_cnt, 3, &loop); 1221 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6] 1222 1223 Label loop2; 1224 m.SetRegister(loop_cnt, 2); // loop counter 1225 m.Bind(&loop2); 1226 m.AdvanceRegister(loop_cnt, -1); 1227 m.AdvanceCurrentPosition(1); 1228 m.IfRegisterGE(loop_cnt, 0, &loop2); 1229 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9] 1230 1231 Label loop3; 1232 Label exit_loop3; 1233 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1234 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1235 m.ReadCurrentPositionFromRegister(out3); 1236 m.Bind(&loop3); 1237 m.AdvanceCurrentPosition(1); 1238 m.CheckGreedyLoop(&exit_loop3); 1239 m.GoTo(&loop3); 1240 m.Bind(&exit_loop3); 1241 m.PopCurrentPosition(); 1242 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1] 1243 1244 m.Succeed(); 1245 1246 m.Bind(&fail); 1247 m.Fail(); 1248 1249 Handle<String> source = 1250 factory->NewStringFromAscii(CStrVector("<loop test>")); 1251 Handle<Object> code_object = m.GetCode(source); 1252 Handle<Code> code = Handle<Code>::cast(code_object); 1253 1254 // String long enough for test (content doesn't matter). 
1255 Handle<String> input = 1256 factory->NewStringFromAscii(CStrVector("foofoofoofoofoo")); 1257 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1258 Address start_adr = seq_input->GetCharsAddress(); 1259 1260 int output[6]; 1261 NativeRegExpMacroAssembler::Result result = 1262 Execute(*code, 1263 *input, 1264 0, 1265 start_adr, 1266 start_adr + input->length(), 1267 output); 1268 1269 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1270 CHECK_EQ(0, output[0]); 1271 CHECK_EQ(3, output[1]); 1272 CHECK_EQ(6, output[2]); 1273 CHECK_EQ(9, output[3]); 1274 CHECK_EQ(9, output[4]); 1275 CHECK_EQ(-1, output[5]); 1276} 1277 1278 1279TEST(MacroAssemblerStackOverflow) { 1280 v8::V8::Initialize(); 1281 ContextInitializer initializer; 1282 Isolate* isolate = CcTest::i_isolate(); 1283 Factory* factory = isolate->factory(); 1284 Zone zone(isolate); 1285 1286 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0, &zone); 1287 1288 Label loop; 1289 m.Bind(&loop); 1290 m.PushBacktrack(&loop); 1291 m.GoTo(&loop); 1292 1293 Handle<String> source = 1294 factory->NewStringFromAscii(CStrVector("<stack overflow test>")); 1295 Handle<Object> code_object = m.GetCode(source); 1296 Handle<Code> code = Handle<Code>::cast(code_object); 1297 1298 // String long enough for test (content doesn't matter). 
1299 Handle<String> input = 1300 factory->NewStringFromAscii(CStrVector("dummy")); 1301 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1302 Address start_adr = seq_input->GetCharsAddress(); 1303 1304 NativeRegExpMacroAssembler::Result result = 1305 Execute(*code, 1306 *input, 1307 0, 1308 start_adr, 1309 start_adr + input->length(), 1310 NULL); 1311 1312 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result); 1313 CHECK(isolate->has_pending_exception()); 1314 isolate->clear_pending_exception(); 1315} 1316 1317 1318TEST(MacroAssemblerNativeLotsOfRegisters) { 1319 v8::V8::Initialize(); 1320 ContextInitializer initializer; 1321 Isolate* isolate = CcTest::i_isolate(); 1322 Factory* factory = isolate->factory(); 1323 Zone zone(isolate); 1324 1325 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2, &zone); 1326 1327 // At least 2048, to ensure the allocated space for registers 1328 // span one full page. 1329 const int large_number = 8000; 1330 m.WriteCurrentPositionToRegister(large_number, 42); 1331 m.WriteCurrentPositionToRegister(0, 0); 1332 m.WriteCurrentPositionToRegister(1, 1); 1333 Label done; 1334 m.CheckNotBackReference(0, &done); // Performs a system-stack push. 1335 m.Bind(&done); 1336 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck); 1337 m.PopRegister(1); 1338 m.Succeed(); 1339 1340 Handle<String> source = 1341 factory->NewStringFromAscii(CStrVector("<huge register space test>")); 1342 Handle<Object> code_object = m.GetCode(source); 1343 Handle<Code> code = Handle<Code>::cast(code_object); 1344 1345 // String long enough for test (content doesn't matter). 
1346 Handle<String> input = 1347 factory->NewStringFromAscii(CStrVector("sample text")); 1348 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1349 Address start_adr = seq_input->GetCharsAddress(); 1350 1351 int captures[2]; 1352 NativeRegExpMacroAssembler::Result result = 1353 Execute(*code, 1354 *input, 1355 0, 1356 start_adr, 1357 start_adr + input->length(), 1358 captures); 1359 1360 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1361 CHECK_EQ(0, captures[0]); 1362 CHECK_EQ(42, captures[1]); 1363 1364 isolate->clear_pending_exception(); 1365} 1366 1367#else // V8_INTERPRETED_REGEXP 1368 1369TEST(MacroAssembler) { 1370 V8::Initialize(NULL); 1371 byte codes[1024]; 1372 Zone zone(CcTest::i_isolate()); 1373 RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone); 1374 // ^f(o)o. 1375 Label start, fail, backtrack; 1376 1377 m.SetRegister(4, 42); 1378 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck); 1379 m.AdvanceRegister(4, 42); 1380 m.GoTo(&start); 1381 m.Fail(); 1382 m.Bind(&start); 1383 m.PushBacktrack(&fail); 1384 m.CheckNotAtStart(NULL); 1385 m.LoadCurrentCharacter(0, NULL); 1386 m.CheckNotCharacter('f', NULL); 1387 m.LoadCurrentCharacter(1, NULL); 1388 m.CheckNotCharacter('o', NULL); 1389 m.LoadCurrentCharacter(2, NULL); 1390 m.CheckNotCharacter('o', NULL); 1391 m.WriteCurrentPositionToRegister(0, 0); 1392 m.WriteCurrentPositionToRegister(1, 3); 1393 m.WriteCurrentPositionToRegister(2, 1); 1394 m.WriteCurrentPositionToRegister(3, 2); 1395 m.AdvanceCurrentPosition(3); 1396 m.PushBacktrack(&backtrack); 1397 m.Succeed(); 1398 m.Bind(&backtrack); 1399 m.ClearRegisters(2, 3); 1400 m.Backtrack(); 1401 m.Bind(&fail); 1402 m.PopRegister(0); 1403 m.Fail(); 1404 1405 Isolate* isolate = CcTest::i_isolate(); 1406 Factory* factory = isolate->factory(); 1407 HandleScope scope(isolate); 1408 1409 Handle<String> source = factory->NewStringFromAscii(CStrVector("^f(o)o")); 1410 Handle<ByteArray> array = 
Handle<ByteArray>::cast(m.GetCode(source)); 1411 int captures[5]; 1412 1413 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'}; 1414 Handle<String> f1_16 = 1415 factory->NewStringFromTwoByte(Vector<const uc16>(str1, 6)); 1416 1417 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0)); 1418 CHECK_EQ(0, captures[0]); 1419 CHECK_EQ(3, captures[1]); 1420 CHECK_EQ(1, captures[2]); 1421 CHECK_EQ(2, captures[3]); 1422 CHECK_EQ(84, captures[4]); 1423 1424 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'}; 1425 Handle<String> f2_16 = 1426 factory->NewStringFromTwoByte(Vector<const uc16>(str2, 6)); 1427 1428 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0)); 1429 CHECK_EQ(42, captures[0]); 1430} 1431 1432#endif // V8_INTERPRETED_REGEXP 1433 1434 1435TEST(AddInverseToTable) { 1436 v8::internal::V8::Initialize(NULL); 1437 static const int kLimit = 1000; 1438 static const int kRangeCount = 16; 1439 for (int t = 0; t < 10; t++) { 1440 Zone zone(CcTest::i_isolate()); 1441 ZoneList<CharacterRange>* ranges = 1442 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone); 1443 for (int i = 0; i < kRangeCount; i++) { 1444 int from = PseudoRandom(t + 87, i + 25) % kLimit; 1445 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); 1446 if (to > kLimit) to = kLimit; 1447 ranges->Add(CharacterRange(from, to), &zone); 1448 } 1449 DispatchTable table(&zone); 1450 DispatchTableConstructor cons(&table, false, &zone); 1451 cons.set_choice_index(0); 1452 cons.AddInverse(ranges); 1453 for (int i = 0; i < kLimit; i++) { 1454 bool is_on = false; 1455 for (int j = 0; !is_on && j < kRangeCount; j++) 1456 is_on = ranges->at(j).Contains(i); 1457 OutSet* set = table.Get(i); 1458 CHECK_EQ(is_on, set->Get(0) == false); 1459 } 1460 } 1461 Zone zone(CcTest::i_isolate()); 1462 ZoneList<CharacterRange>* ranges = 1463 new(&zone) ZoneList<CharacterRange>(1, &zone); 1464 ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone); 1465 DispatchTable table(&zone); 1466 
DispatchTableConstructor cons(&table, false, &zone); 1467 cons.set_choice_index(0); 1468 cons.AddInverse(ranges); 1469 CHECK(!table.Get(0xFFFE)->Get(0)); 1470 CHECK(table.Get(0xFFFF)->Get(0)); 1471} 1472 1473 1474static uc32 canonicalize(uc32 c) { 1475 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; 1476 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); 1477 if (count == 0) { 1478 return c; 1479 } else { 1480 CHECK_EQ(1, count); 1481 return canon[0]; 1482 } 1483} 1484 1485 1486TEST(LatinCanonicalize) { 1487 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1488 for (char lower = 'a'; lower <= 'z'; lower++) { 1489 char upper = lower + ('A' - 'a'); 1490 CHECK_EQ(canonicalize(lower), canonicalize(upper)); 1491 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1492 int length = un_canonicalize.get(lower, '\0', uncanon); 1493 CHECK_EQ(2, length); 1494 CHECK_EQ(upper, uncanon[0]); 1495 CHECK_EQ(lower, uncanon[1]); 1496 } 1497 for (uc32 c = 128; c < (1 << 21); c++) 1498 CHECK_GE(canonicalize(c), 128); 1499 unibrow::Mapping<unibrow::ToUppercase> to_upper; 1500 // Canonicalization is only defined for the Basic Multilingual Plane. 
1501 for (uc32 c = 0; c < (1 << 16); c++) { 1502 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth]; 1503 int length = to_upper.get(c, '\0', upper); 1504 if (length == 0) { 1505 length = 1; 1506 upper[0] = c; 1507 } 1508 uc32 u = upper[0]; 1509 if (length > 1 || (c >= 128 && u < 128)) 1510 u = c; 1511 CHECK_EQ(u, canonicalize(c)); 1512 } 1513} 1514 1515 1516static uc32 CanonRangeEnd(uc32 c) { 1517 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth]; 1518 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL); 1519 if (count == 0) { 1520 return c; 1521 } else { 1522 CHECK_EQ(1, count); 1523 return canon[0]; 1524 } 1525} 1526 1527 1528TEST(RangeCanonicalization) { 1529 // Check that we arrive at the same result when using the basic 1530 // range canonicalization primitives as when using immediate 1531 // canonicalization. 1532 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1533 int block_start = 0; 1534 while (block_start <= 0xFFFF) { 1535 uc32 block_end = CanonRangeEnd(block_start); 1536 unsigned block_length = block_end - block_start + 1; 1537 if (block_length > 1) { 1538 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1539 int first_length = un_canonicalize.get(block_start, '\0', first); 1540 for (unsigned i = 1; i < block_length; i++) { 1541 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1542 int succ_length = un_canonicalize.get(block_start + i, '\0', succ); 1543 CHECK_EQ(first_length, succ_length); 1544 for (int j = 0; j < succ_length; j++) { 1545 int calc = first[j] + i; 1546 int found = succ[j]; 1547 CHECK_EQ(calc, found); 1548 } 1549 } 1550 } 1551 block_start = block_start + block_length; 1552 } 1553} 1554 1555 1556TEST(UncanonicalizeEquivalence) { 1557 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1558 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1559 for (int i = 0; i < (1 << 16); i++) { 1560 int length = un_canonicalize.get(i, 
'\0', chars); 1561 for (int j = 0; j < length; j++) { 1562 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1563 int length2 = un_canonicalize.get(chars[j], '\0', chars2); 1564 CHECK_EQ(length, length2); 1565 for (int k = 0; k < length; k++) 1566 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k])); 1567 } 1568 } 1569} 1570 1571 1572static void TestRangeCaseIndependence(CharacterRange input, 1573 Vector<CharacterRange> expected) { 1574 Zone zone(CcTest::i_isolate()); 1575 int count = expected.length(); 1576 ZoneList<CharacterRange>* list = 1577 new(&zone) ZoneList<CharacterRange>(count, &zone); 1578 input.AddCaseEquivalents(list, false, &zone); 1579 CHECK_EQ(count, list->length()); 1580 for (int i = 0; i < list->length(); i++) { 1581 CHECK_EQ(expected[i].from(), list->at(i).from()); 1582 CHECK_EQ(expected[i].to(), list->at(i).to()); 1583 } 1584} 1585 1586 1587static void TestSimpleRangeCaseIndependence(CharacterRange input, 1588 CharacterRange expected) { 1589 EmbeddedVector<CharacterRange, 1> vector; 1590 vector[0] = expected; 1591 TestRangeCaseIndependence(input, vector); 1592} 1593 1594 1595TEST(CharacterRangeCaseIndependence) { 1596 v8::internal::V8::Initialize(NULL); 1597 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'), 1598 CharacterRange::Singleton('A')); 1599 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'), 1600 CharacterRange::Singleton('Z')); 1601 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'), 1602 CharacterRange('A', 'Z')); 1603 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'), 1604 CharacterRange('C', 'F')); 1605 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'), 1606 CharacterRange('A', 'B')); 1607 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'), 1608 CharacterRange('Y', 'Z')); 1609 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1), 1610 CharacterRange('A', 'Z')); 1611 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'), 1612 
CharacterRange('a', 'z')); 1613 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'), 1614 CharacterRange('c', 'f')); 1615 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1), 1616 CharacterRange('a', 'z')); 1617 // Here we need to add [l-z] to complete the case independence of 1618 // [A-Za-z] but we expect [a-z] to be added since we always add a 1619 // whole block at a time. 1620 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'), 1621 CharacterRange('a', 'z')); 1622} 1623 1624 1625static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) { 1626 if (ranges == NULL) 1627 return false; 1628 for (int i = 0; i < ranges->length(); i++) { 1629 CharacterRange range = ranges->at(i); 1630 if (range.from() <= c && c <= range.to()) 1631 return true; 1632 } 1633 return false; 1634} 1635 1636 1637TEST(CharClassDifference) { 1638 v8::internal::V8::Initialize(NULL); 1639 Zone zone(CcTest::i_isolate()); 1640 ZoneList<CharacterRange>* base = 1641 new(&zone) ZoneList<CharacterRange>(1, &zone); 1642 base->Add(CharacterRange::Everything(), &zone); 1643 Vector<const int> overlay = CharacterRange::GetWordBounds(); 1644 ZoneList<CharacterRange>* included = NULL; 1645 ZoneList<CharacterRange>* excluded = NULL; 1646 CharacterRange::Split(base, overlay, &included, &excluded, &zone); 1647 for (int i = 0; i < (1 << 16); i++) { 1648 bool in_base = InClass(i, base); 1649 if (in_base) { 1650 bool in_overlay = false; 1651 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) { 1652 if (overlay[j] <= i && i < overlay[j+1]) 1653 in_overlay = true; 1654 } 1655 CHECK_EQ(in_overlay, InClass(i, included)); 1656 CHECK_EQ(!in_overlay, InClass(i, excluded)); 1657 } else { 1658 CHECK(!InClass(i, included)); 1659 CHECK(!InClass(i, excluded)); 1660 } 1661 } 1662} 1663 1664 1665TEST(CanonicalizeCharacterSets) { 1666 v8::internal::V8::Initialize(NULL); 1667 Zone zone(CcTest::i_isolate()); 1668 ZoneList<CharacterRange>* list = 1669 new(&zone) ZoneList<CharacterRange>(4, 
&zone); 1670 CharacterSet set(list); 1671 1672 list->Add(CharacterRange(10, 20), &zone); 1673 list->Add(CharacterRange(30, 40), &zone); 1674 list->Add(CharacterRange(50, 60), &zone); 1675 set.Canonicalize(); 1676 ASSERT_EQ(3, list->length()); 1677 ASSERT_EQ(10, list->at(0).from()); 1678 ASSERT_EQ(20, list->at(0).to()); 1679 ASSERT_EQ(30, list->at(1).from()); 1680 ASSERT_EQ(40, list->at(1).to()); 1681 ASSERT_EQ(50, list->at(2).from()); 1682 ASSERT_EQ(60, list->at(2).to()); 1683 1684 list->Rewind(0); 1685 list->Add(CharacterRange(10, 20), &zone); 1686 list->Add(CharacterRange(50, 60), &zone); 1687 list->Add(CharacterRange(30, 40), &zone); 1688 set.Canonicalize(); 1689 ASSERT_EQ(3, list->length()); 1690 ASSERT_EQ(10, list->at(0).from()); 1691 ASSERT_EQ(20, list->at(0).to()); 1692 ASSERT_EQ(30, list->at(1).from()); 1693 ASSERT_EQ(40, list->at(1).to()); 1694 ASSERT_EQ(50, list->at(2).from()); 1695 ASSERT_EQ(60, list->at(2).to()); 1696 1697 list->Rewind(0); 1698 list->Add(CharacterRange(30, 40), &zone); 1699 list->Add(CharacterRange(10, 20), &zone); 1700 list->Add(CharacterRange(25, 25), &zone); 1701 list->Add(CharacterRange(100, 100), &zone); 1702 list->Add(CharacterRange(1, 1), &zone); 1703 set.Canonicalize(); 1704 ASSERT_EQ(5, list->length()); 1705 ASSERT_EQ(1, list->at(0).from()); 1706 ASSERT_EQ(1, list->at(0).to()); 1707 ASSERT_EQ(10, list->at(1).from()); 1708 ASSERT_EQ(20, list->at(1).to()); 1709 ASSERT_EQ(25, list->at(2).from()); 1710 ASSERT_EQ(25, list->at(2).to()); 1711 ASSERT_EQ(30, list->at(3).from()); 1712 ASSERT_EQ(40, list->at(3).to()); 1713 ASSERT_EQ(100, list->at(4).from()); 1714 ASSERT_EQ(100, list->at(4).to()); 1715 1716 list->Rewind(0); 1717 list->Add(CharacterRange(10, 19), &zone); 1718 list->Add(CharacterRange(21, 30), &zone); 1719 list->Add(CharacterRange(20, 20), &zone); 1720 set.Canonicalize(); 1721 ASSERT_EQ(1, list->length()); 1722 ASSERT_EQ(10, list->at(0).from()); 1723 ASSERT_EQ(30, list->at(0).to()); 1724} 1725 1726 
1727TEST(CharacterRangeMerge) { 1728 v8::internal::V8::Initialize(NULL); 1729 Zone zone(CcTest::i_isolate()); 1730 ZoneList<CharacterRange> l1(4, &zone); 1731 ZoneList<CharacterRange> l2(4, &zone); 1732 // Create all combinations of intersections of ranges, both singletons and 1733 // longer. 1734 1735 int offset = 0; 1736 1737 // The five kinds of singleton intersections: 1738 // X 1739 // Y - outside before 1740 // Y - outside touching start 1741 // Y - overlap 1742 // Y - outside touching end 1743 // Y - outside after 1744 1745 for (int i = 0; i < 5; i++) { 1746 l1.Add(CharacterRange::Singleton(offset + 2), &zone); 1747 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1748 offset += 6; 1749 } 1750 1751 // The seven kinds of singleton/non-singleton intersections: 1752 // XXX 1753 // Y - outside before 1754 // Y - outside touching start 1755 // Y - inside touching start 1756 // Y - entirely inside 1757 // Y - inside touching end 1758 // Y - outside touching end 1759 // Y - disjoint after 1760 1761 for (int i = 0; i < 7; i++) { 1762 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone); 1763 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1764 offset += 8; 1765 } 1766 1767 // The eleven kinds of non-singleton intersections: 1768 // 1769 // XXXXXXXX 1770 // YYYY - outside before. 1771 // YYYY - outside touching start. 1772 // YYYY - overlapping start 1773 // YYYY - inside touching start 1774 // YYYY - entirely inside 1775 // YYYY - inside touching end 1776 // YYYY - overlapping end 1777 // YYYY - outside touching end 1778 // YYYY - outside after 1779 // YYYYYYYY - identical 1780 // YYYYYYYYYYYY - containing entirely. 1781 1782 for (int i = 0; i < 9; i++) { 1783 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8. 
1784 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone); 1785 offset += 22; 1786 } 1787 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1788 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1789 offset += 22; 1790 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1791 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone); 1792 offset += 22; 1793 1794 // Different kinds of multi-range overlap: 1795 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX 1796 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y 1797 1798 l1.Add(CharacterRange::Range(offset, offset + 21), &zone); 1799 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone); 1800 for (int i = 0; i < 6; i++) { 1801 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone); 1802 l2.Add(CharacterRange::Singleton(offset + 8), &zone); 1803 offset += 9; 1804 } 1805 1806 ASSERT(CharacterRange::IsCanonical(&l1)); 1807 ASSERT(CharacterRange::IsCanonical(&l2)); 1808 1809 ZoneList<CharacterRange> first_only(4, &zone); 1810 ZoneList<CharacterRange> second_only(4, &zone); 1811 ZoneList<CharacterRange> both(4, &zone); 1812} 1813 1814 1815TEST(Graph) { 1816 V8::Initialize(NULL); 1817 Execute("\\b\\w+\\b", false, true, true); 1818} 1819