1// Copyright 2012 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <cstdlib>
#include <sstream>

#include "include/v8.h"
#include "src/v8.h"

#include "src/ast/ast.h"
#include "src/char-predicates-inl.h"
#include "src/ostreams.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp-macro-assembler-irregexp.h"
#include "src/regexp/regexp-parser.h"
#include "src/splay-tree-inl.h"
#include "src/string-stream.h"
#ifdef V8_INTERPRETED_REGEXP
#include "src/regexp/interpreter-irregexp.h"
#else  // V8_INTERPRETED_REGEXP
#include "src/macro-assembler.h"
#if V8_TARGET_ARCH_ARM
#include "src/arm/assembler-arm.h"  // NOLINT
#include "src/arm/macro-assembler-arm.h"
#include "src/regexp/arm/regexp-macro-assembler-arm.h"
#endif
#if V8_TARGET_ARCH_ARM64
#include "src/arm64/assembler-arm64.h"
#include "src/arm64/macro-assembler-arm64.h"
#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
#endif
#if V8_TARGET_ARCH_S390
#include "src/regexp/s390/regexp-macro-assembler-s390.h"
#include "src/s390/assembler-s390.h"
#include "src/s390/macro-assembler-s390.h"
#endif
#if V8_TARGET_ARCH_PPC
#include "src/ppc/assembler-ppc.h"
#include "src/ppc/macro-assembler-ppc.h"
#include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
#endif
#if V8_TARGET_ARCH_MIPS
#include "src/mips/assembler-mips.h"
#include "src/mips/macro-assembler-mips.h"
#include "src/regexp/mips/regexp-macro-assembler-mips.h"
#endif
#if V8_TARGET_ARCH_MIPS64
#include "src/mips64/assembler-mips64.h"
#include "src/mips64/macro-assembler-mips64.h"
#include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
#endif
#if V8_TARGET_ARCH_X64
#include "src/regexp/x64/regexp-macro-assembler-x64.h"
#include "src/x64/assembler-x64.h"
#include "src/x64/macro-assembler-x64.h"
#endif
#if V8_TARGET_ARCH_IA32
#include "src/ia32/assembler-ia32.h"
#include "src/ia32/macro-assembler-ia32.h"
#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
#endif
#if V8_TARGET_ARCH_X87
#include "src/regexp/x87/regexp-macro-assembler-x87.h"
#include "src/x87/assembler-x87.h"
#include "src/x87/macro-assembler-x87.h"
#endif
#endif  // V8_INTERPRETED_REGEXP
#include "test/cctest/cctest.h"

using namespace v8::internal;


// Parses |input| with no flags set and returns whether
// RegExpParser::ParseRegExp reported success. Unlike the helpers below, a
// parse failure is returned to the caller rather than CHECKed.
static bool CheckParse(const char* input) {
  v8::HandleScope scope(CcTest::isolate());
  Zone zone(CcTest::i_isolate()->allocator());
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
  RegExpCompileData result;
  return v8::internal::RegExpParser::ParseRegExp(
      CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
}


// Parses |input| (adding JSRegExp::kUnicode to the flags when |unicode| is
// true), CHECKs that parsing succeeded with a non-null tree and no error, and
// then CHECKs that the tree's Print() output is exactly |expected|.
static void CheckParseEq(const char* input, const char* expected,
                         bool unicode = false) {
  v8::HandleScope scope(CcTest::isolate());
  Zone zone(CcTest::i_isolate()->allocator());
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
  RegExpCompileData result;
  JSRegExp::Flags flags = JSRegExp::kNone;
  if (unicode) flags |= JSRegExp::kUnicode;
  CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
                                                &reader, flags, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  std::ostringstream os;
  result.tree->Print(os, &zone);
  // On mismatch, print "expected | actual" before the CHECK_EQ below fires so
  // that both strings are visible in the test output.
  if (strcmp(expected, os.str().c_str()) != 0) {
    printf("%s | %s\n", expected, os.str().c_str());
  }
  CHECK_EQ(0, strcmp(expected, os.str().c_str()));
}


// Parses |input| with no flags set, CHECKs that parsing succeeded, and returns
// the |simple| field that the parser stored in the compile data.
static bool CheckSimple(const char* input) {
  v8::HandleScope scope(CcTest::isolate());
  Zone zone(CcTest::i_isolate()->allocator());
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
  RegExpCompileData result;
  CHECK(v8::internal::RegExpParser::ParseRegExp(
      CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  return result.simple;
}

// Pair of values returned by CheckMinMaxMatch().
struct MinMaxPair {
  int
min_match;  // Filled from the parsed tree's min_match().
  int max_match;  // Filled from the parsed tree's max_match().
};


// Parses |input| with no flags set, CHECKs that parsing succeeded, and returns
// the min_match()/max_match() values reported by the resulting tree.
static MinMaxPair CheckMinMaxMatch(const char* input) {
  v8::HandleScope scope(CcTest::isolate());
  Zone zone(CcTest::i_isolate()->allocator());
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
  RegExpCompileData result;
  CHECK(v8::internal::RegExpParser::ParseRegExp(
      CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  int min_match = result.tree->min_match();
  int max_match = result.tree->max_match();
  MinMaxPair pair = { min_match, max_match };
  return pair;
}


// CHECK_PARSE_ERROR: asserts that |input| fails to parse (via CheckParse).
// CHECK_SIMPLE: asserts that CheckSimple(input) equals |simple|. The expansion
//   already ends in ';'.
// CHECK_MIN_MAX: asserts both values returned by CheckMinMaxMatch(input).
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
#define CHECK_MIN_MAX(input, min, max)                                         \
  { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  }


// Runs the parser expectation suite with FLAG_harmony_regexp_lookbehind set to
// |lookbehind|. The flag is assigned here and is not restored afterwards.
void TestRegExpParser(bool lookbehind) {
  FLAG_harmony_regexp_lookbehind = lookbehind;

  CHECK_PARSE_ERROR("?");

  // Each CheckParseEq pairs a pattern with the expected printed parse tree.
  CheckParseEq("abc", "'abc'");
  CheckParseEq("", "%");
  CheckParseEq("abc|def", "(| 'abc' 'def')");
  CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
  CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
  CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
  CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
  CheckParseEq("a*", "(# 0 - g 'a')");
  CheckParseEq("a*?", "(# 0 - n 'a')");
  CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
  CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
  CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
  CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
  CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
  CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
  CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
  CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
// (Body of TestRegExpParser, continued.) Bounded quantifiers, control escapes,
// groups, lookarounds, character classes and back references.
  CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
  CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
  CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
  CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
  CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
  CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
  CheckParseEq("(?:foo)", "'foo'");
  CheckParseEq("(?: foo )", "' foo '");
  CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
  CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
  CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
  CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
  // Lookbehind syntax must parse when the flag is on and be rejected when it
  // is off.
  if (lookbehind) {
    CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
    CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
  } else {
    CHECK_PARSE_ERROR("foo(?<=bar)baz");
    CHECK_PARSE_ERROR("foo(?<!bar)baz");
  }
  CheckParseEq("()", "(^ %)");
  CheckParseEq("(?=)", "(-> + %)");
  CheckParseEq("[]", "^[\\x00-\\u{10ffff}]");  // Doesn't compile on windows
  CheckParseEq("[^]", "[\\x00-\\u{10ffff}]");  // \uffff isn't in codepage 1252
  CheckParseEq("[x]", "[x]");
  CheckParseEq("[xyz]", "[x y z]");
  CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
  CheckParseEq("[-123]", "[- 1 2 3]");
  CheckParseEq("[^123]", "^[1 2 3]");
  CheckParseEq("]", "']'");
  CheckParseEq("}", "'}'");
  CheckParseEq("[a-b-c]", "[a-b - c]");
  CheckParseEq("[\\d]", "[0-9]");
  CheckParseEq("[x\\dz]", "[x 0-9 z]");
  CheckParseEq("[\\d-z]", "[0-9 - z]");
  CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
  CheckParseEq("[z-\\d]", "[z - 0-9]");
  // Control character outside character class.
  CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
  CheckParseEq("\\c!", "'\\c!'");
  CheckParseEq("\\c_", "'\\c_'");
  CheckParseEq("\\c~", "'\\c~'");
  CheckParseEq("\\c1", "'\\c1'");
  // Control character inside character class.
  CheckParseEq("[\\c!]", "[\\ c !]");
  CheckParseEq("[\\c_]", "[\\x1f]");
  CheckParseEq("[\\c~]", "[\\ c ~]");
  CheckParseEq("[\\ca]", "[\\x01]");
  CheckParseEq("[\\cz]", "[\\x1a]");
  CheckParseEq("[\\cA]", "[\\x01]");
  CheckParseEq("[\\cZ]", "[\\x1a]");
  CheckParseEq("[\\c1]", "[\\x11]");

  CheckParseEq("[a\\]c]", "[a ] c]");
  CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
  // NOTE(review): class members print space-separated elsewhere in this list,
  // so a literal space member would presumably yield two spaces before the
  // closing ']' - confirm this expected string has not lost whitespace.
  CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
  // Decimal/octal escapes: treated as back references only when enough
  // capture groups exist in the pattern (see the \\4 and \\11 cases below).
  CheckParseEq("\\0", "'\\x00'");
  CheckParseEq("\\8", "'8'");
  CheckParseEq("\\9", "'9'");
  CheckParseEq("\\11", "'\\x09'");
  CheckParseEq("\\11a", "'\\x09a'");
  CheckParseEq("\\011", "'\\x09'");
  CheckParseEq("\\00011", "'\\x0011'");
  CheckParseEq("\\118", "'\\x098'");
  CheckParseEq("\\111", "'I'");
  CheckParseEq("\\1111", "'I1'");
  CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
  CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
  CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
  CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
  CheckParseEq("(x)(x)(x)\\1*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g (<- 1)))");
  CheckParseEq("(x)(x)(x)\\2*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g (<- 2)))");
  CheckParseEq("(x)(x)(x)\\3*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g (<- 3)))");
  CheckParseEq("(x)(x)(x)\\4*",
               "(: (^ 'x') (^ 'x') (^ 'x')"
               " (# 0 - g '\\x04'))");
  CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
// (Body of TestRegExpParser, continued.) Back references relative to capture
// position, quantified lookaheads, escapes inside classes, unicode-mode
// patterns, then the CHECK_SIMPLE and CHECK_MIN_MAX tables.
  CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
               "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
               " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
  CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
  CheckParseEq("(a\\1)", "(^ 'a')");
  CheckParseEq("(\\1a)", "(^ 'a')");
  CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
  CheckParseEq("(?=a)?a", "'a'");
  CheckParseEq("(?=a){0,10}a", "'a'");
  CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
  CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
  CheckParseEq("(?!a)?a", "'a'");
  CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
  CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
  CheckParseEq("(?!\\1(a\\1)\\1)\\1",
               "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
  CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
               "(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
  if (lookbehind) {
    CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
                 "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
  }
  CheckParseEq("[\\0]", "[\\x00]");
  CheckParseEq("[\\11]", "[\\x09]");
  CheckParseEq("[\\11a]", "[\\x09 a]");
  CheckParseEq("[\\011]", "[\\x09]");
  CheckParseEq("[\\00011]", "[\\x00 1 1]");
  CheckParseEq("[\\118]", "[\\x09 8]");
  CheckParseEq("[\\111]", "[I]");
  CheckParseEq("[\\1111]", "[I 1]");
  // The expected strings below use a C++ hex escape, i.e. the literal
  // characters '4' (0x34) and '`' (0x60).
  CheckParseEq("\\x34", "'\x34'");
  CheckParseEq("\\x60", "'\x60'");
  CheckParseEq("\\x3z", "'x3z'");
  CheckParseEq("\\c", "'\\c'");
  CheckParseEq("\\u0034", "'\x34'");
  CheckParseEq("\\u003z", "'u003z'");
  CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");

  // Unicode regexps
  CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
  CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
               true);
  CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
               true);
  CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
  CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);

  CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
  CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
               true);

  // Expected value of the parser's |simple| result for each pattern; in this
  // table only the plain literal "a" is expected to be simple.
  CHECK_SIMPLE("", false);
  CHECK_SIMPLE("a", true);
  CHECK_SIMPLE("a|b", false);
  CHECK_SIMPLE("a\\n", false);
  CHECK_SIMPLE("^a", false);
  CHECK_SIMPLE("a$", false);
  CHECK_SIMPLE("a\\b!", false);
  CHECK_SIMPLE("a\\Bb", false);
  CHECK_SIMPLE("a*", false);
  CHECK_SIMPLE("a*?", false);
  CHECK_SIMPLE("a?", false);
  CHECK_SIMPLE("a??", false);
  CHECK_SIMPLE("a{0,1}?", false);
  CHECK_SIMPLE("a{1,1}?", false);
  CHECK_SIMPLE("a{1,2}?", false);
  CHECK_SIMPLE("a+?", false);
  CHECK_SIMPLE("(a)", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(\\1a)", false);
  CHECK_SIMPLE("\\1(a)", false);
  CHECK_SIMPLE("a\\s", false);
  CHECK_SIMPLE("a\\S", false);
  CHECK_SIMPLE("a\\d", false);
  CHECK_SIMPLE("a\\D", false);
  CHECK_SIMPLE("a\\w", false);
  CHECK_SIMPLE("a\\W", false);
  CHECK_SIMPLE("a.", false);
  CHECK_SIMPLE("a\\q", false);
  CHECK_SIMPLE("a[a]", false);
  CHECK_SIMPLE("a[^a]", false);
  CHECK_SIMPLE("a[a-z]", false);
  CHECK_SIMPLE("a[\\q]", false);
  CHECK_SIMPLE("a(?:b)", false);
  CHECK_SIMPLE("a(?=b)", false);
  CHECK_SIMPLE("a(?!b)", false);
  CHECK_SIMPLE("\\x60", false);
  CHECK_SIMPLE("\\u0060", false);
  CHECK_SIMPLE("\\cA", false);
  CHECK_SIMPLE("\\q", false);
  CHECK_SIMPLE("\\1112", false);
  CHECK_SIMPLE("\\0", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(?=a)?a", false);
  CHECK_SIMPLE("(?!a)?a\\1", false);
  CHECK_SIMPLE("(?:(?=a))a\\1", false);

  // Malformed or incomplete {...} quantifiers are expected to parse as
  // literal text.
  CheckParseEq("a{}", "'a{}'");
  CheckParseEq("a{,}", "'a{,}'");
  CheckParseEq("a{", "'a{'");
  CheckParseEq("a{z}", "'a{z}'");
  CheckParseEq("a{1z}", "'a{1z}'");
  CheckParseEq("a{12z}", "'a{12z}'");
  CheckParseEq("a{12,", "'a{12,'");
  CheckParseEq("a{12,3b", "'a{12,3b'");
  CheckParseEq("{}", "'{}'");
  CheckParseEq("{,}", "'{,}'");
  CheckParseEq("{", "'{'");
  CheckParseEq("{z}", "'{z}'");
  CheckParseEq("{1z}", "'{1z}'");
  CheckParseEq("{12z}", "'{12z}'");
  CheckParseEq("{12,", "'{12,'");
  CheckParseEq("{12,3b", "'{12,3b'");

  // Expected (min_match, max_match) of the parsed tree for each pattern.
  CHECK_MIN_MAX("a", 1, 1);
  CHECK_MIN_MAX("abc", 3, 3);
  CHECK_MIN_MAX("a[bc]d", 3, 3);
  CHECK_MIN_MAX("a|bc", 1, 2);
  CHECK_MIN_MAX("ab|c", 1, 2);
  CHECK_MIN_MAX("a||bc", 0, 2);
  CHECK_MIN_MAX("|", 0, 0);
  CHECK_MIN_MAX("(?:ab)", 2, 2);
  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
  CHECK_MIN_MAX("(ab)", 2, 2);
  CHECK_MIN_MAX("(ab|cde)", 2, 3);
  CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab)?", 0, 2);
  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a?", 0, 1);
  CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a??", 0, 1);
  CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)?", 0, 1);
  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
CHECK_MIN_MAX("a{0}", 0, 0); 425 CHECK_MIN_MAX("(?:a+){0}", 0, 0); 426 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0); 427 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity); 428 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity); 429 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity); 430 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity); 431 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity); 432 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14); 433 CHECK_MIN_MAX("a\\bc", 2, 2); 434 CHECK_MIN_MAX("a\\Bc", 2, 2); 435 CHECK_MIN_MAX("a\\sc", 3, 3); 436 CHECK_MIN_MAX("a\\Sc", 3, 3); 437 CHECK_MIN_MAX("a(?=b)c", 2, 2); 438 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2); 439 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2); 440 441 FLAG_harmony_regexp_named_captures = true; 442 CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>", 443 "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true); 444 CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>", 445 "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true); 446 CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>", 447 "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true); 448 CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true); 449 CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true); 450 CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true); 451 CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true); 452 CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true); 453 454 CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true); 455 CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true); 456 FLAG_harmony_regexp_named_captures = false; 457} 458 459 460TEST(ParserWithLookbehind) { 461 TestRegExpParser(true); // Lookbehind enabled. 462} 463 464 465TEST(ParserWithoutLookbehind) { 466 TestRegExpParser(true); // Lookbehind enabled. 467} 468 469TEST(ParserRegression) { 470 CheckParseEq("[A-Z$-][x]", "(! 
[A-Z $ -] [x])"); 471 CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"); 472 CheckParseEq("{", "'{'"); 473 CheckParseEq("a|", "(| 'a' %)"); 474} 475 476static void ExpectError(const char* input, const char* expected, 477 bool unicode = false) { 478 v8::HandleScope scope(CcTest::isolate()); 479 Zone zone(CcTest::i_isolate()->allocator()); 480 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); 481 RegExpCompileData result; 482 JSRegExp::Flags flags = JSRegExp::kNone; 483 if (unicode) flags |= JSRegExp::kUnicode; 484 CHECK(!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone, 485 &reader, flags, &result)); 486 CHECK(result.tree == NULL); 487 CHECK(!result.error.is_null()); 488 v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS); 489 CHECK_EQ(0, strcmp(expected, str.get())); 490} 491 492 493TEST(Errors) { 494 const char* kEndBackslash = "\\ at end of pattern"; 495 ExpectError("\\", kEndBackslash); 496 const char* kUnterminatedGroup = "Unterminated group"; 497 ExpectError("(foo", kUnterminatedGroup); 498 const char* kInvalidGroup = "Invalid group"; 499 ExpectError("(?", kInvalidGroup); 500 const char* kUnterminatedCharacterClass = "Unterminated character class"; 501 ExpectError("[", kUnterminatedCharacterClass); 502 ExpectError("[a-", kUnterminatedCharacterClass); 503 const char* kNothingToRepeat = "Nothing to repeat"; 504 ExpectError("*", kNothingToRepeat); 505 ExpectError("?", kNothingToRepeat); 506 ExpectError("+", kNothingToRepeat); 507 ExpectError("{1}", kNothingToRepeat); 508 ExpectError("{1,2}", kNothingToRepeat); 509 ExpectError("{1,}", kNothingToRepeat); 510 511 // Check that we don't allow more than kMaxCapture captures 512 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures. 
513 const char* kTooManyCaptures = "Too many captures"; 514 std::ostringstream os; 515 for (int i = 0; i <= kMaxCaptures; i++) { 516 os << "()"; 517 } 518 ExpectError(os.str().c_str(), kTooManyCaptures); 519 520 FLAG_harmony_regexp_named_captures = true; 521 const char* kInvalidCaptureName = "Invalid capture group name"; 522 ExpectError("(?<>.)", kInvalidCaptureName, true); 523 ExpectError("(?<1>.)", kInvalidCaptureName, true); 524 ExpectError("(?<_%>.)", kInvalidCaptureName, true); 525 ExpectError("\\k<a", kInvalidCaptureName, true); 526 const char* kDuplicateCaptureName = "Duplicate capture group name"; 527 ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true); 528 const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence"; 529 ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true); 530 const char* kInvalidCaptureReferenced = "Invalid named capture referenced"; 531 ExpectError("\\k<a>", kInvalidCaptureReferenced, true); 532 ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true); 533 const char* kInvalidNamedReference = "Invalid named reference"; 534 ExpectError("\\ka", kInvalidNamedReference, true); 535 FLAG_harmony_regexp_named_captures = false; 536} 537 538 539static bool IsDigit(uc16 c) { 540 return ('0' <= c && c <= '9'); 541} 542 543 544static bool NotDigit(uc16 c) { 545 return !IsDigit(c); 546} 547 548 549static bool IsWhiteSpaceOrLineTerminator(uc16 c) { 550 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes 551 // WhiteSpace (7.2) and LineTerminator (7.3) values. 
552 return v8::internal::WhiteSpaceOrLineTerminator::Is(c); 553} 554 555 556static bool NotWhiteSpaceNorLineTermiantor(uc16 c) { 557 return !IsWhiteSpaceOrLineTerminator(c); 558} 559 560 561static bool NotWord(uc16 c) { 562 return !IsRegExpWord(c); 563} 564 565 566static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) { 567 Zone zone(CcTest::i_isolate()->allocator()); 568 ZoneList<CharacterRange>* ranges = 569 new(&zone) ZoneList<CharacterRange>(2, &zone); 570 CharacterRange::AddClassEscape(c, ranges, &zone); 571 for (uc32 i = 0; i < (1 << 16); i++) { 572 bool in_class = false; 573 for (int j = 0; !in_class && j < ranges->length(); j++) { 574 CharacterRange& range = ranges->at(j); 575 in_class = (range.from() <= i && i <= range.to()); 576 } 577 CHECK_EQ(pred(i), in_class); 578 } 579} 580 581 582TEST(CharacterClassEscapes) { 583 TestCharacterClassEscapes('.', IsRegExpNewline); 584 TestCharacterClassEscapes('d', IsDigit); 585 TestCharacterClassEscapes('D', NotDigit); 586 TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator); 587 TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor); 588 TestCharacterClassEscapes('w', IsRegExpWord); 589 TestCharacterClassEscapes('W', NotWord); 590} 591 592 593static RegExpNode* Compile(const char* input, bool multiline, bool unicode, 594 bool is_one_byte, Zone* zone) { 595 Isolate* isolate = CcTest::i_isolate(); 596 FlatStringReader reader(isolate, CStrVector(input)); 597 RegExpCompileData compile_data; 598 JSRegExp::Flags flags = JSRegExp::kNone; 599 if (multiline) flags = JSRegExp::kMultiline; 600 if (unicode) flags = JSRegExp::kUnicode; 601 if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone, 602 &reader, flags, &compile_data)) 603 return NULL; 604 Handle<String> pattern = isolate->factory() 605 ->NewStringFromUtf8(CStrVector(input)) 606 .ToHandleChecked(); 607 Handle<String> sample_subject = 608 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked(); 609 
RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern, 610 sample_subject, is_one_byte); 611 return compile_data.node; 612} 613 614 615static void Execute(const char* input, bool multiline, bool unicode, 616 bool is_one_byte, bool dot_output = false) { 617 v8::HandleScope scope(CcTest::isolate()); 618 Zone zone(CcTest::i_isolate()->allocator()); 619 RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone); 620 USE(node); 621#ifdef DEBUG 622 if (dot_output) { 623 RegExpEngine::DotPrint(input, node, false); 624 } 625#endif // DEBUG 626} 627 628 629class TestConfig { 630 public: 631 typedef int Key; 632 typedef int Value; 633 static const int kNoKey; 634 static int NoValue() { return 0; } 635 static inline int Compare(int a, int b) { 636 if (a < b) 637 return -1; 638 else if (a > b) 639 return 1; 640 else 641 return 0; 642 } 643}; 644 645 646const int TestConfig::kNoKey = 0; 647 648 649static unsigned PseudoRandom(int i, int j) { 650 return ~(~((i * 781) ^ (j * 329))); 651} 652 653 654TEST(SplayTreeSimple) { 655 static const unsigned kLimit = 1000; 656 Zone zone(CcTest::i_isolate()->allocator()); 657 ZoneSplayTree<TestConfig> tree(&zone); 658 bool seen[kLimit]; 659 for (unsigned i = 0; i < kLimit; i++) seen[i] = false; 660#define CHECK_MAPS_EQUAL() do { \ 661 for (unsigned k = 0; k < kLimit; k++) \ 662 CHECK_EQ(seen[k], tree.Find(k, &loc)); \ 663 } while (false) 664 for (int i = 0; i < 50; i++) { 665 for (int j = 0; j < 50; j++) { 666 int next = PseudoRandom(i, j) % kLimit; 667 if (seen[next]) { 668 // We've already seen this one. Check the value and remove 669 // it. 670 ZoneSplayTree<TestConfig>::Locator loc; 671 CHECK(tree.Find(next, &loc)); 672 CHECK_EQ(next, loc.key()); 673 CHECK_EQ(3 * next, loc.value()); 674 tree.Remove(next); 675 seen[next] = false; 676 CHECK_MAPS_EQUAL(); 677 } else { 678 // Check that it wasn't there already and then add it. 
// (Body of TEST(SplayTreeSimple), continued: the insert branch.)
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        // Values are always 3 * key.
        loc.set_value(3 * next);
        seen[next] = true;
        CHECK_MAPS_EQUAL();
      }
      // For a key known to be present, both neighbour lookups are expected to
      // land on the key itself. Note that a hit also ends the inner loop for
      // this value of i.
      int val = PseudoRandom(j, i) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindGreatestLessThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
      val = PseudoRandom(i + j, i - j) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindLeastGreaterThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
    }
  }
}


// Fills a DispatchTable with kRangeCount sets of pseudo-random character
// ranges and checks, for every value below kLimit, that the table's OutSet
// reports exactly the sets whose ranges contain that value.
TEST(DispatchTableConstruction) {
  // Initialize test data.
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  // Each row holds 2 * kRangeSize sorted endpoints, read below as kRangeSize
  // consecutive (from, to) pairs.
  uc16 ranges[kRangeCount][2 * kRangeSize];
  for (int i = 0; i < kRangeCount; i++) {
    Vector<uc16> range(ranges[i], 2 * kRangeSize);
    for (int j = 0; j < 2 * kRangeSize; j++) {
      range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
    }
    // Sorting guarantees from <= to for every pair.
    range.Sort();
    for (int j = 1; j < 2 * kRangeSize; j++) {
      CHECK(range[j-1] <= range[j]);
    }
  }
  // Enter test data into dispatch table.
  Zone zone(CcTest::i_isolate()->allocator());
  DispatchTable table(&zone);
  for (int i = 0; i < kRangeCount; i++) {
    uc16* range = ranges[i];
    for (int j = 0; j < 2 * kRangeSize; j += 2)
      table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);
  }
  // Check that the table looks as we would expect
  for (int p = 0; p < kLimit; p++) {
    OutSet* outs = table.Get(p);
    for (int j = 0; j < kRangeCount; j++) {
      uc16* range = ranges[j];
      // Recompute by brute force whether set j covers p.
      bool is_on = false;
      for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
        is_on = (range[k] <= p && p <= range[k + 1]);
      CHECK_EQ(is_on, outs->Get(j));
    }
  }
}


// Test of debug-only syntax.
#ifdef DEBUG

// Checks that possessive quantifiers ("*+", "++", "?+", "{m,n}+") parse to a
// 'p' quantifier node while FLAG_regexp_possessive_quantifier is on and are
// parse errors while it is off. The flag's previous value is restored at the
// end.
TEST(ParsePossessiveRepetition) {
  bool old_flag_value = FLAG_regexp_possessive_quantifier;

  // Enable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = true;

  CheckParseEq("a*+", "(# 0 - p 'a')");
  CheckParseEq("a++", "(# 1 - p 'a')");
  CheckParseEq("a?+", "(# 0 1 p 'a')");
  CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
  CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");

  // Disable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = false;

  CHECK_PARSE_ERROR("a*+");
  CHECK_PARSE_ERROR("a++");
  CHECK_PARSE_ERROR("a?+");
  CHECK_PARSE_ERROR("a{10,20}+");
  CHECK_PARSE_ERROR("a{10,20}+b");

  FLAG_regexp_possessive_quantifier = old_flag_value;
}

#endif

// Tests of interpreter.


// The code below is compiled only when V8_INTERPRETED_REGEXP is not defined.
#ifndef V8_INTERPRETED_REGEXP

// Select the macro assembler class for the target architecture.
#if V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM
typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM64
typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_S390
typedef RegExpMacroAssemblerS390 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_PPC
typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS64
// NOTE(review): same class name as the MIPS branch; presumably the mips64
// header declares it under that name - confirm.
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X87
typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
#endif

// Scoped helper: the constructor opens a handle scope, creates a new context
// and enters it; the destructor exits that context.
class ContextInitializer {
 public:
  ContextInitializer()
      : scope_(CcTest::isolate()),
        env_(v8::Context::New(CcTest::isolate())) {
    env_->Enter();
  }
  ~ContextInitializer() {
    env_->Exit();
  }
 private:
  // Declared before env_ so the handle scope is constructed first.
  v8::HandleScope scope_;
  v8::Local<v8::Context>
env_;
};


// Thin wrapper around NativeRegExpMacroAssembler::Execute that forwards its
// arguments and supplies 0 and the cctest isolate for the last two parameters.
static ArchRegExpMacroAssembler::Result Execute(Code* code,
                                                String* input,
                                                int start_offset,
                                                const byte* input_start,
                                                const byte* input_end,
                                                int* captures) {
  return NativeRegExpMacroAssembler::Execute(
      code,
      input,
      start_offset,
      input_start,
      input_end,
      captures,
      0,
      CcTest::i_isolate());
}


// Assembles a program consisting only of Succeed() and checks that executing
// it on "foofoo" reports SUCCESS with all four capture slots set to -1.
TEST(MacroAssemblerNativeSuccess) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  m.Succeed();

  Handle<String> source = factory->NewStringFromStaticChars("");
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  // Arbitrary initial values so that writes to the slots are observable.
  int captures[4] = {42, 37, 87, 117};
  Handle<String> input = factory->NewStringFromStaticChars("foofoo");
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  const byte* start_adr =
      reinterpret_cast<const byte*>(seq_input->GetCharsAddress());

  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + seq_input->length(),
              captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(-1, captures[0]);
  CHECK_EQ(-1, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);
}


// Hand-assembles a LATIN1 matcher (registered under the source string "^foo")
// and runs it against a matching and a non-matching subject.
TEST(MacroAssemblerNativeSimple) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(CcTest::i_isolate()->allocator());

  ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
                             4);

  Label fail, backtrack;
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(0, NULL);
m.LoadCurrentCharacter(2, NULL); 884 m.CheckNotCharacter('o', NULL); 885 m.LoadCurrentCharacter(1, NULL, false); 886 m.CheckNotCharacter('o', NULL); 887 m.LoadCurrentCharacter(0, NULL, false); 888 m.CheckNotCharacter('f', NULL); 889 m.WriteCurrentPositionToRegister(0, 0); 890 m.WriteCurrentPositionToRegister(1, 3); 891 m.AdvanceCurrentPosition(3); 892 m.PushBacktrack(&backtrack); 893 m.Succeed(); 894 m.Bind(&backtrack); 895 m.Backtrack(); 896 m.Bind(&fail); 897 m.Fail(); 898 899 Handle<String> source = factory->NewStringFromStaticChars("^foo"); 900 Handle<Object> code_object = m.GetCode(source); 901 Handle<Code> code = Handle<Code>::cast(code_object); 902 903 int captures[4] = {42, 37, 87, 117}; 904 Handle<String> input = factory->NewStringFromStaticChars("foofoo"); 905 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 906 Address start_adr = seq_input->GetCharsAddress(); 907 908 NativeRegExpMacroAssembler::Result result = 909 Execute(*code, 910 *input, 911 0, 912 start_adr, 913 start_adr + input->length(), 914 captures); 915 916 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 917 CHECK_EQ(0, captures[0]); 918 CHECK_EQ(3, captures[1]); 919 CHECK_EQ(-1, captures[2]); 920 CHECK_EQ(-1, captures[3]); 921 922 input = factory->NewStringFromStaticChars("barbarbar"); 923 seq_input = Handle<SeqOneByteString>::cast(input); 924 start_adr = seq_input->GetCharsAddress(); 925 926 result = Execute(*code, 927 *input, 928 0, 929 start_adr, 930 start_adr + input->length(), 931 captures); 932 933 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 934} 935 936 937TEST(MacroAssemblerNativeSimpleUC16) { 938 v8::V8::Initialize(); 939 ContextInitializer initializer; 940 Isolate* isolate = CcTest::i_isolate(); 941 Factory* factory = isolate->factory(); 942 Zone zone(CcTest::i_isolate()->allocator()); 943 944 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16, 945 4); 946 947 Label fail, backtrack; 948 m.PushBacktrack(&fail); 949 
m.CheckNotAtStart(0, NULL); 950 m.LoadCurrentCharacter(2, NULL); 951 m.CheckNotCharacter('o', NULL); 952 m.LoadCurrentCharacter(1, NULL, false); 953 m.CheckNotCharacter('o', NULL); 954 m.LoadCurrentCharacter(0, NULL, false); 955 m.CheckNotCharacter('f', NULL); 956 m.WriteCurrentPositionToRegister(0, 0); 957 m.WriteCurrentPositionToRegister(1, 3); 958 m.AdvanceCurrentPosition(3); 959 m.PushBacktrack(&backtrack); 960 m.Succeed(); 961 m.Bind(&backtrack); 962 m.Backtrack(); 963 m.Bind(&fail); 964 m.Fail(); 965 966 Handle<String> source = factory->NewStringFromStaticChars("^foo"); 967 Handle<Object> code_object = m.GetCode(source); 968 Handle<Code> code = Handle<Code>::cast(code_object); 969 970 int captures[4] = {42, 37, 87, 117}; 971 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', 972 static_cast<uc16>(0x2603)}; 973 Handle<String> input = factory->NewStringFromTwoByte( 974 Vector<const uc16>(input_data, 6)).ToHandleChecked(); 975 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input); 976 Address start_adr = seq_input->GetCharsAddress(); 977 978 NativeRegExpMacroAssembler::Result result = 979 Execute(*code, 980 *input, 981 0, 982 start_adr, 983 start_adr + input->length(), 984 captures); 985 986 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 987 CHECK_EQ(0, captures[0]); 988 CHECK_EQ(3, captures[1]); 989 CHECK_EQ(-1, captures[2]); 990 CHECK_EQ(-1, captures[3]); 991 992 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', 993 static_cast<uc16>(0x2603)}; 994 input = factory->NewStringFromTwoByte( 995 Vector<const uc16>(input_data2, 9)).ToHandleChecked(); 996 seq_input = Handle<SeqTwoByteString>::cast(input); 997 start_adr = seq_input->GetCharsAddress(); 998 999 result = Execute(*code, 1000 *input, 1001 0, 1002 start_adr, 1003 start_adr + input->length() * 2, 1004 captures); 1005 1006 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 1007} 1008 1009 1010TEST(MacroAssemblerNativeBacktrack) { 1011 v8::V8::Initialize(); 
1012 ContextInitializer initializer; 1013 Isolate* isolate = CcTest::i_isolate(); 1014 Factory* factory = isolate->factory(); 1015 Zone zone(CcTest::i_isolate()->allocator()); 1016 1017 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1018 0); 1019 1020 Label fail; 1021 Label backtrack; 1022 m.LoadCurrentCharacter(10, &fail); 1023 m.Succeed(); 1024 m.Bind(&fail); 1025 m.PushBacktrack(&backtrack); 1026 m.LoadCurrentCharacter(10, NULL); 1027 m.Succeed(); 1028 m.Bind(&backtrack); 1029 m.Fail(); 1030 1031 Handle<String> source = factory->NewStringFromStaticChars(".........."); 1032 Handle<Object> code_object = m.GetCode(source); 1033 Handle<Code> code = Handle<Code>::cast(code_object); 1034 1035 Handle<String> input = factory->NewStringFromStaticChars("foofoo"); 1036 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1037 Address start_adr = seq_input->GetCharsAddress(); 1038 1039 NativeRegExpMacroAssembler::Result result = 1040 Execute(*code, 1041 *input, 1042 0, 1043 start_adr, 1044 start_adr + input->length(), 1045 NULL); 1046 1047 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result); 1048} 1049 1050 1051TEST(MacroAssemblerNativeBackReferenceLATIN1) { 1052 v8::V8::Initialize(); 1053 ContextInitializer initializer; 1054 Isolate* isolate = CcTest::i_isolate(); 1055 Factory* factory = isolate->factory(); 1056 Zone zone(CcTest::i_isolate()->allocator()); 1057 1058 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1059 4); 1060 1061 m.WriteCurrentPositionToRegister(0, 0); 1062 m.AdvanceCurrentPosition(2); 1063 m.WriteCurrentPositionToRegister(1, 0); 1064 Label nomatch; 1065 m.CheckNotBackReference(0, false, &nomatch); 1066 m.Fail(); 1067 m.Bind(&nomatch); 1068 m.AdvanceCurrentPosition(2); 1069 Label missing_match; 1070 m.CheckNotBackReference(0, false, &missing_match); 1071 m.WriteCurrentPositionToRegister(2, 0); 1072 m.Succeed(); 1073 m.Bind(&missing_match); 1074 m.Fail(); 1075 1076 
Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1"); 1077 Handle<Object> code_object = m.GetCode(source); 1078 Handle<Code> code = Handle<Code>::cast(code_object); 1079 1080 Handle<String> input = factory->NewStringFromStaticChars("fooofo"); 1081 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1082 Address start_adr = seq_input->GetCharsAddress(); 1083 1084 int output[4]; 1085 NativeRegExpMacroAssembler::Result result = 1086 Execute(*code, 1087 *input, 1088 0, 1089 start_adr, 1090 start_adr + input->length(), 1091 output); 1092 1093 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1094 CHECK_EQ(0, output[0]); 1095 CHECK_EQ(2, output[1]); 1096 CHECK_EQ(6, output[2]); 1097 CHECK_EQ(-1, output[3]); 1098} 1099 1100 1101TEST(MacroAssemblerNativeBackReferenceUC16) { 1102 v8::V8::Initialize(); 1103 ContextInitializer initializer; 1104 Isolate* isolate = CcTest::i_isolate(); 1105 Factory* factory = isolate->factory(); 1106 Zone zone(CcTest::i_isolate()->allocator()); 1107 1108 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16, 1109 4); 1110 1111 m.WriteCurrentPositionToRegister(0, 0); 1112 m.AdvanceCurrentPosition(2); 1113 m.WriteCurrentPositionToRegister(1, 0); 1114 Label nomatch; 1115 m.CheckNotBackReference(0, false, &nomatch); 1116 m.Fail(); 1117 m.Bind(&nomatch); 1118 m.AdvanceCurrentPosition(2); 1119 Label missing_match; 1120 m.CheckNotBackReference(0, false, &missing_match); 1121 m.WriteCurrentPositionToRegister(2, 0); 1122 m.Succeed(); 1123 m.Bind(&missing_match); 1124 m.Fail(); 1125 1126 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1"); 1127 Handle<Object> code_object = m.GetCode(source); 1128 Handle<Code> code = Handle<Code>::cast(code_object); 1129 1130 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028}; 1131 Handle<String> input = factory->NewStringFromTwoByte( 1132 Vector<const uc16>(input_data, 6)).ToHandleChecked(); 1133 Handle<SeqTwoByteString> 
seq_input = Handle<SeqTwoByteString>::cast(input); 1134 Address start_adr = seq_input->GetCharsAddress(); 1135 1136 int output[4]; 1137 NativeRegExpMacroAssembler::Result result = 1138 Execute(*code, 1139 *input, 1140 0, 1141 start_adr, 1142 start_adr + input->length() * 2, 1143 output); 1144 1145 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1146 CHECK_EQ(0, output[0]); 1147 CHECK_EQ(2, output[1]); 1148 CHECK_EQ(6, output[2]); 1149 CHECK_EQ(-1, output[3]); 1150} 1151 1152 1153 1154TEST(MacroAssemblernativeAtStart) { 1155 v8::V8::Initialize(); 1156 ContextInitializer initializer; 1157 Isolate* isolate = CcTest::i_isolate(); 1158 Factory* factory = isolate->factory(); 1159 Zone zone(CcTest::i_isolate()->allocator()); 1160 1161 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1162 0); 1163 1164 Label not_at_start, newline, fail; 1165 m.CheckNotAtStart(0, ¬_at_start); 1166 // Check that prevchar = '\n' and current = 'f'. 1167 m.CheckCharacter('\n', &newline); 1168 m.Bind(&fail); 1169 m.Fail(); 1170 m.Bind(&newline); 1171 m.LoadCurrentCharacter(0, &fail); 1172 m.CheckNotCharacter('f', &fail); 1173 m.Succeed(); 1174 1175 m.Bind(¬_at_start); 1176 // Check that prevchar = 'o' and current = 'b'. 
1177 Label prevo; 1178 m.CheckCharacter('o', &prevo); 1179 m.Fail(); 1180 m.Bind(&prevo); 1181 m.LoadCurrentCharacter(0, &fail); 1182 m.CheckNotCharacter('b', &fail); 1183 m.Succeed(); 1184 1185 Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)"); 1186 Handle<Object> code_object = m.GetCode(source); 1187 Handle<Code> code = Handle<Code>::cast(code_object); 1188 1189 Handle<String> input = factory->NewStringFromStaticChars("foobar"); 1190 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1191 Address start_adr = seq_input->GetCharsAddress(); 1192 1193 NativeRegExpMacroAssembler::Result result = 1194 Execute(*code, 1195 *input, 1196 0, 1197 start_adr, 1198 start_adr + input->length(), 1199 NULL); 1200 1201 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1202 1203 result = Execute(*code, 1204 *input, 1205 3, 1206 start_adr + 3, 1207 start_adr + input->length(), 1208 NULL); 1209 1210 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1211} 1212 1213 1214TEST(MacroAssemblerNativeBackRefNoCase) { 1215 v8::V8::Initialize(); 1216 ContextInitializer initializer; 1217 Isolate* isolate = CcTest::i_isolate(); 1218 Factory* factory = isolate->factory(); 1219 Zone zone(CcTest::i_isolate()->allocator()); 1220 1221 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1222 4); 1223 1224 Label fail, succ; 1225 1226 m.WriteCurrentPositionToRegister(0, 0); 1227 m.WriteCurrentPositionToRegister(2, 0); 1228 m.AdvanceCurrentPosition(3); 1229 m.WriteCurrentPositionToRegister(3, 0); 1230 m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "AbC". 1231 m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "ABC". 
1232 Label expected_fail; 1233 m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail); 1234 m.Bind(&fail); 1235 m.Fail(); 1236 1237 m.Bind(&expected_fail); 1238 m.AdvanceCurrentPosition(3); // Skip "xYz" 1239 m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ); 1240 m.Fail(); 1241 1242 m.Bind(&succ); 1243 m.WriteCurrentPositionToRegister(1, 0); 1244 m.Succeed(); 1245 1246 Handle<String> source = 1247 factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)"); 1248 Handle<Object> code_object = m.GetCode(source); 1249 Handle<Code> code = Handle<Code>::cast(code_object); 1250 1251 Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab"); 1252 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1253 Address start_adr = seq_input->GetCharsAddress(); 1254 1255 int output[4]; 1256 NativeRegExpMacroAssembler::Result result = 1257 Execute(*code, 1258 *input, 1259 0, 1260 start_adr, 1261 start_adr + input->length(), 1262 output); 1263 1264 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1265 CHECK_EQ(0, output[0]); 1266 CHECK_EQ(12, output[1]); 1267 CHECK_EQ(0, output[2]); 1268 CHECK_EQ(3, output[3]); 1269} 1270 1271 1272 1273TEST(MacroAssemblerNativeRegisters) { 1274 v8::V8::Initialize(); 1275 ContextInitializer initializer; 1276 Isolate* isolate = CcTest::i_isolate(); 1277 Factory* factory = isolate->factory(); 1278 Zone zone(CcTest::i_isolate()->allocator()); 1279 1280 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1281 6); 1282 1283 uc16 foo_chars[3] = {'f', 'o', 'o'}; 1284 Vector<const uc16> foo(foo_chars, 3); 1285 1286 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt }; 1287 Label fail; 1288 Label backtrack; 1289 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0] 1290 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1291 m.PushBacktrack(&backtrack); 1292 m.WriteStackPointerToRegister(sp); 1293 // Fill stack and registers 1294 
m.AdvanceCurrentPosition(2); 1295 m.WriteCurrentPositionToRegister(out1, 0); 1296 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck); 1297 m.PushBacktrack(&fail); 1298 // Drop backtrack stack frames. 1299 m.ReadStackPointerFromRegister(sp); 1300 // And take the first backtrack (to &backtrack) 1301 m.Backtrack(); 1302 1303 m.PushCurrentPosition(); 1304 m.AdvanceCurrentPosition(2); 1305 m.PopCurrentPosition(); 1306 1307 m.Bind(&backtrack); 1308 m.PopRegister(out1); 1309 m.ReadCurrentPositionFromRegister(out1); 1310 m.AdvanceCurrentPosition(3); 1311 m.WriteCurrentPositionToRegister(out2, 0); // [0,3] 1312 1313 Label loop; 1314 m.SetRegister(loop_cnt, 0); // loop counter 1315 m.Bind(&loop); 1316 m.AdvanceRegister(loop_cnt, 1); 1317 m.AdvanceCurrentPosition(1); 1318 m.IfRegisterLT(loop_cnt, 3, &loop); 1319 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6] 1320 1321 Label loop2; 1322 m.SetRegister(loop_cnt, 2); // loop counter 1323 m.Bind(&loop2); 1324 m.AdvanceRegister(loop_cnt, -1); 1325 m.AdvanceCurrentPosition(1); 1326 m.IfRegisterGE(loop_cnt, 0, &loop2); 1327 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9] 1328 1329 Label loop3; 1330 Label exit_loop3; 1331 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1332 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck); 1333 m.ReadCurrentPositionFromRegister(out3); 1334 m.Bind(&loop3); 1335 m.AdvanceCurrentPosition(1); 1336 m.CheckGreedyLoop(&exit_loop3); 1337 m.GoTo(&loop3); 1338 m.Bind(&exit_loop3); 1339 m.PopCurrentPosition(); 1340 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1] 1341 1342 m.Succeed(); 1343 1344 m.Bind(&fail); 1345 m.Fail(); 1346 1347 Handle<String> source = factory->NewStringFromStaticChars("<loop test>"); 1348 Handle<Object> code_object = m.GetCode(source); 1349 Handle<Code> code = Handle<Code>::cast(code_object); 1350 1351 // String long enough for test (content doesn't matter). 
1352 Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo"); 1353 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1354 Address start_adr = seq_input->GetCharsAddress(); 1355 1356 int output[6]; 1357 NativeRegExpMacroAssembler::Result result = 1358 Execute(*code, 1359 *input, 1360 0, 1361 start_adr, 1362 start_adr + input->length(), 1363 output); 1364 1365 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1366 CHECK_EQ(0, output[0]); 1367 CHECK_EQ(3, output[1]); 1368 CHECK_EQ(6, output[2]); 1369 CHECK_EQ(9, output[3]); 1370 CHECK_EQ(9, output[4]); 1371 CHECK_EQ(-1, output[5]); 1372} 1373 1374 1375TEST(MacroAssemblerStackOverflow) { 1376 v8::V8::Initialize(); 1377 ContextInitializer initializer; 1378 Isolate* isolate = CcTest::i_isolate(); 1379 Factory* factory = isolate->factory(); 1380 Zone zone(CcTest::i_isolate()->allocator()); 1381 1382 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1383 0); 1384 1385 Label loop; 1386 m.Bind(&loop); 1387 m.PushBacktrack(&loop); 1388 m.GoTo(&loop); 1389 1390 Handle<String> source = 1391 factory->NewStringFromStaticChars("<stack overflow test>"); 1392 Handle<Object> code_object = m.GetCode(source); 1393 Handle<Code> code = Handle<Code>::cast(code_object); 1394 1395 // String long enough for test (content doesn't matter). 
1396 Handle<String> input = factory->NewStringFromStaticChars("dummy"); 1397 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1398 Address start_adr = seq_input->GetCharsAddress(); 1399 1400 NativeRegExpMacroAssembler::Result result = 1401 Execute(*code, 1402 *input, 1403 0, 1404 start_adr, 1405 start_adr + input->length(), 1406 NULL); 1407 1408 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result); 1409 CHECK(isolate->has_pending_exception()); 1410 isolate->clear_pending_exception(); 1411} 1412 1413 1414TEST(MacroAssemblerNativeLotsOfRegisters) { 1415 v8::V8::Initialize(); 1416 ContextInitializer initializer; 1417 Isolate* isolate = CcTest::i_isolate(); 1418 Factory* factory = isolate->factory(); 1419 Zone zone(CcTest::i_isolate()->allocator()); 1420 1421 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1, 1422 2); 1423 1424 // At least 2048, to ensure the allocated space for registers 1425 // span one full page. 1426 const int large_number = 8000; 1427 m.WriteCurrentPositionToRegister(large_number, 42); 1428 m.WriteCurrentPositionToRegister(0, 0); 1429 m.WriteCurrentPositionToRegister(1, 1); 1430 Label done; 1431 m.CheckNotBackReference(0, false, &done); // Performs a system-stack push. 1432 m.Bind(&done); 1433 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck); 1434 m.PopRegister(1); 1435 m.Succeed(); 1436 1437 Handle<String> source = 1438 factory->NewStringFromStaticChars("<huge register space test>"); 1439 Handle<Object> code_object = m.GetCode(source); 1440 Handle<Code> code = Handle<Code>::cast(code_object); 1441 1442 // String long enough for test (content doesn't matter). 
1443 Handle<String> input = factory->NewStringFromStaticChars("sample text"); 1444 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input); 1445 Address start_adr = seq_input->GetCharsAddress(); 1446 1447 int captures[2]; 1448 NativeRegExpMacroAssembler::Result result = 1449 Execute(*code, 1450 *input, 1451 0, 1452 start_adr, 1453 start_adr + input->length(), 1454 captures); 1455 1456 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result); 1457 CHECK_EQ(0, captures[0]); 1458 CHECK_EQ(42, captures[1]); 1459 1460 isolate->clear_pending_exception(); 1461} 1462 1463#else // V8_INTERPRETED_REGEXP 1464 1465TEST(MacroAssembler) { 1466 byte codes[1024]; 1467 Zone zone(CcTest::i_isolate()->allocator()); 1468 RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024), 1469 &zone); 1470 // ^f(o)o. 1471 Label start, fail, backtrack; 1472 1473 m.SetRegister(4, 42); 1474 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck); 1475 m.AdvanceRegister(4, 42); 1476 m.GoTo(&start); 1477 m.Fail(); 1478 m.Bind(&start); 1479 m.PushBacktrack(&fail); 1480 m.CheckNotAtStart(0, NULL); 1481 m.LoadCurrentCharacter(0, NULL); 1482 m.CheckNotCharacter('f', NULL); 1483 m.LoadCurrentCharacter(1, NULL); 1484 m.CheckNotCharacter('o', NULL); 1485 m.LoadCurrentCharacter(2, NULL); 1486 m.CheckNotCharacter('o', NULL); 1487 m.WriteCurrentPositionToRegister(0, 0); 1488 m.WriteCurrentPositionToRegister(1, 3); 1489 m.WriteCurrentPositionToRegister(2, 1); 1490 m.WriteCurrentPositionToRegister(3, 2); 1491 m.AdvanceCurrentPosition(3); 1492 m.PushBacktrack(&backtrack); 1493 m.Succeed(); 1494 m.Bind(&backtrack); 1495 m.ClearRegisters(2, 3); 1496 m.Backtrack(); 1497 m.Bind(&fail); 1498 m.PopRegister(0); 1499 m.Fail(); 1500 1501 Isolate* isolate = CcTest::i_isolate(); 1502 Factory* factory = isolate->factory(); 1503 HandleScope scope(isolate); 1504 1505 Handle<String> source = factory->NewStringFromStaticChars("^f(o)o"); 1506 Handle<ByteArray> array = 
Handle<ByteArray>::cast(m.GetCode(source)); 1507 int captures[5]; 1508 1509 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'}; 1510 Handle<String> f1_16 = factory->NewStringFromTwoByte( 1511 Vector<const uc16>(str1, 6)).ToHandleChecked(); 1512 1513 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0)); 1514 CHECK_EQ(0, captures[0]); 1515 CHECK_EQ(3, captures[1]); 1516 CHECK_EQ(1, captures[2]); 1517 CHECK_EQ(2, captures[3]); 1518 CHECK_EQ(84, captures[4]); 1519 1520 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'}; 1521 Handle<String> f2_16 = factory->NewStringFromTwoByte( 1522 Vector<const uc16>(str2, 6)).ToHandleChecked(); 1523 1524 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0)); 1525 CHECK_EQ(42, captures[0]); 1526} 1527 1528#endif // V8_INTERPRETED_REGEXP 1529 1530 1531TEST(AddInverseToTable) { 1532 static const int kLimit = 1000; 1533 static const int kRangeCount = 16; 1534 for (int t = 0; t < 10; t++) { 1535 Zone zone(CcTest::i_isolate()->allocator()); 1536 ZoneList<CharacterRange>* ranges = 1537 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone); 1538 for (int i = 0; i < kRangeCount; i++) { 1539 int from = PseudoRandom(t + 87, i + 25) % kLimit; 1540 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20)); 1541 if (to > kLimit) to = kLimit; 1542 ranges->Add(CharacterRange::Range(from, to), &zone); 1543 } 1544 DispatchTable table(&zone); 1545 DispatchTableConstructor cons(&table, false, &zone); 1546 cons.set_choice_index(0); 1547 cons.AddInverse(ranges); 1548 for (int i = 0; i < kLimit; i++) { 1549 bool is_on = false; 1550 for (int j = 0; !is_on && j < kRangeCount; j++) 1551 is_on = ranges->at(j).Contains(i); 1552 OutSet* set = table.Get(i); 1553 CHECK_EQ(is_on, set->Get(0) == false); 1554 } 1555 } 1556 Zone zone(CcTest::i_isolate()->allocator()); 1557 ZoneList<CharacterRange>* ranges = 1558 new(&zone) ZoneList<CharacterRange>(1, &zone); 1559 ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone); 1560 
DispatchTable table(&zone); 1561 DispatchTableConstructor cons(&table, false, &zone); 1562 cons.set_choice_index(0); 1563 cons.AddInverse(ranges); 1564 CHECK(!table.Get(0xFFFE)->Get(0)); 1565 CHECK(table.Get(0xFFFF)->Get(0)); 1566} 1567 1568 1569static uc32 canonicalize(uc32 c) { 1570 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth]; 1571 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL); 1572 if (count == 0) { 1573 return c; 1574 } else { 1575 CHECK_EQ(1, count); 1576 return canon[0]; 1577 } 1578} 1579 1580 1581TEST(LatinCanonicalize) { 1582 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1583 for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) { 1584 unibrow::uchar upper = lower + ('A' - 'a'); 1585 CHECK_EQ(canonicalize(lower), canonicalize(upper)); 1586 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1587 int length = un_canonicalize.get(lower, '\0', uncanon); 1588 CHECK_EQ(2, length); 1589 CHECK_EQ(upper, uncanon[0]); 1590 CHECK_EQ(lower, uncanon[1]); 1591 } 1592 for (uc32 c = 128; c < (1 << 21); c++) 1593 CHECK_GE(canonicalize(c), 128); 1594 unibrow::Mapping<unibrow::ToUppercase> to_upper; 1595 // Canonicalization is only defined for the Basic Multilingual Plane. 
1596 for (uc32 c = 0; c < (1 << 16); c++) { 1597 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth]; 1598 int length = to_upper.get(c, '\0', upper); 1599 if (length == 0) { 1600 length = 1; 1601 upper[0] = c; 1602 } 1603 uc32 u = upper[0]; 1604 if (length > 1 || (c >= 128 && u < 128)) 1605 u = c; 1606 CHECK_EQ(u, canonicalize(c)); 1607 } 1608} 1609 1610 1611static uc32 CanonRangeEnd(uc32 c) { 1612 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth]; 1613 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL); 1614 if (count == 0) { 1615 return c; 1616 } else { 1617 CHECK_EQ(1, count); 1618 return canon[0]; 1619 } 1620} 1621 1622 1623TEST(RangeCanonicalization) { 1624 // Check that we arrive at the same result when using the basic 1625 // range canonicalization primitives as when using immediate 1626 // canonicalization. 1627 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1628 int block_start = 0; 1629 while (block_start <= 0xFFFF) { 1630 uc32 block_end = CanonRangeEnd(block_start); 1631 unsigned block_length = block_end - block_start + 1; 1632 if (block_length > 1) { 1633 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1634 int first_length = un_canonicalize.get(block_start, '\0', first); 1635 for (unsigned i = 1; i < block_length; i++) { 1636 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1637 int succ_length = un_canonicalize.get(block_start + i, '\0', succ); 1638 CHECK_EQ(first_length, succ_length); 1639 for (int j = 0; j < succ_length; j++) { 1640 int calc = first[j] + i; 1641 int found = succ[j]; 1642 CHECK_EQ(calc, found); 1643 } 1644 } 1645 } 1646 block_start = block_start + block_length; 1647 } 1648} 1649 1650 1651TEST(UncanonicalizeEquivalence) { 1652 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1653 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1654 for (int i = 0; i < (1 << 16); i++) { 1655 int length = un_canonicalize.get(i, 
'\0', chars); 1656 for (int j = 0; j < length; j++) { 1657 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1658 int length2 = un_canonicalize.get(chars[j], '\0', chars2); 1659 CHECK_EQ(length, length2); 1660 for (int k = 0; k < length; k++) 1661 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k])); 1662 } 1663 } 1664} 1665 1666 1667static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input, 1668 Vector<CharacterRange> expected) { 1669 Zone zone(CcTest::i_isolate()->allocator()); 1670 int count = expected.length(); 1671 ZoneList<CharacterRange>* list = 1672 new(&zone) ZoneList<CharacterRange>(count, &zone); 1673 list->Add(input, &zone); 1674 CharacterRange::AddCaseEquivalents(isolate, &zone, list, false); 1675 list->Remove(0); // Remove the input before checking results. 1676 CHECK_EQ(count, list->length()); 1677 for (int i = 0; i < list->length(); i++) { 1678 CHECK_EQ(expected[i].from(), list->at(i).from()); 1679 CHECK_EQ(expected[i].to(), list->at(i).to()); 1680 } 1681} 1682 1683 1684static void TestSimpleRangeCaseIndependence(Isolate* isolate, 1685 CharacterRange input, 1686 CharacterRange expected) { 1687 EmbeddedVector<CharacterRange, 1> vector; 1688 vector[0] = expected; 1689 TestRangeCaseIndependence(isolate, input, vector); 1690} 1691 1692 1693TEST(CharacterRangeCaseIndependence) { 1694 Isolate* isolate = CcTest::i_isolate(); 1695 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'), 1696 CharacterRange::Singleton('A')); 1697 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'), 1698 CharacterRange::Singleton('Z')); 1699 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'), 1700 CharacterRange::Range('A', 'Z')); 1701 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'), 1702 CharacterRange::Range('C', 'F')); 1703 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'), 1704 CharacterRange::Range('A', 'B')); 1705 
TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'), 1706 CharacterRange::Range('Y', 'Z')); 1707 TestSimpleRangeCaseIndependence(isolate, 1708 CharacterRange::Range('a' - 1, 'z' + 1), 1709 CharacterRange::Range('A', 'Z')); 1710 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'), 1711 CharacterRange::Range('a', 'z')); 1712 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'), 1713 CharacterRange::Range('c', 'f')); 1714 TestSimpleRangeCaseIndependence(isolate, 1715 CharacterRange::Range('A' - 1, 'Z' + 1), 1716 CharacterRange::Range('a', 'z')); 1717 // Here we need to add [l-z] to complete the case independence of 1718 // [A-Za-z] but we expect [a-z] to be added since we always add a 1719 // whole block at a time. 1720 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'), 1721 CharacterRange::Range('a', 'z')); 1722} 1723 1724 1725static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) { 1726 if (ranges == NULL) 1727 return false; 1728 for (int i = 0; i < ranges->length(); i++) { 1729 CharacterRange range = ranges->at(i); 1730 if (range.from() <= c && c <= range.to()) 1731 return true; 1732 } 1733 return false; 1734} 1735 1736 1737TEST(UnicodeRangeSplitter) { 1738 Zone zone(CcTest::i_isolate()->allocator()); 1739 ZoneList<CharacterRange>* base = 1740 new(&zone) ZoneList<CharacterRange>(1, &zone); 1741 base->Add(CharacterRange::Everything(), &zone); 1742 UnicodeRangeSplitter splitter(&zone, base); 1743 // BMP 1744 for (uc32 c = 0; c < 0xd800; c++) { 1745 CHECK(InClass(c, splitter.bmp())); 1746 CHECK(!InClass(c, splitter.lead_surrogates())); 1747 CHECK(!InClass(c, splitter.trail_surrogates())); 1748 CHECK(!InClass(c, splitter.non_bmp())); 1749 } 1750 // Lead surrogates 1751 for (uc32 c = 0xd800; c < 0xdbff; c++) { 1752 CHECK(!InClass(c, splitter.bmp())); 1753 CHECK(InClass(c, splitter.lead_surrogates())); 1754 CHECK(!InClass(c, splitter.trail_surrogates())); 1755 CHECK(!InClass(c, 
splitter.non_bmp())); 1756 } 1757 // Trail surrogates 1758 for (uc32 c = 0xdc00; c < 0xdfff; c++) { 1759 CHECK(!InClass(c, splitter.bmp())); 1760 CHECK(!InClass(c, splitter.lead_surrogates())); 1761 CHECK(InClass(c, splitter.trail_surrogates())); 1762 CHECK(!InClass(c, splitter.non_bmp())); 1763 } 1764 // BMP 1765 for (uc32 c = 0xe000; c < 0xffff; c++) { 1766 CHECK(InClass(c, splitter.bmp())); 1767 CHECK(!InClass(c, splitter.lead_surrogates())); 1768 CHECK(!InClass(c, splitter.trail_surrogates())); 1769 CHECK(!InClass(c, splitter.non_bmp())); 1770 } 1771 // Non-BMP 1772 for (uc32 c = 0x10000; c < 0x10ffff; c++) { 1773 CHECK(!InClass(c, splitter.bmp())); 1774 CHECK(!InClass(c, splitter.lead_surrogates())); 1775 CHECK(!InClass(c, splitter.trail_surrogates())); 1776 CHECK(InClass(c, splitter.non_bmp())); 1777 } 1778} 1779 1780 1781TEST(CanonicalizeCharacterSets) { 1782 Zone zone(CcTest::i_isolate()->allocator()); 1783 ZoneList<CharacterRange>* list = 1784 new(&zone) ZoneList<CharacterRange>(4, &zone); 1785 CharacterSet set(list); 1786 1787 list->Add(CharacterRange::Range(10, 20), &zone); 1788 list->Add(CharacterRange::Range(30, 40), &zone); 1789 list->Add(CharacterRange::Range(50, 60), &zone); 1790 set.Canonicalize(); 1791 CHECK_EQ(3, list->length()); 1792 CHECK_EQ(10, list->at(0).from()); 1793 CHECK_EQ(20, list->at(0).to()); 1794 CHECK_EQ(30, list->at(1).from()); 1795 CHECK_EQ(40, list->at(1).to()); 1796 CHECK_EQ(50, list->at(2).from()); 1797 CHECK_EQ(60, list->at(2).to()); 1798 1799 list->Rewind(0); 1800 list->Add(CharacterRange::Range(10, 20), &zone); 1801 list->Add(CharacterRange::Range(50, 60), &zone); 1802 list->Add(CharacterRange::Range(30, 40), &zone); 1803 set.Canonicalize(); 1804 CHECK_EQ(3, list->length()); 1805 CHECK_EQ(10, list->at(0).from()); 1806 CHECK_EQ(20, list->at(0).to()); 1807 CHECK_EQ(30, list->at(1).from()); 1808 CHECK_EQ(40, list->at(1).to()); 1809 CHECK_EQ(50, list->at(2).from()); 1810 CHECK_EQ(60, list->at(2).to()); 1811 1812 list->Rewind(0); 
1813 list->Add(CharacterRange::Range(30, 40), &zone); 1814 list->Add(CharacterRange::Range(10, 20), &zone); 1815 list->Add(CharacterRange::Range(25, 25), &zone); 1816 list->Add(CharacterRange::Range(100, 100), &zone); 1817 list->Add(CharacterRange::Range(1, 1), &zone); 1818 set.Canonicalize(); 1819 CHECK_EQ(5, list->length()); 1820 CHECK_EQ(1, list->at(0).from()); 1821 CHECK_EQ(1, list->at(0).to()); 1822 CHECK_EQ(10, list->at(1).from()); 1823 CHECK_EQ(20, list->at(1).to()); 1824 CHECK_EQ(25, list->at(2).from()); 1825 CHECK_EQ(25, list->at(2).to()); 1826 CHECK_EQ(30, list->at(3).from()); 1827 CHECK_EQ(40, list->at(3).to()); 1828 CHECK_EQ(100, list->at(4).from()); 1829 CHECK_EQ(100, list->at(4).to()); 1830 1831 list->Rewind(0); 1832 list->Add(CharacterRange::Range(10, 19), &zone); 1833 list->Add(CharacterRange::Range(21, 30), &zone); 1834 list->Add(CharacterRange::Range(20, 20), &zone); 1835 set.Canonicalize(); 1836 CHECK_EQ(1, list->length()); 1837 CHECK_EQ(10, list->at(0).from()); 1838 CHECK_EQ(30, list->at(0).to()); 1839} 1840 1841 1842TEST(CharacterRangeMerge) { 1843 Zone zone(CcTest::i_isolate()->allocator()); 1844 ZoneList<CharacterRange> l1(4, &zone); 1845 ZoneList<CharacterRange> l2(4, &zone); 1846 // Create all combinations of intersections of ranges, both singletons and 1847 // longer. 
1848 1849 int offset = 0; 1850 1851 // The five kinds of singleton intersections: 1852 // X 1853 // Y - outside before 1854 // Y - outside touching start 1855 // Y - overlap 1856 // Y - outside touching end 1857 // Y - outside after 1858 1859 for (int i = 0; i < 5; i++) { 1860 l1.Add(CharacterRange::Singleton(offset + 2), &zone); 1861 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1862 offset += 6; 1863 } 1864 1865 // The seven kinds of singleton/non-singleton intersections: 1866 // XXX 1867 // Y - outside before 1868 // Y - outside touching start 1869 // Y - inside touching start 1870 // Y - entirely inside 1871 // Y - inside touching end 1872 // Y - outside touching end 1873 // Y - disjoint after 1874 1875 for (int i = 0; i < 7; i++) { 1876 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone); 1877 l2.Add(CharacterRange::Singleton(offset + i), &zone); 1878 offset += 8; 1879 } 1880 1881 // The eleven kinds of non-singleton intersections: 1882 // 1883 // XXXXXXXX 1884 // YYYY - outside before. 1885 // YYYY - outside touching start. 1886 // YYYY - overlapping start 1887 // YYYY - inside touching start 1888 // YYYY - entirely inside 1889 // YYYY - inside touching end 1890 // YYYY - overlapping end 1891 // YYYY - outside touching end 1892 // YYYY - outside after 1893 // YYYYYYYY - identical 1894 // YYYYYYYYYYYY - containing entirely. 1895 1896 for (int i = 0; i < 9; i++) { 1897 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8. 
1898 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone); 1899 offset += 22; 1900 } 1901 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1902 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1903 offset += 22; 1904 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); 1905 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone); 1906 offset += 22; 1907 1908 // Different kinds of multi-range overlap: 1909 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX 1910 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y 1911 1912 l1.Add(CharacterRange::Range(offset, offset + 21), &zone); 1913 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone); 1914 for (int i = 0; i < 6; i++) { 1915 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone); 1916 l2.Add(CharacterRange::Singleton(offset + 8), &zone); 1917 offset += 9; 1918 } 1919 1920 CHECK(CharacterRange::IsCanonical(&l1)); 1921 CHECK(CharacterRange::IsCanonical(&l2)); 1922 1923 ZoneList<CharacterRange> first_only(4, &zone); 1924 ZoneList<CharacterRange> second_only(4, &zone); 1925 ZoneList<CharacterRange> both(4, &zone); 1926} 1927 1928 1929TEST(Graph) { 1930 Execute("\\b\\w+\\b", false, true, true); 1931} 1932 1933 1934namespace { 1935 1936int* global_use_counts = NULL; 1937 1938void MockUseCounterCallback(v8::Isolate* isolate, 1939 v8::Isolate::UseCounterFeature feature) { 1940 ++global_use_counts[feature]; 1941} 1942} 1943 1944 1945// Test that ES2015 RegExp compatibility fixes are in place, that they 1946// are not overly broad, and the appropriate UseCounters are incremented 1947TEST(UseCountRegExp) { 1948 v8::Isolate* isolate = CcTest::isolate(); 1949 v8::HandleScope scope(isolate); 1950 LocalContext env; 1951 int use_counts[v8::Isolate::kUseCounterFeatureCount] = {}; 1952 global_use_counts = use_counts; 1953 CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback); 1954 1955 // Compat fix: RegExp.prototype.sticky == undefined; UseCounter 
tracks it 1956 v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky"); 1957 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); 1958 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]); 1959 CHECK(resultSticky->IsUndefined()); 1960 1961 // re.sticky has approriate value and doesn't touch UseCounter 1962 v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky"); 1963 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); 1964 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]); 1965 CHECK(resultReSticky->IsFalse()); 1966 1967 // When the getter is caleld on another object, throw an exception 1968 // and don't increment the UseCounter 1969 v8::Local<v8::Value> resultStickyError = CompileRun( 1970 "var exception;" 1971 "try { " 1972 " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')" 1973 " .get.call(null);" 1974 "} catch (e) {" 1975 " exception = e;" 1976 "}" 1977 "exception"); 1978 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); 1979 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]); 1980 CHECK(resultStickyError->IsObject()); 1981 1982 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix; 1983 // a UseCounter is incremented to track it. 
1984 v8::Local<v8::Value> resultToString = 1985 CompileRun("RegExp.prototype.toString().length"); 1986 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); 1987 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]); 1988 CHECK(resultToString->IsInt32()); 1989 CHECK_EQ(6, 1990 resultToString->Int32Value(isolate->GetCurrentContext()).FromJust()); 1991 1992 // .toString() works on normal RegExps 1993 v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length"); 1994 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); 1995 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]); 1996 CHECK(resultReToString->IsInt32()); 1997 CHECK_EQ( 1998 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust()); 1999 2000 // .toString() throws on non-RegExps that aren't RegExp.prototype 2001 v8::Local<v8::Value> resultToStringError = CompileRun( 2002 "var exception;" 2003 "try { RegExp.prototype.toString.call(null) }" 2004 "catch (e) { exception = e; }" 2005 "exception"); 2006 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); 2007 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]); 2008 CHECK(resultToStringError->IsObject()); 2009} 2010 2011class UncachedExternalString 2012 : public v8::String::ExternalOneByteStringResource { 2013 public: 2014 const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; } 2015 size_t length() const override { return 26; } 2016 bool IsCompressible() const override { return true; } 2017}; 2018 2019TEST(UncachedExternalString) { 2020 v8::Isolate* isolate = CcTest::isolate(); 2021 v8::HandleScope scope(isolate); 2022 LocalContext env; 2023 v8::Local<v8::String> external = 2024 v8::String::NewExternalOneByte(isolate, new UncachedExternalString()) 2025 .ToLocalChecked(); 2026 CHECK(v8::Utils::OpenHandle(*external)->map() == 2027 CcTest::i_isolate()->heap()->short_external_one_byte_string_map()); 2028 v8::Local<v8::Object> global = 
env->Global(); 2029 global->Set(env.local(), v8_str("external"), external).FromJust(); 2030 CompileRun("var re = /y(.)/; re.test('ab');"); 2031 ExpectString("external.substring(1).match(re)[1]", "z"); 2032} 2033