1b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Copyright 2012 the V8 project authors. All rights reserved. 2b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// found in the LICENSE file. 4b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 5014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/jsregexp.h" 6b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 7f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen Murdoch#include <memory> 8f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen Murdoch 9b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include "src/base/platform/platform.h" 10b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include "src/compilation-cache.h" 11f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen Murdoch#include "src/elements.h" 12b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include "src/execution.h" 13b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include "src/factory.h" 14014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/isolate-inl.h" 15014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/messages.h" 16b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include "src/ostreams.h" 17014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/interpreter-irregexp.h" 18014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/jsregexp-inl.h" 19014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/regexp-macro-assembler-irregexp.h" 20014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/regexp-macro-assembler-tracer.h" 21f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen Murdoch#include "src/regexp/regexp-macro-assembler.h" 22014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/regexp-parser.h" 23014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/regexp-stack.h" 
24958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include "src/runtime/runtime.h" 25014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/splay-tree-inl.h" 26b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include "src/string-search.h" 27958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#include "src/unicode-decoder.h" 28a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 29109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch#ifdef V8_I18N_SUPPORT 3062ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch#include "unicode/uniset.h" 31109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch#include "unicode/utypes.h" 32109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch#endif // V8_I18N_SUPPORT 33109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 346ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#ifndef V8_INTERPRETED_REGEXP 35a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#if V8_TARGET_ARCH_IA32 36014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/ia32/regexp-macro-assembler-ia32.h" 37a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#elif V8_TARGET_ARCH_X64 38014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/x64/regexp-macro-assembler-x64.h" 39b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#elif V8_TARGET_ARCH_ARM64 40014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/arm64/regexp-macro-assembler-arm64.h" 41a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#elif V8_TARGET_ARCH_ARM 42014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/arm/regexp-macro-assembler-arm.h" 43014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#elif V8_TARGET_ARCH_PPC 44014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/ppc/regexp-macro-assembler-ppc.h" 453b9bc31999c9787eb726ecdbfd5796bfdec32a18Ben Murdoch#elif V8_TARGET_ARCH_S390 463b9bc31999c9787eb726ecdbfd5796bfdec32a18Ben Murdoch#include "src/regexp/s390/regexp-macro-assembler-s390.h" 
4744f0eee88ff00398ff7f715fab053374d808c90dSteve Block#elif V8_TARGET_ARCH_MIPS 48014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/mips/regexp-macro-assembler-mips.h" 49b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#elif V8_TARGET_ARCH_MIPS64 50014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/mips64/regexp-macro-assembler-mips64.h" 51b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#elif V8_TARGET_ARCH_X87 52014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/regexp/x87/regexp-macro-assembler-x87.h" 53a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#else 54a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#error Unsupported target architecture. 55a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 56a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 57a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 58a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 59a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace v8 { 60a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace internal { 61a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 62b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochMUST_USE_RESULT 63b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic inline MaybeHandle<Object> ThrowRegExpException( 64014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<JSRegExp> re, Handle<String> pattern, Handle<String> error_text) { 6544f0eee88ff00398ff7f715fab053374d808c90dSteve Block Isolate* isolate = re->GetIsolate(); 66014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch THROW_NEW_ERROR(isolate, NewSyntaxError(MessageTemplate::kMalformedRegExp, 67014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch pattern, error_text), 68014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Object); 69014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 70014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 71014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 
72014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochinline void ThrowRegExpException(Handle<JSRegExp> re, 73014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<String> error_text) { 74014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(ThrowRegExpException(re, Handle<String>(re->Pattern()), error_text)); 75b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 76b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 77b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 78b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochContainedInLattice AddRange(ContainedInLattice containment, 79b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int* ranges, 80b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int ranges_length, 81b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Interval new_range) { 82b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK((ranges_length & 1) == 1); 83109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(ranges[ranges_length - 1] == String::kMaxCodePoint + 1); 84b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (containment == kLatticeUnknown) return containment; 85b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool inside = false; 86b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int last = 0; 87b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) { 88b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Consider the range from last to ranges[i]. 89b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // We haven't got to the new range yet. 90b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ranges[i] <= new_range.from()) continue; 91b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // New range is wholly inside last-ranges[i]. Note that new_range.to() is 92b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // inclusive, but the values in ranges are not. 
93b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (last <= new_range.from() && new_range.to() < ranges[i]) { 94b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return Combine(containment, inside ? kLatticeIn : kLatticeOut); 95b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 96b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return kLatticeUnknown; 97b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 98b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return containment; 99b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 100b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 101b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 102b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// More makes code generation slower, less makes V8 benchmark score lower. 103b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochconst int kMaxLookaheadForBoyerMoore = 8; 104b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// In a 3-character pattern you can maximally step forwards 3 characters 105b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// at a time, which is not always enough to pay for the extra logic. 106b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochconst int kPatternTooShortForBoyerMoore = 2; 107b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 108b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 109b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Identifies the sort of regexps where the regexp engine is faster 110b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// than the code used for atom matches. 
111b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic bool HasFewDifferentCharacters(Handle<String> pattern) { 112b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int length = Min(kMaxLookaheadForBoyerMoore, pattern->length()); 113b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (length <= kPatternTooShortForBoyerMoore) return false; 114b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int kMod = 128; 115b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool character_found[kMod]; 116b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int different = 0; 117b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch memset(&character_found[0], 0, sizeof(character_found)); 118b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < length; i++) { 119b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int ch = (pattern->Get(i) & (kMod - 1)); 120b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!character_found[ch]) { 121b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch character_found[ch] = true; 122b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch different++; 123b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // We declare a regexp low-alphabet if it has at least 3 times as many 124b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // characters as it has different characters. 125b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (different * 3 > length) return false; 126b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 127b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 128b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return true; 129a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 130a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 131a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 132a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Generic RegExp methods. Dispatches to implementation specific methods. 
133a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 135b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochMaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, 136b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> pattern, 137958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier JSRegExp::Flags flags) { 13844f0eee88ff00398ff7f715fab053374d808c90dSteve Block Isolate* isolate = re->GetIsolate(); 139c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Zone zone(isolate->allocator(), ZONE_NAME); 14044f0eee88ff00398ff7f715fab053374d808c90dSteve Block CompilationCache* compilation_cache = isolate->compilation_cache(); 141b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch MaybeHandle<FixedArray> maybe_cached = 142b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compilation_cache->LookupRegExp(pattern, flags); 143b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<FixedArray> cached; 144c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (maybe_cached.ToHandle(&cached)) { 145a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block re->set_data(*cached); 146a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return re; 147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 148b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch pattern = String::Flatten(pattern); 14944f0eee88ff00398ff7f715fab053374d808c90dSteve Block PostponeInterruptsScope postpone(isolate); 150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompileData parse_result; 15144f0eee88ff00398ff7f715fab053374d808c90dSteve Block FlatStringReader reader(isolate, pattern); 152109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (!RegExpParser::ParseRegExp(re->GetIsolate(), &zone, &reader, flags, 153109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch &parse_result)) { 154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Throw an exception if we fail to parse the pattern. 
155014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return ThrowRegExpException(re, pattern, parse_result.error); 156a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 158b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool has_been_compiled = false; 159b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 160014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) && 161014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch !(flags & JSRegExp::kSticky) && !HasFewDifferentCharacters(pattern)) { 162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Parse-tree is a single atom that is equal to the pattern. 163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AtomCompile(re, pattern, flags, pattern); 164b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch has_been_compiled = true; 165014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else if (parse_result.tree->IsAtom() && !(flags & JSRegExp::kIgnoreCase) && 166014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch !(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) { 167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpAtom* atom = parse_result.tree->AsAtom(); 168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Vector<const uc16> atom_pattern = atom->data(); 169b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> atom_string; 170b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ASSIGN_RETURN_ON_EXCEPTION( 171b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch isolate, atom_string, 172b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch isolate->factory()->NewStringFromTwoByte(atom_pattern), 173b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Object); 174b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!HasFewDifferentCharacters(atom_string)) { 175b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AtomCompile(re, pattern, flags, atom_string); 
176b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch has_been_compiled = true; 177b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 178b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 179b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!has_been_compiled) { 1806ded16be15dd865a9b21ea304d5273c8be299c87Steve Block IrregexpInitialize(re, pattern, flags, parse_result.capture_count); 181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 182b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(re->data()->IsFixedArray()); 183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Compilation succeeded so the data is set on the regexp 184a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // and we can store it in the cache. 185a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<FixedArray> data(FixedArray::cast(re->data())); 18644f0eee88ff00398ff7f715fab053374d808c90dSteve Block compilation_cache->PutRegExp(pattern, flags, data); 187a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return re; 189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 191b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochMaybeHandle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 192f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen Murdoch Handle<String> subject, int index, 193c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Handle<RegExpMatchInfo> last_match_info) { 194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (regexp->TypeTag()) { 195a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case JSRegExp::ATOM: 196a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return AtomExec(regexp, subject, index, last_match_info); 197a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case JSRegExp::IRREGEXP: { 198b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return IrregexpExec(regexp, subject, index, last_match_info); 
199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 202b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return MaybeHandle<Object>(); 203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 206a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// RegExp Atom implementation: Simple string search using indexOf. 208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid RegExpImpl::AtomCompile(Handle<JSRegExp> re, 211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern, 212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block JSRegExp::Flags flags, 213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> match_pattern) { 21444f0eee88ff00398ff7f715fab053374d808c90dSteve Block re->GetIsolate()->factory()->SetRegExpAtomData(re, 21544f0eee88ff00398ff7f715fab053374d808c90dSteve Block JSRegExp::ATOM, 21644f0eee88ff00398ff7f715fab053374d808c90dSteve Block pattern, 21744f0eee88ff00398ff7f715fab053374d808c90dSteve Block flags, 21844f0eee88ff00398ff7f715fab053374d808c90dSteve Block match_pattern); 219a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 220a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 221c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdochstatic void SetAtomLastCapture(Handle<RegExpMatchInfo> last_match_info, 222c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch String* subject, int from, int to) { 223c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch SealHandleScope shs(last_match_info->GetIsolate()); 224c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch last_match_info->SetNumberOfCaptureRegisters(2); 
225c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch last_match_info->SetLastSubject(subject); 226c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch last_match_info->SetLastInput(subject); 227c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch last_match_info->SetCapture(0, from); 228c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch last_match_info->SetCapture(1, to); 229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 232b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp, 233b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> subject, 234b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int index, 235b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int32_t* output, 236b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int output_size) { 237b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Isolate* isolate = regexp->GetIsolate(); 23844f0eee88ff00398ff7f715fab053374d808c90dSteve Block 239b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(0 <= index); 240b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(index <= subject->length()); 241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 242b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch subject = String::Flatten(subject); 243b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DisallowHeapAllocation no_gc; // ensure vectors stay valid 244b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 245b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); 246b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int needle_len = needle->length(); 247b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(needle->IsFlat()); 248b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(0, needle_len); 
249b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 250b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (index + needle_len > subject->length()) { 251b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return RegExpImpl::RE_FAILURE; 252b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 25344f0eee88ff00398ff7f715fab053374d808c90dSteve Block 254b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < output_size; i += 2) { 25569a99ed0b2b2ef69d393c371b03db3a98aaf880eBen Murdoch String::FlatContent needle_content = needle->GetFlatContent(); 25669a99ed0b2b2ef69d393c371b03db3a98aaf880eBen Murdoch String::FlatContent subject_content = subject->GetFlatContent(); 257b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(needle_content.IsFlat()); 258b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(subject_content.IsFlat()); 259b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // dispatch on type of strings 260b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch index = 261b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (needle_content.IsOneByte() 262b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ? (subject_content.IsOneByte() 263b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ? SearchString(isolate, subject_content.ToOneByteVector(), 264b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch needle_content.ToOneByteVector(), index) 265b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : SearchString(isolate, subject_content.ToUC16Vector(), 266b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch needle_content.ToOneByteVector(), index)) 267b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : (subject_content.IsOneByte() 268b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ? 
SearchString(isolate, subject_content.ToOneByteVector(), 269b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch needle_content.ToUC16Vector(), index) 270b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : SearchString(isolate, subject_content.ToUC16Vector(), 271b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch needle_content.ToUC16Vector(), index))); 272b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (index == -1) { 273b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return i / 2; // Return number of matches. 274b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 275b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch output[i] = index; 276b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch output[i+1] = index + needle_len; 277b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch index += needle_len; 278b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 280b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return output_size / 2; 281b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 282b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 283f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen MurdochHandle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, Handle<String> subject, 284b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int index, 285c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Handle<RegExpMatchInfo> last_match_info) { 286b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Isolate* isolate = re->GetIsolate(); 287b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 288b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kNumRegisters = 2; 289b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); 290b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); 291b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
292b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters); 293b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 294b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value(); 295b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 296b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(res, RegExpImpl::RE_SUCCESS); 297b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SealHandleScope shs(isolate); 298c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch SetAtomLastCapture(last_match_info, *subject, output_registers[0], 299c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch output_registers[1]); 300a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return last_match_info; 301a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 302a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 303a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 304a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Irregexp implementation. 305a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Ensures that the regexp object contains a compiled version of the 307b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// source for either one-byte or two-byte subject strings. 308a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// If the compiled version doesn't already exist, it is compiled 309a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// from the source pattern. 310a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// If compilation fails, an exception is thrown and this function 311a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// returns false. 
312b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochbool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, 313b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> sample_subject, 314b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool is_one_byte) { 315b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte)); 3166ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#ifdef V8_INTERPRETED_REGEXP 317a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (compiled_code->IsByteArray()) return true; 3186ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#else // V8_INTERPRETED_REGEXP (RegExp native code) 3196ded16be15dd865a9b21ea304d5273c8be299c87Steve Block if (compiled_code->IsCode()) return true; 320a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 321257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // We could potentially have marked this as flushable, but have kept 322257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // a saved version if we did not flush it yet. 323b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte)); 324257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (saved_code->IsCode()) { 325257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // Reinstate the code in the original place. 
326b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code); 327b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(compiled_code->IsSmi()); 328257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch return true; 329257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 330b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return CompileIrregexp(re, sample_subject, is_one_byte); 331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 332a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 333a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 334b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochbool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, 335b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> sample_subject, 336b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool is_one_byte) { 337a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Compile the RegExp. 33844f0eee88ff00398ff7f715fab053374d808c90dSteve Block Isolate* isolate = re->GetIsolate(); 339c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Zone zone(isolate->allocator(), ZONE_NAME); 34044f0eee88ff00398ff7f715fab053374d808c90dSteve Block PostponeInterruptsScope postpone(isolate); 341257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // If we had a compilation error the last time this is saved at the 342257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // saved code index. 343b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte)); 344257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // When arriving here entry can only be a smi, either representing an 345257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // uncompiled regexp, a previous compilation error, or code that has 346257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // been flushed. 
347b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(entry->IsSmi()); 348257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch int entry_value = Smi::cast(entry)->value(); 349b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(entry_value == JSRegExp::kUninitializedValue || 350257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch entry_value == JSRegExp::kCompilationErrorValue || 351257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch (entry_value < JSRegExp::kCodeAgeMask && entry_value >= 0)); 352257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 353257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (entry_value == JSRegExp::kCompilationErrorValue) { 354257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // A previous compilation failed and threw an error which we store in 355257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // the saved code index (we store the error message, not the actual 356257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // error). Recreate the error object and throw it. 
357b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte)); 358b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(error_string->IsString()); 359257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch Handle<String> error_message(String::cast(error_string)); 360014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ThrowRegExpException(re, error_message); 361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block JSRegExp::Flags flags = re->GetFlags(); 365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern(re->Pattern()); 367b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch pattern = String::Flatten(pattern); 368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompileData compile_data; 36944f0eee88ff00398ff7f715fab053374d808c90dSteve Block FlatStringReader reader(isolate, pattern); 370109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, 371109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch &compile_data)) { 372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Throw an exception if we fail to parse the pattern. 373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 
374014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(ThrowRegExpException(re, pattern, compile_data.error)); 375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 377109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpEngine::CompilationResult result = 378109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern, 379109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch sample_subject, is_one_byte); 380a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (result.error_message != NULL) { 381a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Unable to compile regexp. 382b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> error_message = isolate->factory()->NewStringFromUtf8( 383b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CStrVector(result.error_message)).ToHandleChecked(); 384014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ThrowRegExpException(re, error_message); 385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); 389b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch data->set(JSRegExp::code_index(is_one_byte), result.code); 39013e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map); 391a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int register_max = IrregexpMaxRegisterCount(*data); 392a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (result.num_registers > register_max) { 393a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SetIrregexpMaxRegisterCount(*data, result.num_registers); 394a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 
395a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 396a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 397a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 398a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 399a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 400a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockint RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { 401a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Smi::cast( 402a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); 403a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 405a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { 407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); 408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 409a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 41013e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdochvoid RegExpImpl::SetIrregexpCaptureNameMap(FixedArray* re, 41113e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch Handle<FixedArray> value) { 41213e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch if (value.is_null()) { 413c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch re->set(JSRegExp::kIrregexpCaptureNameMapIndex, Smi::kZero); 41413e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch } else { 41513e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch re->set(JSRegExp::kIrregexpCaptureNameMapIndex, *value); 41613e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch } 41713e2dadd00298019ed862f2b2fc5068bba730bcfBen Murdoch} 418a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 419a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockint RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { 
420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); 421a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 422a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 423a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockint RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { 425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); 426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 429b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) { 430b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte))); 431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 434b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochCode* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) { 435b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return Code::cast(re->get(JSRegExp::code_index(is_one_byte))); 436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 438a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4396ded16be15dd865a9b21ea304d5273c8be299c87Steve Blockvoid RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re, 4406ded16be15dd865a9b21ea304d5273c8be299c87Steve Block Handle<String> pattern, 4416ded16be15dd865a9b21ea304d5273c8be299c87Steve Block JSRegExp::Flags flags, 4426ded16be15dd865a9b21ea304d5273c8be299c87Steve Block int capture_count) { 443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Initialize compiled code entries to null. 
44444f0eee88ff00398ff7f715fab053374d808c90dSteve Block re->GetIsolate()->factory()->SetRegExpIrregexpData(re, 44544f0eee88ff00398ff7f715fab053374d808c90dSteve Block JSRegExp::IRREGEXP, 44644f0eee88ff00398ff7f715fab053374d808c90dSteve Block pattern, 44744f0eee88ff00398ff7f715fab053374d808c90dSteve Block flags, 44844f0eee88ff00398ff7f715fab053374d808c90dSteve Block capture_count); 449a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 451a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4526ded16be15dd865a9b21ea304d5273c8be299c87Steve Blockint RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 4536ded16be15dd865a9b21ea304d5273c8be299c87Steve Block Handle<String> subject) { 45462ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch DCHECK(subject->IsFlat()); 45569a99ed0b2b2ef69d393c371b03db3a98aaf880eBen Murdoch 456b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Check representation of the underlying storage. 457b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool is_one_byte = subject->IsOneByteRepresentationUnderneath(); 458b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1; 45969a99ed0b2b2ef69d393c371b03db3a98aaf880eBen Murdoch 4606ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#ifdef V8_INTERPRETED_REGEXP 4616ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // Byte-code regexp needs space allocated for all its registers. 462b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // The result captures are copied to the start of the registers array 463b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // if the match succeeds. This way those registers are not clobbered 464b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // when we set the last match info from last successful match. 
465b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + 466b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 4676ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#else // V8_INTERPRETED_REGEXP 4686ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // Native regexp only needs room to output captures. Registers are handled 4696ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // internally. 4706ded16be15dd865a9b21ea304d5273c8be299c87Steve Block return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 4716ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#endif // V8_INTERPRETED_REGEXP 4726ded16be15dd865a9b21ea304d5273c8be299c87Steve Block} 4736ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 4746ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 475b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp, 476b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> subject, 477b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int index, 478b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int32_t* output, 479b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int output_size) { 48044f0eee88ff00398ff7f715fab053374d808c90dSteve Block Isolate* isolate = regexp->GetIsolate(); 48144f0eee88ff00398ff7f715fab053374d808c90dSteve Block 48244f0eee88ff00398ff7f715fab053374d808c90dSteve Block Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 4836ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 484b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(index >= 0); 485b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(index <= subject->length()); 486b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(subject->IsFlat()); 4876ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 
488b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool is_one_byte = subject->IsOneByteRepresentationUnderneath(); 4898defd9ff6930b4e24729971a61cf7469daf119beSteve Block 4906ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#ifndef V8_INTERPRETED_REGEXP 491b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 4926ded16be15dd865a9b21ea304d5273c8be299c87Steve Block do { 493b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EnsureCompiledIrregexp(regexp, subject, is_one_byte); 494b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate); 495b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // The stack is used to allocate registers for the compiled regexp code. 496b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // This means that in case of failure, the output registers array is left 497b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // untouched and contains the capture results from the previous successful 498b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // match. We can use that to set the last match info lazily. 
4996ded16be15dd865a9b21ea304d5273c8be299c87Steve Block NativeRegExpMacroAssembler::Result res = 5006ded16be15dd865a9b21ea304d5273c8be299c87Steve Block NativeRegExpMacroAssembler::Match(code, 5016ded16be15dd865a9b21ea304d5273c8be299c87Steve Block subject, 502b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch output, 503b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch output_size, 50444f0eee88ff00398ff7f715fab053374d808c90dSteve Block index, 50544f0eee88ff00398ff7f715fab053374d808c90dSteve Block isolate); 5066ded16be15dd865a9b21ea304d5273c8be299c87Steve Block if (res != NativeRegExpMacroAssembler::RETRY) { 507b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION || 50844f0eee88ff00398ff7f715fab053374d808c90dSteve Block isolate->has_pending_exception()); 5096ded16be15dd865a9b21ea304d5273c8be299c87Steve Block STATIC_ASSERT( 5106ded16be15dd865a9b21ea304d5273c8be299c87Steve Block static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 5116ded16be15dd865a9b21ea304d5273c8be299c87Steve Block STATIC_ASSERT( 5126ded16be15dd865a9b21ea304d5273c8be299c87Steve Block static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 5136ded16be15dd865a9b21ea304d5273c8be299c87Steve Block STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 5146ded16be15dd865a9b21ea304d5273c8be299c87Steve Block == RE_EXCEPTION); 5156ded16be15dd865a9b21ea304d5273c8be299c87Steve Block return static_cast<IrregexpResult>(res); 5166ded16be15dd865a9b21ea304d5273c8be299c87Steve Block } 5176ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // If result is RETRY, the string has changed representation, and we 5186ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // must restart from scratch. 
5196ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // In this case, it means we must make sure we are prepared to handle 5208defd9ff6930b4e24729971a61cf7469daf119beSteve Block // the, potentially, different subject (the string can switch between 521b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // being internal and external, and even between being Latin1 and UC16, 5226ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // but the characters are always the same). 5236ded16be15dd865a9b21ea304d5273c8be299c87Steve Block IrregexpPrepare(regexp, subject); 524b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch is_one_byte = subject->IsOneByteRepresentationUnderneath(); 5256ded16be15dd865a9b21ea304d5273c8be299c87Steve Block } while (true); 5266ded16be15dd865a9b21ea304d5273c8be299c87Steve Block UNREACHABLE(); 5276ded16be15dd865a9b21ea304d5273c8be299c87Steve Block return RE_EXCEPTION; 5286ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#else // V8_INTERPRETED_REGEXP 5296ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 530b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp)); 5316ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // We must have done EnsureCompiledIrregexp, so we can get the number of 5326ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // registers. 5336ded16be15dd865a9b21ea304d5273c8be299c87Steve Block int number_of_capture_registers = 5346ded16be15dd865a9b21ea304d5273c8be299c87Steve Block (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 535b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int32_t* raw_output = &output[number_of_capture_registers]; 536b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // We do not touch the actual capture result registers until we know there 537b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // has been a match so that we can use those capture results to set the 538b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // last match info. 
5396ded16be15dd865a9b21ea304d5273c8be299c87Steve Block for (int i = number_of_capture_registers - 1; i >= 0; i--) { 540b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch raw_output[i] = -1; 5416ded16be15dd865a9b21ea304d5273c8be299c87Steve Block } 542b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte), 543b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch isolate); 5446ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 5453ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch IrregexpResult result = IrregexpInterpreter::Match(isolate, 5463ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch byte_codes, 5473ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch subject, 548b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch raw_output, 5493ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch index); 550b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (result == RE_SUCCESS) { 551b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Copy capture results to the start of the registers array. 
552b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch MemCopy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); 553b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 5543ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch if (result == RE_EXCEPTION) { 555b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!isolate->has_pending_exception()); 5563ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch isolate->StackOverflow(); 5576ded16be15dd865a9b21ea304d5273c8be299c87Steve Block } 5583ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return result; 5596ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#endif // V8_INTERPRETED_REGEXP 5606ded16be15dd865a9b21ea304d5273c8be299c87Steve Block} 5616ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 562c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen MurdochMaybeHandle<Object> RegExpImpl::IrregexpExec( 563c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Handle<JSRegExp> regexp, Handle<String> subject, int previous_index, 564c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Handle<RegExpMatchInfo> last_match_info) { 565b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Isolate* isolate = regexp->GetIsolate(); 566b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); 567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 56862ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch subject = String::Flatten(subject); 56962ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch 570a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Prepare space for the return values. 
571b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) 572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (FLAG_trace_regexp_bytecodes) { 573b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch String* pattern = regexp->Pattern(); 574b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch PrintF("\n\nRegexp match: /%s/\n\n", pattern->ToCString().get()); 575b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get()); 576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 578b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); 5796ded16be15dd865a9b21ea304d5273c8be299c87Steve Block if (required_registers < 0) { 5806ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // Compiling failed with an exception. 581b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(isolate->has_pending_exception()); 582b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return MaybeHandle<Object>(); 583a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 584a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 585b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int32_t* output_registers = NULL; 586b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { 587b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch output_registers = NewArray<int32_t>(required_registers); 588b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 589f91f0611dbaf29ca0f1d4aecb357ce243a19d2faBen Murdoch std::unique_ptr<int32_t[]> auto_release(output_registers); 590b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (output_registers == NULL) { 591b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch output_registers = isolate->jsregexp_static_offsets_vector(); 592b8a8cc1952d61a2f3a2568848933943a543b5d3eBen 
Murdoch } 593a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 594b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int res = RegExpImpl::IrregexpExecRaw( 595b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch regexp, subject, previous_index, output_registers, required_registers); 5966ded16be15dd865a9b21ea304d5273c8be299c87Steve Block if (res == RE_SUCCESS) { 597b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int capture_count = 598b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); 599b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return SetLastMatchInfo( 600b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch last_match_info, subject, capture_count, output_registers); 601a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6026ded16be15dd865a9b21ea304d5273c8be299c87Steve Block if (res == RE_EXCEPTION) { 603b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(isolate->has_pending_exception()); 604b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return MaybeHandle<Object>(); 605a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 606b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(res == RE_FAILURE); 6073ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch return isolate->factory()->null_value(); 608a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 609a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 610c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen MurdochHandle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo( 611c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Handle<RegExpMatchInfo> last_match_info, Handle<String> subject, 612c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch int capture_count, int32_t* match) { 613c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch // This is the only place where match infos can grow. 
If, after executing the 614c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch // regexp, RegExpExecStub finds that the match info is too small, it restarts 615c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch // execution in RegExpImpl::Exec, which finally grows the match info right 616c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch // here. 617014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 618b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int capture_register_count = (capture_count + 1) * 2; 619c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Handle<RegExpMatchInfo> result = 620c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch RegExpMatchInfo::ReserveCaptures(last_match_info, capture_register_count); 621c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch result->SetNumberOfCaptureRegisters(capture_register_count); 622c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch 623c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (*result != *last_match_info) { 624c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch // The match info has been reallocated, update the corresponding reference 625c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch // on the native context. 
626c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch Isolate* isolate = last_match_info->GetIsolate(); 627c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (*last_match_info == *isolate->regexp_last_match_info()) { 628c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch isolate->native_context()->set_regexp_last_match_info(*result); 629c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch } else if (*last_match_info == *isolate->regexp_internal_match_info()) { 630c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch isolate->native_context()->set_regexp_internal_match_info(*result); 631c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch } 632c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch } 633c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch 634b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DisallowHeapAllocation no_allocation; 635b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (match != NULL) { 636b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < capture_register_count; i += 2) { 637c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch result->SetCapture(i, match[i]); 638c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch result->SetCapture(i + 1, match[i + 1]); 639b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 640b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 641c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch result->SetLastSubject(*subject); 642c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch result->SetLastInput(*subject); 643c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch return result; 644b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 645b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 646b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 647b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp, 648b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<String> subject, 649b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
Isolate* isolate) 650b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : register_array_(NULL), 651b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch register_array_size_(0), 652b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch regexp_(regexp), 653b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch subject_(subject) { 654b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#ifdef V8_INTERPRETED_REGEXP 655b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool interpreted = true; 656b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#else 657b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool interpreted = false; 658b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#endif // V8_INTERPRETED_REGEXP 659b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 660b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (regexp_->TypeTag() == JSRegExp::ATOM) { 661b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kAtomRegistersPerMatch = 2; 662b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch registers_per_match_ = kAtomRegistersPerMatch; 663b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // There is no distinction between interpreted and native for atom regexps. 664b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch interpreted = false; 665b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 666b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); 667b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (registers_per_match_ < 0) { 668b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch num_matches_ = -1; // Signal exception. 
669b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 670b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 671b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 672b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 673109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal); 674109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (!interpreted) { 675b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch register_array_size_ = 676b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); 677b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_matches_ = register_array_size_ / registers_per_match_; 678b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 679b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Global loop in interpreted regexp is not implemented. We choose 680b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // the size of the offsets vector so that it can only store one match. 
681b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch register_array_size_ = registers_per_match_; 682b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_matches_ = 1; 683b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 684b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 685b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { 686b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch register_array_ = NewArray<int32_t>(register_array_size_); 687b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 688b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch register_array_ = isolate->jsregexp_static_offsets_vector(); 689b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 690b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 691b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Set state so that fetching the results the first time triggers a call 692b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // to the compiled regexp. 693b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch current_match_index_ = max_matches_ - 1; 694b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch num_matches_ = max_matches_; 695b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(registers_per_match_ >= 2); // Each match has at least one capture. 
696b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_GE(register_array_size_, registers_per_match_); 697b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int32_t* last_match = 698b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ®ister_array_[current_match_index_ * registers_per_match_]; 699b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch last_match[0] = -1; 700b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch last_match[1] = 0; 701b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 702b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 703109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochint RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) { 704109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 && 705109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch last_index + 1 < subject_->length() && 706109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) && 707109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) { 708109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Advance over the surrogate pair. 709109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return last_index + 2; 710109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 711109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return last_index + 1; 712109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 713b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 714a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ------------------------------------------------------------------- 715a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Implementation of the Irregexp regular expression engine. 
716a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 717a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// The Irregexp regular expression engine is intended to be a complete 718a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// implementation of ECMAScript regular expressions. It generates either 719a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// bytecodes or native code. 720a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 721a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// The Irregexp regexp engine is structured in three steps. 722a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 1) The parser generates an abstract syntax tree. See ast.cc. 723a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 2) From the AST a node network is created. The nodes are all 724a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// subclasses of RegExpNode. The nodes represent states when 725a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// executing a regular expression. Several optimizations are 726a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// performed on the node network. 727a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 3) From the nodes we generate either byte codes or native code 728a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// that can actually execute the regular expression (perform 729a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// the search). The code generation step is described in more 730a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// detail below. 731a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 732a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Code generation. 733a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 734a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// The nodes are divided into four main categories. 
//   * Choice nodes
//        These represent places where the regular expression can
//        match in more than one way. For example on entry to an
//        alternation (foo|bar) or a repetition (*, +, ? or {}).
//   * Action nodes
//        These represent places where some action should be
//        performed. Examples include recording the current position
//        in the input string to a register (in order to implement
//        captures) or other actions on registers, for example in order
//        to implement the counters needed for {} repetitions.
//   * Matching nodes
//        These attempt to match some element of the input string.
//        Examples of elements include character classes, plain strings
//        or back references.
//   * End nodes
//        These are used to implement the actions required on finding
//        a successful match or failing to find a match.
//
//   The code generated (whether as byte codes or native code) maintains
//   some state as it runs. This consists of the following elements:
//
//   * The capture registers. Used for string captures.
//   * Other registers. Used for counters etc.
//   * The current position.
//   * The stack of backtracking information. Used when a matching node
//     fails to find a match and needs to try an alternative.
//
// Conceptual regular expression execution model:
//
//   There is a simple conceptual model of regular expression execution
//   which will be presented first. The actual code generated is a more
//   efficient simulation of the simple conceptual model:
//
//   * Choice nodes are implemented as follows:
//     For each choice except the last {
//       push current position
//       push backtrack code location
//       <generate code to test for choice>
//       backtrack code location:
//       pop current position
//     }
//     <generate code to test for last choice>
//
//   * Action nodes are generated as follows:
//     <push affected registers on backtrack stack>
//     <generate code to perform action>
//     push backtrack code location
//     <generate code to test for following nodes>
//     backtrack code location:
//     <pop affected registers to restore their state>
//     <pop backtrack location from stack and go to it>
//
//   * Matching nodes are generated as follows:
//     if input string matches at current position
//       update current position
//       <generate code to test for following nodes>
//     else
//       <pop backtrack location from stack and go to it>
//
//   Thus it can be seen that the current position is saved and restored
//   by the choice nodes, whereas the registers are saved and restored
//   by the action nodes that manipulate them.
//
//   The other interesting aspect of this model is that nodes are generated
//   at the point where they are needed by a recursive call to Emit(). If
//   the node has already been code generated then the Emit() call will
//   generate a jump to the previously generated code instead. In order to
//   limit recursion it is possible for the Emit() function to put the node
//   on a work list for later generation and instead generate a jump. The
//   destination of the jump is resolved later when the code is generated.
//
// Actual regular expression code generation.
//
//   Code generation is actually more complicated than the above. In order
//   to improve the efficiency of the generated code some optimizations are
//   performed:
//
//   * Choice nodes have 1-character lookahead.
//     A choice node looks at the following character and eliminates some of
//     the choices immediately based on that character. This is not yet
//     implemented.
//   * Simple greedy loops store reduced backtracking information.
//     A quantifier like /.*foo/m will greedily match the whole input. It will
//     then need to backtrack to a point where it can match "foo". The naive
//     implementation of this would push each character position onto the
//     backtracking stack, then pop them off one by one. This would use space
//     proportional to the length of the input string. However since the "."
//     can only match in one way and always has a constant length (in this case
//     of 1) it suffices to store the current position on the top of the stack
//     once. Matching now becomes merely incrementing the current position and
//     backtracking becomes decrementing the current position and checking the
//     result against the stored current position. This is faster and saves
//     space.
//   * The current state is virtualized.
//     This is used to defer expensive operations until it is clear that they
//     are needed and to generate code for a node more than once, allowing
//     specialized and efficient versions of the code to be created. This is
//     explained in the section below.
//
// Execution state virtualization.
//
//   Instead of emitting code, nodes that manipulate the state can record their
//   manipulation in an object called the Trace. The Trace object can record a
//   current position offset, an optional backtrack code location on the top of
//   the virtualized backtrack stack and some register changes. When a node is
//   to be emitted it can flush the Trace or update it. Flushing the Trace
//   will emit code to bring the actual state into line with the virtual state.
//   Avoiding flushing the state can postpone some work (e.g. updates of capture
//   registers). Postponing work can save time when executing the regular
//   expression since it may be found that the work never has to be done as a
//   failure to match can occur. In addition it is much faster to jump to a
//   known backtrack code location than it is to pop an unknown backtrack
//   location from the stack and jump there.
//
//   The virtual state found in the Trace affects code generation. For example
//   the virtual state contains the difference between the actual current
//   position and the virtual current position, and matching code needs to use
//   this offset to attempt a match in the correct location of the input
//   string. Therefore code generated for a non-trivial trace is specialized
//   to that trace. The code generator therefore has the ability to generate
//   code for each node several times. In order to limit the size of the
//   generated code there is an arbitrary limit on how many specialized sets of
//   code may be generated for a given node.
If the limit is reached, the 858a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// trace is flushed and a generic version of the code for a node is emitted. 859a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// This is subsequently used for that node. The code emitted for non-generic 860a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// trace is not recorded in the node and so it cannot currently be reused in 861a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// the event that code generation is requested for an identical trace. 862a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 863a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 864b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid RegExpTree::AppendToText(RegExpText* text, Zone* zone) { 865a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 866a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 867a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 868a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 869b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid RegExpAtom::AppendToText(RegExpText* text, Zone* zone) { 870b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch text->AddElement(TextElement::Atom(this), zone); 871a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 872a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 873a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 874b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) { 875b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch text->AddElement(TextElement::CharClass(this), zone); 876a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 877a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 878a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 879b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid RegExpText::AppendToText(RegExpText* text, Zone* zone) { 880a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < 
elements()->length(); i++) 881b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch text->AddElement(elements()->at(i), zone); 882a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 883a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 884a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 885a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockTextElement TextElement::Atom(RegExpAtom* atom) { 886b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return TextElement(ATOM, atom); 887a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 888a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 889a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 890b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochTextElement TextElement::CharClass(RegExpCharacterClass* char_class) { 891b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return TextElement(CHAR_CLASS, char_class); 892a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 893a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 894a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 895b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint TextElement::length() const { 896b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch (text_type()) { 897b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch case ATOM: 898b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return atom()->length(); 899b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 900b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch case CHAR_CLASS: 901b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return 1; 902a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 903b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch UNREACHABLE(); 904b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return 0; 905a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 906a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 907a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 908a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockDispatchTable* ChoiceNode::GetTable(bool 
ignore_case) { 909a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (table_ == NULL) { 910b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch table_ = new(zone()) DispatchTable(zone()); 911b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DispatchTableConstructor cons(table_, ignore_case, zone()); 912a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cons.BuildTable(this); 913a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 914a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return table_; 915a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 916a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 917a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 918b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochclass FrequencyCollator { 919b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch public: 920b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch FrequencyCollator() : total_samples_(0) { 921b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) { 922b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch frequencies_[i] = CharacterFrequency(i); 923b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 924b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 925b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 926b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch void CountCharacter(int character) { 927b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int index = (character & RegExpMacroAssembler::kTableMask); 928b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch frequencies_[index].Increment(); 929b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch total_samples_++; 930b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 931b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 932b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Does not measure in percent, but rather per-128 (the table size from the 933b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // regexp macro assembler). 
934b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int Frequency(int in_character) { 935b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character); 936b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (total_samples_ < 1) return 1; // Division by zero. 937b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int freq_in_per128 = 938b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (frequencies_[in_character].counter() * 128) / total_samples_; 939b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return freq_in_per128; 940b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 941b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 942b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch private: 943b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch class CharacterFrequency { 944b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch public: 945b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterFrequency() : counter_(0), character_(-1) { } 946b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch explicit CharacterFrequency(int character) 947b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : counter_(0), character_(character) { } 948b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 949b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch void Increment() { counter_++; } 950b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int counter() { return counter_; } 951b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int character() { return character_; } 952b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 953b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch private: 954b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int counter_; 955b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int character_; 956b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch }; 957b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 958b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
959b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch private: 960b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize]; 961b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int total_samples_; 962b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch}; 963b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 964b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 965a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass RegExpCompiler { 966a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 967014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 968109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch JSRegExp::Flags flags, bool is_one_byte); 969a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 970a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int AllocateRegister() { 971a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (next_register_ >= RegExpMacroAssembler::kMaxRegister) { 972a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block reg_exp_too_big_ = true; 973a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return next_register_; 974a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 975a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return next_register_++; 976a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 977a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 978109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Lookarounds to match lone surrogates for unicode character class matches 979109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // are never nested. We can therefore reuse registers. 
980109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int UnicodeLookaroundStackRegister() { 981109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (unicode_lookaround_stack_register_ == kNoRegister) { 982109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unicode_lookaround_stack_register_ = AllocateRegister(); 983109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 984109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return unicode_lookaround_stack_register_; 985109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 986109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 987109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int UnicodeLookaroundPositionRegister() { 988109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (unicode_lookaround_position_register_ == kNoRegister) { 989109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unicode_lookaround_position_register_ = AllocateRegister(); 990109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 991109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return unicode_lookaround_position_register_; 992109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 993109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 994a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, 995a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* start, 996a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int capture_count, 997a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern); 998a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 999014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch inline void AddWork(RegExpNode* node) { 1000014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!node->on_work_list() && !node->label()->is_bound()) { 1001014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch node->set_on_work_list(true); 1002014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch work_list_->Add(node); 
1003014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1004014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1005a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1006a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kImplementationOffset = 0; 1007a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kNumberOfRegistersOffset = 0; 1008a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kCodeOffset = 1; 1009a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1010a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 1011a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EndNode* accept() { return accept_; } 1012a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1013a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kMaxRecursion = 100; 1014a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block inline int recursion_depth() { return recursion_depth_; } 1015a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block inline void IncrementRecursionDepth() { recursion_depth_++; } 1016a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block inline void DecrementRecursionDepth() { recursion_depth_--; } 1017a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1018a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void SetRegExpTooBig() { reg_exp_too_big_ = true; } 1019a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1020109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch inline bool ignore_case() { return (flags_ & JSRegExp::kIgnoreCase) != 0; } 1021109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch inline bool unicode() { return (flags_ & JSRegExp::kUnicode) != 0; } 1022b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch inline bool one_byte() { return one_byte_; } 1023958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier inline bool optimize() { return optimize_; } 1024958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier inline void 
set_optimize(bool value) { optimize_ = value; } 1025014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch inline bool limiting_recursion() { return limiting_recursion_; } 1026014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch inline void set_limiting_recursion(bool value) { 1027014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch limiting_recursion_ = value; 1028014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1029014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool read_backward() { return read_backward_; } 1030014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch void set_read_backward(bool value) { read_backward_ = value; } 1031b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch FrequencyCollator* frequency_collator() { return &frequency_collator_; } 1032a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1033257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch int current_expansion_factor() { return current_expansion_factor_; } 1034257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch void set_current_expansion_factor(int value) { 1035257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch current_expansion_factor_ = value; 1036257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 1037257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 1038014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate() const { return isolate_; } 1039b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone() const { return zone_; } 1040b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1041a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kNoRegister = -1; 10423fb3ca8c7ca439d408449a395897395c0faae8d1Ben Murdoch 1043a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1044a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EndNode* accept_; 1045a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int next_register_; 1046109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int unicode_lookaround_stack_register_; 
1047109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int unicode_lookaround_position_register_; 1048a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block List<RegExpNode*>* work_list_; 1049a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int recursion_depth_; 1050a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler_; 1051109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch JSRegExp::Flags flags_; 1052b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte_; 1053a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool reg_exp_too_big_; 1054014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool limiting_recursion_; 1055958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier bool optimize_; 1056014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool read_backward_; 1057257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch int current_expansion_factor_; 1058b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch FrequencyCollator frequency_collator_; 1059014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate_; 1060b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone_; 1061a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1062a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1063a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1064a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass RecursionCheck { 1065a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1066a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1067a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler->IncrementRecursionDepth(); 1068a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1069a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 1070a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1071a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* 
compiler_; 1072a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1073a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1074a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1075b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) { 1076b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return RegExpEngine::CompilationResult(isolate, "RegExp too big"); 1077a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1078a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1079a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1080a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Attempts to compile the regexp using an Irregexp code generator. Returns 1081a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// a fixed array or a null handle depending on whether it succeeded. 1082014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben MurdochRegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count, 1083109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch JSRegExp::Flags flags, bool one_byte) 1084a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : next_register_(2 * (capture_count + 1)), 1085109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unicode_lookaround_stack_register_(kNoRegister), 1086109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unicode_lookaround_position_register_(kNoRegister), 1087a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block work_list_(NULL), 1088a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block recursion_depth_(0), 1089109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch flags_(flags), 1090b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch one_byte_(one_byte), 1091257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch reg_exp_too_big_(false), 1092014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch limiting_recursion_(false), 1093958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier optimize_(FLAG_regexp_optimization), 
1094014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch read_backward_(false), 1095b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch current_expansion_factor_(1), 1096b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch frequency_collator_(), 1097014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch isolate_(isolate), 1098b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zone_(zone) { 1099b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch accept_ = new(zone) EndNode(EndNode::ACCEPT, zone); 1100b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister); 1101a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1102a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1103a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1104a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpEngine::CompilationResult RegExpCompiler::Assemble( 1105a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler, 1106a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* start, 1107a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int capture_count, 1108a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern) { 110962ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch Isolate* isolate = pattern->GetHeap()->isolate(); 1110053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block 1111a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#ifdef DEBUG 1112a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (FLAG_trace_regexp_assembler) 111362ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch macro_assembler_ = new RegExpMacroAssemblerTracer(isolate, macro_assembler); 1114a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block else 1115a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 1116a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler_ = macro_assembler; 1117053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block 
1118a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block List <RegExpNode*> work_list(0); 1119a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block work_list_ = &work_list; 1120a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label fail; 1121a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler_->PushBacktrack(&fail); 1122a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace; 1123a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block start->Emit(this, &new_trace); 1124a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler_->Bind(&fail); 1125a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler_->Fail(); 1126a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block while (!work_list.is_empty()) { 1127014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpNode* node = work_list.RemoveLast(); 1128014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch node->set_on_work_list(false); 1129014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!node->label()->is_bound()) node->Emit(this, &new_trace); 1130014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1131014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (reg_exp_too_big_) { 1132014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch macro_assembler_->AbortedCodeGeneration(); 1133014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return IrregexpRegExpTooBig(isolate_); 1134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1135a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1136053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block Handle<HeapObject> code = macro_assembler_->GetCode(pattern); 113762ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch isolate->IncreaseTotalRegexpCodeGenerated(code->Size()); 1138a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block work_list_ = NULL; 1139958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#ifdef ENABLE_DISASSEMBLER 114044f0eee88ff00398ff7f715fab053374d808c90dSteve Block if (FLAG_print_code) { 
114162ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch CodeTracer::Scope trace_scope(isolate->GetCodeTracer()); 1142b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch OFStream os(trace_scope.file()); 1143b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os); 114444f0eee88ff00398ff7f715fab053374d808c90dSteve Block } 1145958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#endif 1146958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier#ifdef DEBUG 1147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (FLAG_trace_regexp_assembler) { 1148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block delete macro_assembler_; 1149a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 1151a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return RegExpEngine::CompilationResult(*code, next_register_); 1152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockbool Trace::DeferredAction::Mentions(int that) { 1156b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (action_type() == ActionNode::CLEAR_CAPTURES) { 1157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 1158a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return range.Contains(that); 1159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 1160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return reg() == that; 1161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockbool Trace::mentions_reg(int reg) { 
1166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (DeferredAction* action = actions_; 1167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action != NULL; 1168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action = action->next()) { 1169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (action->Mentions(reg)) 1170a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 1171a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1172a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 1173a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1176a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockbool Trace::GetStoredPosition(int reg, int* cp_offset) { 1177b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(0, *cp_offset); 1178a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (DeferredAction* action = actions_; 1179a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action != NULL; 1180a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action = action->next()) { 1181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (action->Mentions(reg)) { 1182b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (action->action_type() == ActionNode::STORE_POSITION) { 1183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset(); 1184a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 1185a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 1186a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 1187a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 1191a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 
1192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1193a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1194b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint Trace::FindAffectedRegisters(OutSet* affected_registers, 1195b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone) { 1196a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max_register = RegExpCompiler::kNoRegister; 1197a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (DeferredAction* action = actions_; 1198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action != NULL; 1199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action = action->next()) { 1200b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (action->action_type() == ActionNode::CLEAR_CAPTURES) { 1201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval range = static_cast<DeferredClearCaptures*>(action)->range(); 1202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = range.from(); i <= range.to(); i++) 1203b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch affected_registers->Set(i, zone); 1204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (range.to() > max_register) max_register = range.to(); 1205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 1206b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch affected_registers->Set(action->reg(), zone); 1207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (action->reg() > max_register) max_register = action->reg(); 1208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return max_register; 1211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, 
1215a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max_register, 1216b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const OutSet& registers_to_pop, 1217b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const OutSet& registers_to_clear) { 1218a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int reg = max_register; reg >= 0; reg--) { 1219b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (registers_to_pop.Get(reg)) { 1220b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->PopRegister(reg); 1221b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else if (registers_to_clear.Get(reg)) { 1222a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_to = reg; 1223a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block while (reg > 0 && registers_to_clear.Get(reg - 1)) { 1224a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block reg--; 1225a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1226a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ClearRegisters(reg, clear_to); 1227a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1228a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, 1233a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max_register, 1234b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const OutSet& affected_registers, 1235a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* registers_to_pop, 1236b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch OutSet* registers_to_clear, 1237b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone) { 1238a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. 
1239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const int push_limit = (assembler->stack_limit_slack() + 1) / 2; 1240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Count pushes performed to force a stack limit check occasionally. 1242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int pushes = 0; 1243a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1244a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int reg = 0; reg <= max_register; reg++) { 1245a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!affected_registers.Get(reg)) { 1246a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block continue; 1247a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1248a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1249a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The chronologically first deferred action in the trace 1250a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // is used to infer the action needed to restore a register 1251a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to its previous state (or not, if it's safe to ignore it). 
1252a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR }; 1253a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredActionUndoType undo_action = IGNORE; 1254a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1255a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int value = 0; 1256a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool absolute = false; 1257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool clear = false; 1258014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch static const int kNoStore = kMinInt; 1259014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int store_position = kNoStore; 1260a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This is a little tricky because we are scanning the actions in reverse 1261a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // historical order (newest first). 1262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (DeferredAction* action = actions_; 1263a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action != NULL; 1264a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block action = action->next()) { 1265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (action->Mentions(reg)) { 1266b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch (action->action_type()) { 1267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::SET_REGISTER: { 1268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace::DeferredSetRegister* psr = 1269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static_cast<Trace::DeferredSetRegister*>(action); 1270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!absolute) { 1271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value += psr->value(); 1272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block absolute = true; 1273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // SET_REGISTER is currently only used for newly 
introduced loop 1275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // counters. They can have a significant previous value if they 1276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // occour in a loop. TODO(lrn): Propagate this information, so 1277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // we can set undo_action to IGNORE if we know there is no value to 1278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // restore. 1279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block undo_action = RESTORE; 1280014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_EQ(store_position, kNoStore); 1281b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!clear); 1282a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1283a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1284a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::INCREMENT_REGISTER: 1285a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!absolute) { 1286a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value++; 1287a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1288014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_EQ(store_position, kNoStore); 1289b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!clear); 1290a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block undo_action = RESTORE; 1291a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1292a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::STORE_POSITION: { 1293a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace::DeferredCapture* pc = 1294a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static_cast<Trace::DeferredCapture*>(action); 1295014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!clear && store_position == kNoStore) { 1296a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block store_position = pc->cp_offset(); 1297a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1298a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
1299a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // For captures we know that stores and clears alternate. 1300a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Other register, are never cleared, and if the occur 1301a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // inside a loop, they might be assigned more than once. 1302a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (reg <= 1) { 1303a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Registers zero and one, aka "capture zero", is 1304a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // always set correctly if we succeed. There is no 1305a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // need to undo a setting on backtrack, because we 1306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // will set it again or fail. 1307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block undo_action = IGNORE; 1308a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 1309a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block undo_action = pc->is_capture() ? CLEAR : RESTORE; 1310a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1311b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!absolute); 1312b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(value, 0); 1313a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1314a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1315a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::CLEAR_CAPTURES: { 1316a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Since we're scanning in reverse order, if we've already 1317a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // set the position we have to ignore historically earlier 1318a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // clearing operations. 
1319014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (store_position == kNoStore) { 1320a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block clear = true; 1321a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1322a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block undo_action = RESTORE; 1323b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!absolute); 1324b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(value, 0); 1325a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1326a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1327a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 1328a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 1329a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1330a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1332a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1333a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Prepare for the undo-action (e.g., push if it's going to be popped). 
1334a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (undo_action == RESTORE) { 1335a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pushes++; 1336a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler::StackCheckFlag stack_check = 1337a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler::kNoStackLimitCheck; 1338a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (pushes == push_limit) { 1339a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block stack_check = RegExpMacroAssembler::kCheckStackLimit; 1340a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pushes = 0; 1341a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1342a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1343a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->PushRegister(reg, stack_check); 1344b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch registers_to_pop->Set(reg, zone); 1345a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (undo_action == CLEAR) { 1346b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch registers_to_clear->Set(reg, zone); 1347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1348a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Perform the chronologically last action (or accumulated increment) 1349a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // for the register. 
1350014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (store_position != kNoStore) { 1351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->WriteCurrentPositionToRegister(reg, store_position); 1352a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (clear) { 1353a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ClearRegisters(reg, reg); 1354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (absolute) { 1355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->SetRegister(reg, value); 1356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (value != 0) { 1357a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->AdvanceRegister(reg, value); 1358a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// This is called as we come into a loop choice node and some other tricky 1364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// nodes. It normalizes the state of the code generator to ensure we can 1365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// generate generic code. 
1366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 1367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1369b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!is_trivial()); 1370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (actions_ == NULL && backtrack() == NULL) { 1372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Here we just have some deferred cp advances to fix and we are back to 1373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // a normal situation. We may also have to forget some information gained 1374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // through a quick check that was already performed. 1375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); 1376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Create a new trivial state and generate the node with that. 1377a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_state; 1378a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block successor->Emit(compiler, &new_state); 1379a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 1380a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1381a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1382a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Generate deferred actions here along with code to undo them again. 1383a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet affected_registers; 1384a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (backtrack() != NULL) { 1386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Here we have a concrete backtrack location. 
These are set up by choice 1387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // nodes and so they indicate that we have a deferred save of the current 1388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // position which we may need to emit here. 1389a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->PushCurrentPosition(); 1390a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1391a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1392b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int max_register = FindAffectedRegisters(&affected_registers, 1393b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler->zone()); 1394a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet registers_to_pop; 1395a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet registers_to_clear; 1396a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PerformDeferredActions(assembler, 1397a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block max_register, 1398a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block affected_registers, 1399a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ®isters_to_pop, 1400b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ®isters_to_clear, 1401b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler->zone()); 1402a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (cp_offset_ != 0) { 1403a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->AdvanceCurrentPosition(cp_offset_); 1404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1405a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Create a new trivial state and generate the node with that. 
1407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label undo; 1408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->PushBacktrack(&undo); 1409014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (successor->KeepRecursing(compiler)) { 1410014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Trace new_state; 1411014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch successor->Emit(compiler, &new_state); 1412014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 1413014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->AddWork(successor); 1414014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch assembler->GoTo(successor->label()); 1415014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1416a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1417a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // On backtrack we need to restore state. 1418a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(&undo); 1419a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RestoreAffectedRegisters(assembler, 1420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block max_register, 1421a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block registers_to_pop, 1422a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block registers_to_clear); 1423a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (backtrack() == NULL) { 1424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Backtrack(); 1425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 1426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->PopCurrentPosition(); 1427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->GoTo(backtrack()); 1428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid 
NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { 1433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1434a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1435a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Omit flushing the trace. We discard the entire stack frame anyway. 1436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!label()->is_bound()) { 1438a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We are completely independent of the trace, since we ignore it, 1439a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // so this code can be used as the generic version. 1440a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(label()); 1441a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1442a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Throw away everything on the backtrack stack since the start 1444a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // of the negative submatch and restore the character position. 1445a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ReadCurrentPositionFromRegister(current_position_register_); 1446a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ReadStackPointerFromRegister(stack_pointer_register_); 1447a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (clear_capture_count_ > 0) { 1448a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Clear any captures that might have been performed during the success 1449a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // of the body of the negative look-ahead. 
1450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1; 1451a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ClearRegisters(clear_capture_start_, clear_capture_end); 1452a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1453a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Now that we have unwound the stack we find at the top of the stack the 1454a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // backtrack that the BeginSubmatch node got. 1455a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Backtrack(); 1456a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1457a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1458a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1459a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { 1460a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!trace->is_trivial()) { 1461a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 1462a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 1463a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1464a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1465a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!label()->is_bound()) { 1466a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(label()); 1467a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1468a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (action_) { 1469a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ACCEPT: 1470a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Succeed(); 1471a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 1472a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case BACKTRACK: 1473a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->GoTo(trace->backtrack()); 
1474a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 1475a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case NEGATIVE_SUBMATCH_SUCCESS: 1476a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This case is handled in a different virtual method. 1477a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 1478a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1479a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNIMPLEMENTED(); 1480a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1481a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1482a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1483b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid GuardedAlternative::AddGuard(Guard* guard, Zone* zone) { 1484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (guards_ == NULL) 1485b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch guards_ = new(zone) ZoneList<Guard*>(1, zone); 1486b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch guards_->Add(guard, zone); 1487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1490a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::SetRegister(int reg, 1491a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int val, 1492a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 1493b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 1494b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(SET_REGISTER, on_success); 1495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_store_register.reg = reg; 1496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_store_register.value = val; 1497a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1498a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 
1499a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1500a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { 1502b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 1503b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success); 1504a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_increment_register.reg = reg; 1505a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1507a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::StorePosition(int reg, 1510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_capture, 1511a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 1512b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 1513b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(STORE_POSITION, on_success); 1514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_position_register.reg = reg; 1515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_position_register.is_capture = is_capture; 1516a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1517a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1518a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1520a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::ClearCaptures(Interval range, 1521a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 1522b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 
1523b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success); 1524a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_clear_captures.range_from = range.from(); 1525a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_clear_captures.range_to = range.to(); 1526a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1527a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1528a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1529a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1530a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::BeginSubmatch(int stack_reg, 1531a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int position_reg, 1532a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 1533b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 1534b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success); 1535a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_submatch.stack_pointer_register = stack_reg; 1536a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_submatch.current_position_register = position_reg; 1537a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1538a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1539a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1540a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1541a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, 1542a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int position_reg, 1543a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_register_count, 1544a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_register_from, 1545a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 
1546b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 1547b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); 1548a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_submatch.stack_pointer_register = stack_reg; 1549a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_submatch.current_position_register = position_reg; 1550a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_submatch.clear_register_count = clear_register_count; 1551a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_submatch.clear_register_from = clear_register_from; 1552a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1553a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1554a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1555a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1556a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockActionNode* ActionNode::EmptyMatchCheck(int start_register, 1557a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int repetition_register, 1558a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int repetition_limit, 1559a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 1560b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ActionNode* result = 1561b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success); 1562a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_empty_match_check.start_register = start_register; 1563a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_empty_match_check.repetition_register = repetition_register; 1564a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->data_.u_empty_match_check.repetition_limit = repetition_limit; 1565a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 1566a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block} 1567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1568a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1569a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define DEFINE_ACCEPT(Type) \ 1570a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Type##Node::Accept(NodeVisitor* visitor) { \ 1571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block visitor->Visit##Type(this); \ 1572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockFOR_EACH_NODE_TYPE(DEFINE_ACCEPT) 1574a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#undef DEFINE_ACCEPT 1575a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid LoopChoiceNode::Accept(NodeVisitor* visitor) { 1578a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block visitor->VisitLoopChoice(this); 1579a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1580a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1581a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1582a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ------------------------------------------------------------------- 1583a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Emit code. 
1584a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1586a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, 1587a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Guard* guard, 1588a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace) { 1589a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (guard->op()) { 1590a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case Guard::LT: 1591b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!trace->mentions_reg(guard->reg())); 1592a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->IfRegisterGE(guard->reg(), 1593a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block guard->value(), 1594a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->backtrack()); 1595a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1596a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case Guard::GEQ: 1597b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!trace->mentions_reg(guard->reg())); 1598a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->IfRegisterLT(guard->reg(), 1599a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block guard->value(), 1600a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->backtrack()); 1601a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1602a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1603a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1604a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1605a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1606a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Returns the number of characters in the equivalence class, omitting those 1607014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// that cannot occur in the source string because it is Latin1. 
1608b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic int GetCaseIndependentLetters(Isolate* isolate, uc16 character, 1609b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte_subject, 1610a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block unibrow::uchar* letters) { 161144f0eee88ff00398ff7f715fab053374d808c90dSteve Block int length = 161244f0eee88ff00398ff7f715fab053374d808c90dSteve Block isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); 1613bb769b257e753aafcbd96767abb2abc645eaa20cBen Murdoch // Unibrow returns 0 or 1 for characters where case independence is 1614a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // trivial. 1615a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (length == 0) { 1616a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block letters[0] = character; 1617a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block length = 1; 1618a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1619014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 1620014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (one_byte_subject) { 1621014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int new_length = 0; 1622014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int i = 0; i < length; i++) { 1623014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (letters[i] <= String::kMaxOneByteCharCode) { 1624014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch letters[new_length++] = letters[i]; 1625014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1626014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 1627014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch length = new_length; 1628a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1629b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1630014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return length; 1631a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1632a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1633a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
163444f0eee88ff00398ff7f715fab053374d808c90dSteve Blockstatic inline bool EmitSimpleCharacter(Isolate* isolate, 163544f0eee88ff00398ff7f715fab053374d808c90dSteve Block RegExpCompiler* compiler, 1636a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 c, 1637a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_failure, 1638a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset, 1639a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool check, 1640a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preloaded) { 1641a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1642a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool bound_checked = false; 1643a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!preloaded) { 1644a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->LoadCurrentCharacter( 1645a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cp_offset, 1646a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_failure, 1647a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block check); 1648a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bound_checked = true; 1649a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1650a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckNotCharacter(c, on_failure); 1651a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return bound_checked; 1652a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1653a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1654a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1655a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Only emits non-letters (things that don't have case). Only used for case 1656a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// independent matches. 
165744f0eee88ff00398ff7f715fab053374d808c90dSteve Blockstatic inline bool EmitAtomNonLetter(Isolate* isolate, 165844f0eee88ff00398ff7f715fab053374d808c90dSteve Block RegExpCompiler* compiler, 1659a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 c, 1660a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_failure, 1661a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset, 1662a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool check, 1663a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preloaded) { 1664a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1665b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte = compiler->one_byte(); 1666a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1667b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); 1668a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (length < 1) { 1669b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // This can't match. Must be an one-byte subject and a non-one-byte 1670b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // character. We do not need to do anything since the one-byte pass 1671b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // already handled this. 1672a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; // Bounds not checked. 1673a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1674a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool checked = false; 1675a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We handle the length > 1 case in a later pass. 
1676a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (length == 1) { 1677b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (one_byte && c > String::kMaxOneByteCharCodeU) { 1678a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Can't match - see above. 1679a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; // Bounds not checked. 1680a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1681a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!preloaded) { 1682a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1683a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block checked = check; 1684a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1685a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckNotCharacter(c, on_failure); 1686a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1687a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return checked; 1688a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1689a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1690a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1691a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1692b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte, uc16 c1, uc16 c2, 1693a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_failure) { 1694a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 char_mask; 1695b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (one_byte) { 1696b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch char_mask = String::kMaxOneByteCharCode; 1697a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 16983ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch char_mask = String::kMaxUtf16CodeUnit; 1699a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1700a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 exor = c1 ^ c2; 
1701a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Check whether exor has only one bit set. 1702a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (((exor - 1) & exor) == 0) { 1703a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If c1 and c2 differ only by one bit. 1704a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Ecma262UnCanonicalize always gives the highest number last. 1705b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(c2 > c1); 1706a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 mask = char_mask ^ exor; 1707a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); 1708a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 1709a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1710b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(c2 > c1); 1711a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 diff = c2 - c1; 1712a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (((diff - 1) & diff) == 0 && c1 >= diff) { 1713a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the characters differ by 2^n but don't differ by one bit then 1714a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // subtract the difference from the found character, then do the or 1715a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // trick. We avoid the theoretical case where negative numbers are 1716a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // involved in order to simplify code generation. 
1717a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 mask = char_mask ^ diff; 1718a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, 1719a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block diff, 1720a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block mask, 1721a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_failure); 1722a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 1723a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1724a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 1725a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1726a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1727a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 172844f0eee88ff00398ff7f715fab053374d808c90dSteve Blocktypedef bool EmitCharacterFunction(Isolate* isolate, 172944f0eee88ff00398ff7f715fab053374d808c90dSteve Block RegExpCompiler* compiler, 1730a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 c, 1731a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_failure, 1732a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset, 1733a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool check, 1734a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preloaded); 1735a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1736a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Only emits letters (things that have case). Only used for case independent 1737a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// matches. 
173844f0eee88ff00398ff7f715fab053374d808c90dSteve Blockstatic inline bool EmitAtomLetter(Isolate* isolate, 173944f0eee88ff00398ff7f715fab053374d808c90dSteve Block RegExpCompiler* compiler, 1740a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 c, 1741a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_failure, 1742a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset, 1743a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool check, 1744a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preloaded) { 1745a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1746b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte = compiler->one_byte(); 1747a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1748b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); 1749a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (length <= 1) return false; 1750a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We may not need to check against the end of the input string 1751a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // if this character lies before a character that matched. 
1752a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!preloaded) { 1753a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); 1754a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1755a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label ok; 1756b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1757a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (length) { 1758a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 2: { 1759b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], 1760b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch chars[1], on_failure)) { 1761a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 1762a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckCharacter(chars[0], &ok); 1763a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckNotCharacter(chars[1], on_failure); 1764a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&ok); 1765a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1766a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1767a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1768a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 4: 1769a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckCharacter(chars[3], &ok); 1770a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Fall through! 
1771a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 3: 1772a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckCharacter(chars[0], &ok); 1773a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckCharacter(chars[1], &ok); 1774a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckNotCharacter(chars[2], on_failure); 1775a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&ok); 1776a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1777a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 1778a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 1779a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 1780a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1781a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 1782a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 1783a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1784a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1785b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void EmitBoundaryTest(RegExpMacroAssembler* masm, 1786b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int border, 1787b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* fall_through, 1788b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* above_or_equal, 1789b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* below) { 1790b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (below != fall_through) { 1791b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterLT(border, below); 1792b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (above_or_equal != fall_through) masm->GoTo(above_or_equal); 1793b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 1794b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterGT(border - 1, above_or_equal); 1795b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 
1796b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 1797b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1798b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1799b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, 1800b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int first, 1801b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int last, 1802b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* fall_through, 1803b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* in_range, 1804b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* out_of_range) { 1805b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (in_range == fall_through) { 1806b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (first == last) { 1807b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckNotCharacter(first, out_of_range); 1808b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 1809b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterNotInRange(first, last, out_of_range); 1810b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1811b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 1812b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (first == last) { 1813b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacter(first, in_range); 1814b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 1815b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterInRange(first, last, in_range); 1816b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1817b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (out_of_range != fall_through) masm->GoTo(out_of_range); 1818b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1819b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 1820b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1821b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
1822b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even. 1823b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd. 1824b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void EmitUseLookupTable( 1825b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpMacroAssembler* masm, 1826b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<int>* ranges, 1827b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int start_index, 1828b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int end_index, 1829b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int min_char, 1830b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* fall_through, 1831b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* even_label, 1832b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* odd_label) { 1833b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kSize = RegExpMacroAssembler::kTableSize; 1834b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kMask = RegExpMacroAssembler::kTableMask; 1835b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1836b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int base = (min_char & ~kMask); 1837b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch USE(base); 1838b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1839b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Assert that everything is on one kTableSize page. 
1840b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = start_index; i <= end_index; i++) { 1841b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(ranges->at(i) & ~kMask, base); 1842b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1843b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base); 1844b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1845b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch char templ[kSize]; 1846b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* on_bit_set; 1847b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* on_bit_clear; 1848b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int bit; 1849b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (even_label == fall_through) { 1850b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_bit_set = odd_label; 1851b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_bit_clear = even_label; 1852b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bit = 1; 1853b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 1854b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_bit_set = even_label; 1855b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_bit_clear = odd_label; 1856b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bit = 0; 1857b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1858b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) { 1859b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch templ[i] = bit; 1860b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1861b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int j = 0; 1862b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bit ^= 1; 1863b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = start_index; i < end_index; i++) { 1864b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (j = (ranges->at(i) & kMask); j 
< (ranges->at(i + 1) & kMask); j++) { 1865b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch templ[j] = bit; 1866b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1867b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bit ^= 1; 1868b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1869b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = j; i < kSize; i++) { 1870b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch templ[i] = bit; 1871b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1872014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Factory* factory = masm->isolate()->factory(); 1873b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // TODO(erikcorry): Cache these. 1874b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED); 1875b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < kSize; i++) { 1876b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ba->set(i, templ[i]); 1877b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1878b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckBitInTable(ba, on_bit_set); 1879b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear); 1880b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 1881b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1882b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1883b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void CutOutRange(RegExpMacroAssembler* masm, 1884b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<int>* ranges, 1885b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int start_index, 1886b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int end_index, 1887b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int cut_index, 1888b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* even_label, 1889b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* odd_label) { 
1890b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool odd = (((cut_index - start_index) & 1) == 1); 1891b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* in_range_label = odd ? odd_label : even_label; 1892b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label dummy; 1893b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitDoubleBoundaryTest(masm, 1894b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(cut_index), 1895b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(cut_index + 1) - 1, 1896b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &dummy, 1897b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch in_range_label, 1898b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &dummy); 1899b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!dummy.is_linked()); 1900b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Cut out the single range by rewriting the array. This creates a new 1901b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // range that is a merger of the two ranges on either side of the one we 1902b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // are cutting out. The oddity of the labels is preserved. 1903b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = cut_index; j > start_index; j--) { 1904b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(j) = ranges->at(j - 1); 1905b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1906b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = cut_index + 1; j < end_index; j++) { 1907b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(j) = ranges->at(j + 1); 1908b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1909b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 1910b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1911b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1912b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Unicode case. 
Split the search space into kSize spaces that are handled 1913b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// with recursion. 1914b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void SplitSearchSpace(ZoneList<int>* ranges, 1915b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int start_index, 1916b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int end_index, 1917b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int* new_start_index, 1918b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int* new_end_index, 1919b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int* border) { 1920b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kSize = RegExpMacroAssembler::kTableSize; 1921b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kMask = RegExpMacroAssembler::kTableMask; 1922b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1923b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int first = ranges->at(start_index); 1924b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int last = ranges->at(end_index) - 1; 1925b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1926b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *new_start_index = start_index; 1927b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *border = (ranges->at(start_index) & ~kMask) + kSize; 1928b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch while (*new_start_index < end_index) { 1929b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ranges->at(*new_start_index) > *border) break; 1930b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (*new_start_index)++; 1931b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1932b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // new_start_index is the index of the first edge that is beyond the 1933b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // current kSize space. 
1934b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1935b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // For very large search spaces we do a binary chop search of the non-Latin1 1936b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // space instead of just going to the end of the current kSize space. The 1937b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // heuristics are complicated a little by the fact that any 128-character 1938b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // encoding space can be quickly tested with a table lookup, so we don't 1939b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // wish to do binary chop search at a smaller granularity than that. A 1940b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // 128-character space can take up a lot of space in the ranges array if, 1941b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // for example, we only want to match every second character (eg. the lower 1942b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // case characters on some Unicode pages). 1943b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int binary_chop_index = (end_index + start_index) / 2; 1944b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // The first test ensures that we get to the code that handles the Latin1 1945b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // range with a single not-taken branch, speeding up this important 1946b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // character range (even non-Latin1 charset-based text has spaces and 1947b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // punctuation). 1948b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case. 
1949b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index - start_index > (*new_start_index - start_index) * 2 && 1950b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch last - first > kSize * 2 && binary_chop_index > *new_start_index && 1951b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(binary_chop_index) >= first + 2 * kSize) { 1952b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int scan_forward_for_section_border = binary_chop_index;; 1953b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int new_border = (ranges->at(binary_chop_index) | kMask) + 1; 1954b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1955b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch while (scan_forward_for_section_border < end_index) { 1956b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ranges->at(scan_forward_for_section_border) > new_border) { 1957b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *new_start_index = scan_forward_for_section_border; 1958b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *border = new_border; 1959b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch break; 1960b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1961b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch scan_forward_for_section_border++; 1962b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1963b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1964b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1965b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(*new_start_index > start_index); 1966b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *new_end_index = *new_start_index - 1; 1967b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ranges->at(*new_end_index) == *border) { 1968b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (*new_end_index)--; 1969b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1970b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (*border >= ranges->at(end_index)) { 
1971b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *border = ranges->at(end_index); 1972b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *new_start_index = end_index; // Won't be used. 1973b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *new_end_index = end_index - 1; 1974b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 1975b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 1976b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1977b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1978b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Gets a series of segment boundaries representing a character class. If the 1979b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// character is in the range between an even and an odd boundary (counting from 1980b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// start_index) then go to even_label, otherwise go to odd_label. We already 1981b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// know that the character is in the range of min_char to max_char inclusive. 1982b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Either label can be NULL indicating backtracking. Either label can also be 1983b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// equal to the fall_through label. 
1984109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochstatic void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges, 1985109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int start_index, int end_index, uc32 min_char, 1986109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 max_char, Label* fall_through, 1987109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Label* even_label, Label* odd_label) { 1988109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK_LE(min_char, String::kMaxUtf16CodeUnit); 1989109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK_LE(max_char, String::kMaxUtf16CodeUnit); 1990109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 1991b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int first = ranges->at(start_index); 1992b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int last = ranges->at(end_index) - 1; 1993b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1994b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(min_char, first); 1995b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1996b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Just need to test if the character is before or on-or-after 1997b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // a particular character. 1998b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (start_index == end_index) { 1999b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitBoundaryTest(masm, first, fall_through, even_label, odd_label); 2000b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 2001b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2002b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2003b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Another almost trivial case: There is one interval in the middle that is 2004b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // different from the end intervals. 
2005b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (start_index + 1 == end_index) { 2006b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitDoubleBoundaryTest( 2007b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm, first, last, fall_through, even_label, odd_label); 2008b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 2009b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2010b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2011b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // It's not worth using table lookup if there are very few intervals in the 2012b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // character class. 2013b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (end_index - start_index <= 6) { 2014b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // It is faster to test for individual characters, so we look for those 2015b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // first, then try arbitrary ranges in the second round. 
2016b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static int kNoCutIndex = -1; 2017b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int cut = kNoCutIndex; 2018b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = start_index; i < end_index; i++) { 2019b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ranges->at(i) == ranges->at(i + 1) - 1) { 2020b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch cut = i; 2021b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch break; 2022b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2023b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2024b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (cut == kNoCutIndex) cut = start_index; 2025b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CutOutRange( 2026b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm, ranges, start_index, end_index, cut, even_label, odd_label); 2027b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_GE(end_index - start_index, 2); 2028b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GenerateBranches(masm, 2029b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 2030b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch start_index + 1, 2031b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index - 1, 2032b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch min_char, 2033b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char, 2034b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch fall_through, 2035b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch even_label, 2036b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch odd_label); 2037b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 2038b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2039b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2040b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If there are a lot of intervals in the regexp, then we will use tables to 2041b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // 
determine whether the character is inside or outside the character class. 2042b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kBits = RegExpMacroAssembler::kTableSizeBits; 2043b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2044b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if ((max_char >> kBits) == (min_char >> kBits)) { 2045b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitUseLookupTable(masm, 2046b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 2047b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch start_index, 2048b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index, 2049b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch min_char, 2050b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch fall_through, 2051b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch even_label, 2052b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch odd_label); 2053b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 2054b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2055b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2056b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if ((min_char >> kBits) != (first >> kBits)) { 2057b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterLT(first, odd_label); 2058b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GenerateBranches(masm, 2059b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 2060b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch start_index + 1, 2061b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index, 2062b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch first, 2063b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char, 2064b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch fall_through, 2065b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch odd_label, 2066b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch even_label); 2067b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 
2068b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2069b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2070b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int new_start_index = 0; 2071b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int new_end_index = 0; 2072b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int border = 0; 2073b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2074b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SplitSearchSpace(ranges, 2075b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch start_index, 2076b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index, 2077b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &new_start_index, 2078b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &new_end_index, 2079b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &border); 2080b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2081b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label handle_rest; 2082b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* above = &handle_rest; 2083b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (border == last + 1) { 2084b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // We didn't find any section that started after the limit, so everything 2085b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // above the border is one of the terminal labels. 2086b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch above = (end_index & 1) != (start_index & 1) ? 
odd_label : even_label; 2087b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(new_end_index == end_index - 1); 2088b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2089b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2090b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LE(start_index, new_end_index); 2091b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LE(new_start_index, end_index); 2092b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(start_index, new_start_index); 2093b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(new_end_index, end_index); 2094b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(new_end_index + 1 == new_start_index || 2095b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (new_end_index + 2 == new_start_index && 2096b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch border == ranges->at(new_end_index + 1))); 2097b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(min_char, border - 1); 2098b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(border, max_char); 2099b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_LT(ranges->at(new_end_index), border); 2100b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(border < ranges->at(new_start_index) || 2101b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (border == ranges->at(new_start_index) && 2102b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_start_index == end_index && 2103b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_end_index == end_index - 1 && 2104b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch border == last + 1)); 2105b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1)); 2106b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2107b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterGT(border - 1, above); 2108b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label 
dummy; 2109b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GenerateBranches(masm, 2110b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 2111b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch start_index, 2112b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_end_index, 2113b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch min_char, 2114b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch border - 1, 2115b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &dummy, 2116b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch even_label, 2117b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch odd_label); 2118b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (handle_rest.is_linked()) { 2119b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->Bind(&handle_rest); 2120b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool flip = (new_start_index & 1) != (start_index & 1); 2121b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GenerateBranches(masm, 2122b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 2123b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_start_index, 2124b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index, 2125b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch border, 2126b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char, 2127b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &dummy, 2128b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch flip ? odd_label : even_label, 2129b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch flip ? 
even_label : odd_label); 2130b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2131b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2132b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2133b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic void EmitCharClass(RegExpMacroAssembler* macro_assembler, 2135b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* cc, bool one_byte, 2136b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* on_failure, int cp_offset, bool check_offset, 2137b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool preloaded, Zone* zone) { 2138b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = cc->ranges(zone); 2139109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Canonicalize(ranges); 2140b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max_char; 2142b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (one_byte) { 2143b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char = String::kMaxOneByteCharCode; 2144a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 21453ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch max_char = String::kMaxUtf16CodeUnit; 2146a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int range_count = ranges->length(); 2149a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int last_valid_range = range_count - 1; 2151a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block while (last_valid_range >= 0) { 2152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange& range = ranges->at(last_valid_range); 2153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (range.from() <= max_char) { 
2154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 2155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2156a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block last_valid_range--; 2157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2158a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (last_valid_range < 0) { 2160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!cc->is_negated()) { 2161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->GoTo(on_failure); 2162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (check_offset) { 2164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->CheckPosition(cp_offset, on_failure); 2165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 2167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (last_valid_range == 0 && 2170b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(0).IsEverything(max_char)) { 2171b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (cc->is_negated()) { 2172b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->GoTo(on_failure); 2173b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 2174b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // This is a common case hit by non-anchored expressions. 
2175b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (check_offset) { 2176b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->CheckPosition(cp_offset, on_failure); 2177b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2178b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2179b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 2180b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2182a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!preloaded) { 2183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 2184a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2185a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2186b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (cc->is_standard(zone) && 2187109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), 2188109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch on_failure)) { 2189e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return; 2190e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 2191e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 2192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2193b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // A new list with ascending entries. Each entry is a code unit 2194b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // where there is a boundary between code units that are part of 2195b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // the class and code units that are not. Normally we insert an 2196b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // entry at zero which goes to the failure label, but if there 2197b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // was already one there we fall through for success on that entry. 
2198b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Subsequent entries have alternating meaning (success/failure). 2199b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<int>* range_boundaries = 2200b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(zone) ZoneList<int>(last_valid_range, zone); 2201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2202b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool zeroth_entry_is_failure = !cc->is_negated(); 2203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2204b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i <= last_valid_range; i++) { 2205b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterRange& range = ranges->at(i); 2206b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (range.from() == 0) { 2207b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(i, 0); 2208b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zeroth_entry_is_failure = !zeroth_entry_is_failure; 2209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2210b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch range_boundaries->Add(range.from(), zone); 2211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2212b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch range_boundaries->Add(range.to() + 1, zone); 2213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2214b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int end_index = range_boundaries->length() - 1; 2215b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (range_boundaries->at(end_index) > max_char) { 2216b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index--; 2217b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2218b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2219b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label fall_through; 2220b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GenerateBranches(macro_assembler, 2221b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
range_boundaries, 2222b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 0, // start_index. 2223b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch end_index, 2224b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 0, // min_char. 2225b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char, 2226b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &fall_through, 2227b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zeroth_entry_is_failure ? &fall_through : on_failure, 2228b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zeroth_entry_is_failure ? on_failure : &fall_through); 2229b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->Bind(&fall_through); 2230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2233a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode::~RegExpNode() { 2234a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2235a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2236a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2237a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 2238a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace) { 2239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we are generating a greedy loop then don't stop and don't reuse code. 
2240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->stop_node() != NULL) { 2241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return CONTINUE; 2242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2243a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2244a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 2245a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->is_trivial()) { 2246014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) { 2247014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // If a generic version is already scheduled to be generated or we have 2248014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // recursed too deeply then just generate a jump to that code. 2249a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->GoTo(&label_); 2250014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // This will queue it up for generation of a generic version if it hasn't 2251014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // already been queued. 2252a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler->AddWork(this); 2253a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return DONE; 2254a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2255a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Generate generic version of the node and bind the label for later use. 2256a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&label_); 2257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return CONTINUE; 2258a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2259a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2260a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We are being asked to make a non-generic version. 
Keep track of how many 2261a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // non-generic versions we generate so as not to overdo it. 2262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace_count_++; 2263014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (KeepRecursing(compiler) && compiler->optimize() && 2264014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch trace_count_ < kMaxCopiesCodeGenerated) { 2265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return CONTINUE; 2266a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we get here code has been generated for this node too many times or 2269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // recursion is too deep. Time to switch to a generic version. The code for 2270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // generic versions above can handle deep recursion properly. 2271014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool was_limiting = compiler->limiting_recursion(); 2272014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->set_limiting_recursion(true); 2273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 2274014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->set_limiting_recursion(was_limiting); 2275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return DONE; 2276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2279014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochbool RegExpNode::KeepRecursing(RegExpCompiler* compiler) { 2280014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return !compiler->limiting_recursion() && 2281014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion; 2282014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben 
Murdoch} 2283014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 2284014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 2285b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint ActionNode::EatsAtLeast(int still_to_find, 2286b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2287b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2288b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (budget <= 0) return 0; 2289b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 2290b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return on_success()->EatsAtLeast(still_to_find, 2291b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget - 1, 2292b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch not_at_start); 2293a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2294a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2295a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2296014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 2297014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BoyerMooreLookahead* bm, bool not_at_start) { 2298b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (action_type_ == BEGIN_SUBMATCH) { 2299b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->SetRest(offset); 2300b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { 2301014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 2302b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2303b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SaveBMInfo(bm, not_at_start, offset); 2304b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2305b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2306b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
2307b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint AssertionNode::EatsAtLeast(int still_to_find, 2308b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2309b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2310b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (budget <= 0) return 0; 2311b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // If we know we are not at the start and we are asked "how many characters 2312b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // will you match if you succeed?" then we can answer anything since false 2313b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // implies false. So lets just return the max answer (still_to_find) since 2314b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // that won't prevent us from preloading a lot of characters for the other 2315b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // branches in the node graph. 2316b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (assertion_type() == AT_START && not_at_start) return still_to_find; 2317b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return on_success()->EatsAtLeast(still_to_find, 2318b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget - 1, 2319b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch not_at_start); 2320a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2321a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2322a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2323014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 2324014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BoyerMooreLookahead* bm, bool not_at_start) { 2325b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Match the behaviour of EatsAtLeast on this node. 
2326b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (assertion_type() == AT_START && not_at_start) return; 2327014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 2328b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SaveBMInfo(bm, not_at_start, offset); 2329b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2330b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2331b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2332b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint BackReferenceNode::EatsAtLeast(int still_to_find, 2333b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2334b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2335014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (read_backward()) return 0; 2336b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (budget <= 0) return 0; 2337b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return on_success()->EatsAtLeast(still_to_find, 2338b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget - 1, 2339b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch not_at_start); 2340a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2341a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2342a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2343b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint TextNode::EatsAtLeast(int still_to_find, 2344b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2345b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2346014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (read_backward()) return 0; 2347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int answer = Length(); 2348a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (answer >= still_to_find) return answer; 2349b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (budget <= 0) return answer; 2350b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch 
// We are not at start after this node so we set the last argument to 'true'. 2351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return answer + on_success()->EatsAtLeast(still_to_find - answer, 2352b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget - 1, 2353b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch true); 2354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2357014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochint NegativeLookaroundChoiceNode::EatsAtLeast(int still_to_find, int budget, 2358014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool not_at_start) { 2359b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (budget <= 0) return 0; 2360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Alternative 0 is the negative lookahead, alternative 1 is what comes 2361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // afterwards. 2362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = alternatives_->at(1).node(); 2363b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return node->EatsAtLeast(still_to_find, budget - 1, not_at_start); 2364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2367014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid NegativeLookaroundChoiceNode::GetQuickCheckDetails( 2368014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in, 2369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 2370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Alternative 0 is the negative lookahead, alternative 1 is what comes 2371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // afterwards. 
2372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = alternatives_->at(1).node(); 2373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start); 2374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2377a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockint ChoiceNode::EatsAtLeastHelper(int still_to_find, 2378b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2379b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch RegExpNode* ignore_this_node, 2380b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2381b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (budget <= 0) return 0; 2382a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int min = 100; 2383a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int choice_count = alternatives_->length(); 2384b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget = (budget - 1) / choice_count; 2385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < choice_count; i++) { 2386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = alternatives_->at(i).node(); 2387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (node == ignore_this_node) continue; 2388b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int node_eats_at_least = 2389b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch node->EatsAtLeast(still_to_find, budget, not_at_start); 2390a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (node_eats_at_least < min) min = node_eats_at_least; 2391b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (min == 0) return 0; 2392a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2393a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return min; 2394a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 
2395a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2396a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2397b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint LoopChoiceNode::EatsAtLeast(int still_to_find, 2398b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2399b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2400b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return EatsAtLeastHelper(still_to_find, 2401b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget - 1, 2402b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch loop_node_, 2403b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch not_at_start); 2404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2405a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2407b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint ChoiceNode::EatsAtLeast(int still_to_find, 2408b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int budget, 2409b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { 2410b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch return EatsAtLeastHelper(still_to_find, 2411b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget, 2412b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch NULL, 2413b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch not_at_start); 2414a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2415a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2416a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2417a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Takes the left-most 1-bit and smears it out, setting all bits to its right. 
2418a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic inline uint32_t SmearBitsRight(uint32_t v) { 2419a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block v |= v >> 1; 2420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block v |= v >> 2; 2421a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block v |= v >> 4; 2422a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block v |= v >> 8; 2423a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block v |= v >> 16; 2424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return v; 2425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockbool QuickCheckDetails::Rationalize(bool asc) { 2429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool found_useful_op = false; 2430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t char_mask; 2431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (asc) { 2432b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch char_mask = String::kMaxOneByteCharCode; 2433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 24343ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch char_mask = String::kMaxUtf16CodeUnit; 2435a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block mask_ = 0; 2437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value_ = 0; 2438a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int char_shift = 0; 2439a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < characters_; i++) { 2440a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Position* pos = &positions_[i]; 2441b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if ((pos->mask & String::kMaxOneByteCharCode) != 0) { 2442a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block found_useful_op = true; 2443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 
2444a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block mask_ |= (pos->mask & char_mask) << char_shift; 2445a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value_ |= (pos->value & char_mask) << char_shift; 2446a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block char_shift += asc ? 8 : 16; 2447a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2448a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return found_useful_op; 2449a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2451a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2452a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockbool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, 2453b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* bounds_check_trace, 2454a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace, 2455a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preload_has_checked_bounds, 2456a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_possible_success, 2457a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails* details, 2458a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool fall_through_on_failure) { 2459a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (details->characters() == 0) return false; 2460b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GetQuickCheckDetails( 2461b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE); 2462a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (details->cannot_match()) return false; 2463b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!details->Rationalize(compiler->one_byte())) return false; 2464b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(details->characters() == 1 || 2465a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler->macro_assembler()->CanReadUnaligned()); 2466a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t mask 
= details->mask(); 2467a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t value = details->value(); 2468a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2469a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 2470a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2471a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->characters_preloaded() != details->characters()) { 2472b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset()); 2473b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // We are attempting to preload the minimum number of characters 2474b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // any choice would eat, so if the bounds check fails, then none of the 2475b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // choices can succeed, so we can just immediately backtrack, rather 2476b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // than go to the next choice. 
2477a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->LoadCurrentCharacter(trace->cp_offset(), 2478b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bounds_check_trace->backtrack(), 2479a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block !preload_has_checked_bounds, 2480a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details->characters()); 2481a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2482a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2483a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool need_mask = true; 2485a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2486a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (details->characters() == 1) { 2487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If number of characters preloaded is 1 then we used a byte or 16 bit 2488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // load so the value is already masked down. 2489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t char_mask; 2490b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (compiler->one_byte()) { 2491b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch char_mask = String::kMaxOneByteCharCode; 2492a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 24933ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch char_mask = String::kMaxUtf16CodeUnit; 2494a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if ((mask & char_mask) == char_mask) need_mask = false; 2496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block mask &= char_mask; 2497a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2498b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // For 2-character preloads in one-byte mode or 1-character preloads in 2499b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // two-byte mode we also use a 16 bit load with zero extend. 
2500109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch static const uint32_t kTwoByteMask = 0xffff; 2501109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch static const uint32_t kFourByteMask = 0xffffffff; 2502b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (details->characters() == 2 && compiler->one_byte()) { 2503109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; 2504b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else if (details->characters() == 1 && !compiler->one_byte()) { 2505109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false; 2506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2507109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (mask == kFourByteMask) need_mask = false; 2508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2511a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (fall_through_on_failure) { 2512a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (need_mask) { 2513a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); 2514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacter(value, on_possible_success); 2516a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2517a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2518a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (need_mask) { 2519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack()); 2520a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2521a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckNotCharacter(value, trace->backtrack()); 
2522a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2523a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2524a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 2525a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2526a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2527a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2528a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Here is the meat of GetQuickCheckDetails (see also the comment on the 2529a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// super-class in the .h file). 2530a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 2531a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// We iterate along the text object, building up for each character a 2532a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// mask and value that can be used to test for a quick failure to match. 2533a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// The masks and values for the positions will be combined into a single 2534a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// machine word for the current character width in order to be used in 2535a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// generating a quick check. 2536a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid TextNode::GetQuickCheckDetails(QuickCheckDetails* details, 2537a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 2538a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 2539a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 2540014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Do not collect any quick check details if the text node reads backward, 2541014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // since it reads in the opposite direction than we use for quick checks. 
2542014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (read_backward()) return; 2543014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate = compiler->macro_assembler()->isolate(); 2544b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(characters_filled_in < details->characters()); 2545a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters = details->characters(); 2546a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int char_mask; 2547b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (compiler->one_byte()) { 2548b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch char_mask = String::kMaxOneByteCharCode; 2549a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 25503ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch char_mask = String::kMaxUtf16CodeUnit; 2551a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2552014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int k = 0; k < elements()->length(); k++) { 2553014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextElement elm = elements()->at(k); 2554b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (elm.text_type() == TextElement::ATOM) { 2555b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Vector<const uc16> quarks = elm.atom()->data(); 2556a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < characters && i < quarks.length(); i++) { 2557a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails::Position* pos = 2558a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details->positions(characters_filled_in); 2559a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 c = quarks[i]; 2560a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (compiler->ignore_case()) { 2561a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 2562b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int length = GetCaseIndependentLetters(isolate, c, 
2563b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler->one_byte(), chars); 2564014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (length == 0) { 2565014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // This can happen because all case variants are non-Latin1, but we 2566014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // know the input is Latin1. 2567014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch details->set_cannot_match(); 2568014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch pos->determines_perfectly = false; 2569014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return; 2570014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 2571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (length == 1) { 2572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This letter has no case equivalents, so it's nice and simple 2573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // and the mask-compare will determine definitely whether we have 2574a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // a match at this character position. 
2575a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask = char_mask; 2576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value = c; 2577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = true; 2578a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2579a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t common_bits = char_mask; 2580a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t bits = chars[0]; 2581a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int j = 1; j < length; j++) { 2582a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); 2583a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block common_bits ^= differing_bits; 2584a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bits &= common_bits; 2585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2586a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If length is 2 and common bits has only one zero in it then 2587a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // our mask and compare instruction will determine definitely 2588a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // whether we have a match at this character position. Otherwise 2589a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // it can only be an approximate check. 
2590a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t one_zero = (common_bits | ~char_mask); 2591a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { 2592a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = true; 2593a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2594a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask = common_bits; 2595a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value = bits; 2596a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2597a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2598a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Don't ignore case. Nice simple case where the mask-compare will 2599a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // determine definitely whether we have a match at this character 2600a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // position. 2601014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (c > char_mask) { 2602014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch details->set_cannot_match(); 2603014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch pos->determines_perfectly = false; 2604014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return; 2605014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 2606a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask = char_mask; 2607a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value = c; 2608a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = true; 2609a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2610a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_filled_in++; 2611b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(characters_filled_in <= details->characters()); 2612a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (characters_filled_in == details->characters()) { 2613a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 
2614a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2615a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2616a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2617a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails::Position* pos = 2618a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details->positions(characters_filled_in); 2619b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* tree = elm.char_class(); 2620b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = tree->ranges(zone()); 2621a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (tree->is_negated()) { 2622a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A quick check uses multi-character mask and compare. There is no 2623a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // useful way to incorporate a negative char class into this scheme 2624a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // so we just conservatively create a mask and value that will always 2625a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // succeed. 
2626a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask = 0; 2627a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value = 0; 2628a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 2629a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int first_range = 0; 2630a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block while (ranges->at(first_range).from() > char_mask) { 2631a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_range++; 2632a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (first_range == ranges->length()) { 2633a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details->set_cannot_match(); 2634a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = false; 2635a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 2636a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2637a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2638a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange range = ranges->at(first_range); 2639a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 from = range.from(); 2640a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 to = range.to(); 2641a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (to > char_mask) { 2642a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block to = char_mask; 2643a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2644a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t differing_bits = (from ^ to); 2645a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A mask and compare is only perfect if the differing bits form a 2646a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // number like 00011111 with one single block of trailing 1s. 
2647a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if ((differing_bits & (differing_bits + 1)) == 0 && 2648a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block from + differing_bits == to) { 2649a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = true; 2650a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2651a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t common_bits = ~SmearBitsRight(differing_bits); 2652a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t bits = (from & common_bits); 2653a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = first_range + 1; i < ranges->length(); i++) { 2654a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange range = ranges->at(i); 2655a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 from = range.from(); 2656a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 to = range.to(); 2657a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (from > char_mask) continue; 2658a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (to > char_mask) to = char_mask; 2659a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Here we are combining more ranges into the mask and compare 2660a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // value. With each new range the mask becomes more sparse and 2661a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // so the chances of a false positive rise. A character class 2662a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // with multiple ranges is assumed never to be equivalent to a 2663a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // mask and compare operation. 
2664a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = false; 2665a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t new_common_bits = (from ^ to); 2666a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_common_bits = ~SmearBitsRight(new_common_bits); 2667a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block common_bits &= new_common_bits; 2668a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bits &= new_common_bits; 2669a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t differing_bits = (from & common_bits) ^ bits; 2670a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block common_bits ^= differing_bits; 2671a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bits &= common_bits; 2672a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2673a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask = common_bits; 2674a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value = bits; 2675a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2676a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_filled_in++; 2677b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(characters_filled_in <= details->characters()); 2678a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (characters_filled_in == details->characters()) { 2679a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 2680a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2681a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2682a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2683b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(characters_filled_in != details->characters()); 2684b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!details->cannot_match()) { 2685b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_success()-> GetQuickCheckDetails(details, 2686b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler, 2687b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch characters_filled_in, 
2688b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch true); 2689b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2690a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2691a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2692a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2693a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid QuickCheckDetails::Clear() { 2694a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < characters_; i++) { 2695a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i].mask = 0; 2696a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i].value = 0; 2697a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i].determines_perfectly = false; 2698a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2699a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_ = 0; 2700a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2701a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2702a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2703b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid QuickCheckDetails::Advance(int by, bool one_byte) { 2704014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (by >= characters_ || by < 0) { 2705014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_IMPLIES(by < 0, characters_ == 0); 2706a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Clear(); 2707a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 2708a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2709014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_LE(characters_ - by, 4); 2710014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_LE(characters_, 4); 2711a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < characters_ - by; i++) { 2712a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i] = positions_[by + i]; 2713a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2714a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block for (int i = characters_ - by; i < characters_; i++) { 2715a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i].mask = 0; 2716a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i].value = 0; 2717a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block positions_[i].determines_perfectly = false; 2718a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2719a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_ -= by; 2720a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We could change mask_ and value_ here but we would never advance unless 2721a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // they had already been used in a check and they won't be used again because 2722a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // it would gain us nothing. So there's no point. 2723a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2724a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2725a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2726a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) { 2727b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(characters_ == other->characters_); 2728a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (other->cannot_match_) { 2729a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 2730a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2731a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (cannot_match_) { 2732a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block *this = *other; 2733a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 2734a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2735a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = from_index; i < characters_; i++) { 2736a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails::Position* pos = positions(i); 2737a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails::Position* 
other_pos = other->positions(i); 2738a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (pos->mask != other_pos->mask || 2739a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value != other_pos->value || 2740a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block !other_pos->determines_perfectly) { 2741a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Our mask-compare operation will be approximate unless we have the 2742a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // exact same operation on both sides of the alternation. 2743a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->determines_perfectly = false; 2744a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2745a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask &= other_pos->mask; 2746a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value &= pos->mask; 2747a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block other_pos->value &= pos->mask; 2748a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 differing_bits = (pos->value ^ other_pos->value); 2749a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->mask &= ~differing_bits; 2750a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pos->value &= pos->mask; 2751a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2752a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2753a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2754a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2755a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass VisitMarker { 2756a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 2757a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit VisitMarker(NodeInfo* info) : info_(info) { 2758b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!info->visited); 2759a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block info->visited = true; 2760a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2761a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ~VisitMarker() { 
2762a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block info_->visited = false; 2763a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2764a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 2765a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo* info_; 2766a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 2767a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2768a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2769b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { 2770b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->replacement_calculated) return replacement(); 2771b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (depth < 0) return this; 2772b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!info()->visited); 2773b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch VisitMarker marker(info()); 2774b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return FilterSuccessor(depth - 1, ignore_case); 2775b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2776b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2777b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2778b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { 2779b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); 2780b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (next == NULL) return set_replacement(NULL); 2781b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_success_ = next; 2782b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return set_replacement(this); 2783b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2784b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2785b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2786b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// We need to check for the following 
characters: 0x39c 0x3bc 0x178. 2787b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic inline bool RangeContainsLatin1Equivalents(CharacterRange range) { 2788b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // TODO(dcarney): this could be a lot more efficient. 2789b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return range.Contains(0x39c) || 2790b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch range.Contains(0x3bc) || range.Contains(0x178); 2791b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2792b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2793b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2794b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { 2795b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < ranges->length(); i++) { 2796b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // TODO(dcarney): this could be a lot more efficient. 2797b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; 2798b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2799b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return false; 2800b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2801b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2802b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2803b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { 2804b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->replacement_calculated) return replacement(); 2805b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (depth < 0) return this; 2806b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!info()->visited); 2807b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch VisitMarker marker(info()); 2808014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int element_count = elements()->length(); 
2809b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < element_count; i++) { 2810014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextElement elm = elements()->at(i); 2811b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (elm.text_type() == TextElement::ATOM) { 2812b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Vector<const uc16> quarks = elm.atom()->data(); 2813b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < quarks.length(); j++) { 2814b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch uint16_t c = quarks[j]; 2815b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (c <= String::kMaxOneByteCharCode) continue; 2816b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!ignore_case) return set_replacement(NULL); 2817b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Here, we need to check for characters whose upper and lower cases 2818b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // are outside the Latin-1 range. 2819b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); 2820b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Character is outside Latin-1 completely 2821b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (converted == 0) return set_replacement(NULL); 2822b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Convert quark to Latin-1 in place. 
2823b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch uint16_t* copy = const_cast<uint16_t*>(quarks.start()); 2824b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch copy[j] = converted; 2825b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2826b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 2827b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(elm.text_type() == TextElement::CHAR_CLASS); 2828b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* cc = elm.char_class(); 2829b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2830109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Canonicalize(ranges); 2831b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Now they are in order so we only need to look at the first. 2832b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int range_count = ranges->length(); 2833b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (cc->is_negated()) { 2834b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (range_count != 0 && 2835b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(0).from() == 0 && 2836b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(0).to() >= String::kMaxOneByteCharCode) { 2837b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // This will be handled in a later filter. 
2838b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2839b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return set_replacement(NULL); 2840b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2841b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 2842b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (range_count == 0 || 2843b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->at(0).from() > String::kMaxOneByteCharCode) { 2844b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // This will be handled in a later filter. 2845b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; 2846b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return set_replacement(NULL); 2847b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2848b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2849b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2850b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2851b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return FilterSuccessor(depth - 1, ignore_case); 2852b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2853b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2854b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2855b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { 2856b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->replacement_calculated) return replacement(); 2857b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (depth < 0) return this; 2858b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->visited) return this; 2859b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch { 2860b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch VisitMarker marker(info()); 2861b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
2862b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* continue_replacement = 2863b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch continue_node_->FilterOneByte(depth - 1, ignore_case); 2864b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If we can't continue after the loop then there is no sense in doing the 2865b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // loop. 2866b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (continue_replacement == NULL) return set_replacement(NULL); 2867b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2868b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2869b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return ChoiceNode::FilterOneByte(depth - 1, ignore_case); 2870b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2871b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2872b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2873b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { 2874b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->replacement_calculated) return replacement(); 2875b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (depth < 0) return this; 2876b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->visited) return this; 2877b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch VisitMarker marker(info()); 2878b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int choice_count = alternatives_->length(); 2879b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2880b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < choice_count; i++) { 2881b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GuardedAlternative alternative = alternatives_->at(i); 2882b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (alternative.guards() != NULL && alternative.guards()->length() != 0) { 2883b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch set_replacement(this); 
2884b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return this; 2885b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2886b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2887b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2888b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int surviving = 0; 2889b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* survivor = NULL; 2890b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < choice_count; i++) { 2891b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GuardedAlternative alternative = alternatives_->at(i); 2892b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* replacement = 2893b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternative.node()->FilterOneByte(depth - 1, ignore_case); 2894b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. 2895b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (replacement != NULL) { 2896b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(i).set_node(replacement); 2897b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch surviving++; 2898b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch survivor = replacement; 2899b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2900b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2901b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (surviving < 2) return set_replacement(survivor); 2902b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2903b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch set_replacement(this); 2904b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (surviving == choice_count) { 2905b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return this; 2906b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2907b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Only some of the nodes survived the filtering. 
We need to rebuild the 2908b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // alternatives list. 2909b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<GuardedAlternative>* new_alternatives = 2910b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); 2911b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < choice_count; i++) { 2912b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* replacement = 2913b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); 2914b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (replacement != NULL) { 2915b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(i).set_node(replacement); 2916b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_alternatives->Add(alternatives_->at(i), zone()); 2917b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2918b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2919b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_ = new_alternatives; 2920b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return this; 2921b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2922b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2923b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2924014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben MurdochRegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, 2925014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool ignore_case) { 2926b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->replacement_calculated) return replacement(); 2927b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (depth < 0) return this; 2928b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (info()->visited) return this; 2929b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch VisitMarker marker(info()); 2930b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // 
Alternative 0 is the negative lookahead, alternative 1 is what comes 2931b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // afterwards. 2932b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* node = alternatives_->at(1).node(); 2933b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); 2934b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (replacement == NULL) return set_replacement(NULL); 2935b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(1).set_node(replacement); 2936b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2937b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* neg_node = alternatives_->at(0).node(); 2938b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); 2939b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If the negative lookahead is always going to fail then 2940b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // we don't need to check it. 
2941b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (neg_replacement == NULL) return set_replacement(replacement); 2942b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(0).set_node(neg_replacement); 2943b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return set_replacement(this); 2944b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2945b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2946b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2947a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2948a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 2949a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 2950a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 2951a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (body_can_be_zero_length_ || info()->visited) return; 2952a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VisitMarker marker(info()); 2953a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return ChoiceNode::GetQuickCheckDetails(details, 2954a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler, 2955a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_filled_in, 2956a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block not_at_start); 2957a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2958a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2959a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2960014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 2961014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BoyerMooreLookahead* bm, bool not_at_start) { 2962b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (body_can_be_zero_length_ || budget <= 0) { 2963b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->SetRest(offset); 2964b8a8cc1952d61a2f3a2568848933943a543b5d3eBen 
Murdoch SaveBMInfo(bm, not_at_start, offset); 2965b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 2966b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 2967014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); 2968b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SaveBMInfo(bm, not_at_start, offset); 2969b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 2970b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2971b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 2972a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, 2973a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 2974a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 2975a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 2976a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block not_at_start = (not_at_start || not_at_start_); 2977a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int choice_count = alternatives_->length(); 2978b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(choice_count > 0); 2979a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alternatives_->at(0).node()->GetQuickCheckDetails(details, 2980a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler, 2981a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_filled_in, 2982a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block not_at_start); 2983a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 1; i < choice_count; i++) { 2984a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails new_details(details->characters()); 2985a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = alternatives_->at(i).node(); 2986a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node->GetQuickCheckDetails(&new_details, compiler, 
2987a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_filled_in, 2988a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block not_at_start); 2989a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Here we merge the quick match details of the two branches. 2990a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details->Merge(&new_details, characters_filled_in); 2991a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 2992a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 2993a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2994a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 2995a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Check for [0-9A-Z_a-z]. 2996a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic void EmitWordCheck(RegExpMacroAssembler* assembler, 2997a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* word, 2998a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* non_word, 2999a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool fall_through_on_word) { 3000e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (assembler->CheckSpecialCharacterClass( 3001e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke fall_through_on_word ? 'w' : 'W', 3002e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke fall_through_on_word ? non_word : word)) { 3003e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Optimized implementation available. 
3004e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return; 3005e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 3006a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterGT('z', non_word); 3007a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterLT('0', non_word); 3008a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterGT('a' - 1, word); 3009a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterLT('9' + 1, word); 3010a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterLT('A', non_word); 3011a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacterLT('Z' + 1, word); 3012a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (fall_through_on_word) { 3013a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckNotCharacter('_', non_word); 3014a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 3015a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckCharacter('_', word); 3016a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3017a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3018a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3019a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3020a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Emit the code to check for a ^ in multiline mode (1-character lookbehind 3021a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// that matches newline or the start of input). 
3022a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic void EmitHat(RegExpCompiler* compiler, 3023a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success, 3024a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace) { 3025a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3026a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We will be loading the previous character into the current character 3027a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // register. 3028a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace(*trace); 3029a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.InvalidateCurrentCharacter(); 3030a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3031a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label ok; 3032a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (new_trace.cp_offset() == 0) { 3033a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The start of input counts as a newline in this context, so skip to 3034a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // ok if we are at the start. 3035a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckAtStart(&ok); 3036a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3037a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We already checked that we are not at the start of input so it must be 3038a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // OK to load the previous character. 
3039a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, 3040a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.backtrack(), 3041a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block false); 3042e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (!assembler->CheckSpecialCharacterClass('n', 3043e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke new_trace.backtrack())) { 3044e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Newline means \n, \r, 0x2028 or 0x2029. 3045b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!compiler->one_byte()) { 3046e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); 3047e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 3048e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke assembler->CheckCharacter('\n', &ok); 3049e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke assembler->CheckNotCharacter('\r', new_trace.backtrack()); 3050a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3051a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(&ok); 3052a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success->Emit(compiler, &new_trace); 3053a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3054a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3055a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3056b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Emit the code to handle \b and \B (word-boundary or non-word-boundary). 
3057b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { 3058e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3059014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate = assembler->isolate(); 3060b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace::TriBool next_is_word_character = Trace::UNKNOWN; 3061b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); 3062b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMooreLookahead* lookahead = bm_info(not_at_start); 3063b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (lookahead == NULL) { 3064b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int eats_at_least = 3065b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, 3066b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch kRecursionBudget, 3067b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch not_at_start)); 3068b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (eats_at_least >= 1) { 3069b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMooreLookahead* bm = 3070b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); 3071014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start); 3072b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (bm->at(0)->is_non_word()) 3073b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch next_is_word_character = Trace::FALSE_VALUE; 3074b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; 3075b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3076b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 
3077b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (lookahead->at(0)->is_non_word()) 3078b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch next_is_word_character = Trace::FALSE_VALUE; 3079b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (lookahead->at(0)->is_word()) 3080b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch next_is_word_character = Trace::TRUE_VALUE; 3081b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3082b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY); 3083b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (next_is_word_character == Trace::UNKNOWN) { 3084b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label before_non_word; 3085b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label before_word; 3086b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (trace->characters_preloaded() != 1) { 3087b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); 3088b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3089b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Fall through on non-word. 3090b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitWordCheck(assembler, &before_word, &before_non_word, false); 3091b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Next character is not a word character. 3092b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->Bind(&before_non_word); 3093b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label ok; 3094b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BacktrackIfPrevious(compiler, trace, at_boundary ? 
kIsNonWord : kIsWord); 3095b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->GoTo(&ok); 3096b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3097b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->Bind(&before_word); 3098b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 3099b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->Bind(&ok); 3100b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else if (next_is_word_character == Trace::TRUE_VALUE) { 3101b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord); 3102b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 3103b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(next_is_word_character == Trace::FALSE_VALUE); 3104b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord); 3105e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 3106e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 3107e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 3108e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 3109b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid AssertionNode::BacktrackIfPrevious( 3110b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCompiler* compiler, 3111b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* trace, 3112b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AssertionNode::IfPrevious backtrack_if_previous) { 3113a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3114a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace(*trace); 3115a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.InvalidateCurrentCharacter(); 3116a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3117b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label 
fall_through, dummy; 3118a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3119b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* non_word = backtrack_if_previous == kIsNonWord ? 3120b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.backtrack() : 3121b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &fall_through; 3122b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label* word = backtrack_if_previous == kIsNonWord ? 3123b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &fall_through : 3124b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.backtrack(); 3125a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3126a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (new_trace.cp_offset() == 0) { 3127a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The start of input counts as a non-word character, so the question is 3128a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // decided if we are at the start. 3129b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->CheckAtStart(non_word); 3130a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3131a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We already checked that we are not at the start of input so it must be 3132a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // OK to load the previous character. 
3133b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false); 3134b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord); 3135a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3136b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch assembler->Bind(&fall_through); 3137b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch on_success()->Emit(compiler, &new_trace); 3138a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3139a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3140a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details, 3142a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 3143a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int filled_in, 3144a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 3145b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (assertion_type_ == AT_START && not_at_start) { 3146a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details->set_cannot_match(); 3147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3149a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return on_success()->GetQuickCheckDetails(details, 3150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler, 3151a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block filled_in, 3152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block not_at_start); 3153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3156a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3158b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch (assertion_type_) { 3159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AT_END: { 3160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label ok; 3161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->CheckPosition(trace->cp_offset(), &ok); 3162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->GoTo(trace->backtrack()); 3163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(&ok); 3164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AT_START: { 3167b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (trace->at_start() == Trace::FALSE_VALUE) { 3168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->GoTo(trace->backtrack()); 3169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3170a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3171a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->at_start() == Trace::UNKNOWN) { 3172014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); 3173a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace at_start_trace = *trace; 3174014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch at_start_trace.set_at_start(Trace::TRUE_VALUE); 3175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, &at_start_trace); 3176a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3177a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3178a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3179a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3180a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AFTER_NEWLINE: 3181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EmitHat(compiler, 
on_success(), trace); 3182a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AT_BOUNDARY: 3184e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke case AT_NON_BOUNDARY: { 3185b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitBoundaryCheck(compiler, trace); 3186a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3187e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 3188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, trace); 3190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3191a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3193a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { 3194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (quick_check == NULL) return false; 3195a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (offset >= quick_check->characters()) return false; 3196a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return quick_check->positions(offset)->determines_perfectly; 3197a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic void UpdateBoundsCheck(int index, int* checked_up_to) { 3201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (index > *checked_up_to) { 3202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block *checked_up_to = index; 3203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3206a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// We call this repeatedly to 
generate code for each pass over the text node. 3208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// The passes are in increasing order of difficulty because we hope one 3209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// of the first passes will fail in which case we are saved the work of the 3210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// later passes. for example for the case independent regexp /%[asdfghjkl]a/ 3211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// we will check the '%' in the first pass, the case independent 'a' in the 3212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// second pass and the character class in the last pass. 3213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 3214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// The passes are done from right to left, so for example to test for /bar/ 3215a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// we will first test for an 'r' with offset 2, then an 'a' with offset 1 3216a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// and then a 'b' with offset 0. This means we can avoid the end-of-input 3217a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// bounds check most of the time. In the example we only need to check for 3218a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// end-of-input when loading the putative 'r'. 3219a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 3220a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// A slight complication involves the fact that the first character may already 3221a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// be fetched into a register by the previous node. In this case we want to 3222a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// do the test for that character first. We do this in separate passes. The 3223a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 'preloaded' argument indicates that we are doing such a 'pass'. 
If such a 3224a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// pass has been performed then subsequent passes will have true in 3225a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// first_element_checked to indicate that that character does not need to be 3226a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// checked again. 3227a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 3228a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// In addition to all this we are passed a Trace, which can 3229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// contain an AlternativeGeneration object. In this AlternativeGeneration 3230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// object we can see details of any quick check that was already passed in 3231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// order to get to the code we are now generating. The quick check can involve 3232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// loading characters, which means we do not need to recheck the bounds 3233a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// up to the limit the quick check already checked. In addition the quick 3234a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// check can have involved a mask and compare operation which may simplify 3235a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// or obviate the need for further checks at some character positions. 
3236a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid TextNode::TextEmitPass(RegExpCompiler* compiler, 3237a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextEmitPassType pass, 3238a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preloaded, 3239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace, 3240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool first_element_checked, 3241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int* checked_up_to) { 3242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3243014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate = assembler->isolate(); 3244b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte = compiler->one_byte(); 3245a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* backtrack = trace->backtrack(); 3246a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails* quick_check = trace->quick_check_performed(); 3247014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int element_count = elements()->length(); 3248014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int backward_offset = read_backward() ? -Length() : 0; 3249a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { 3250014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextElement elm = elements()->at(i); 3251014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; 3252b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (elm.text_type() == TextElement::ATOM) { 3253b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Vector<const uc16> quarks = elm.atom()->data(); 3254a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int j = preloaded ? 
0 : quarks.length() - 1; j >= 0; j--) { 3255a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (first_element_checked && i == 0 && j == 0) continue; 3256b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; 3257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EmitCharacterFunction* emit_function = NULL; 3258a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (pass) { 3259b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch case NON_LATIN1_MATCH: 3260b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(one_byte); 3261b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (quarks[j] > String::kMaxOneByteCharCode) { 3262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->GoTo(backtrack); 3263a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3264a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3266a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case NON_LETTER_CHARACTER_MATCH: 3267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block emit_function = &EmitAtomNonLetter; 3268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case SIMPLE_CHARACTER_MATCH: 3270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block emit_function = &EmitSimpleCharacter; 3271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case CASE_CHARACTER_MATCH: 3273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block emit_function = &EmitAtomLetter; 3274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 3276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 3277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (emit_function != NULL) { 
3279014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); 3280014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool bound_checked = 3281014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch emit_function(isolate, compiler, quarks[j], backtrack, 3282014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cp_offset + j, bounds_check, preloaded); 3283a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); 3284a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3285a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3286a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 3287b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); 3288a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (pass == CHARACTER_CLASS_MATCH) { 3289a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (first_element_checked && i == 0) continue; 3290b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (DeterminedAlready(quick_check, elm.cp_offset())) continue; 3291b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* cc = elm.char_class(); 3292014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool bounds_check = *checked_up_to < cp_offset || read_backward(); 3293b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset, 3294014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bounds_check, preloaded, zone()); 3295a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UpdateBoundsCheck(cp_offset, checked_up_to); 3296a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3297a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3298a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3299a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3300a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
3301a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3302a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockint TextNode::Length() { 3303014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextElement elm = elements()->last(); 3304b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(elm.cp_offset() >= 0); 3305b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return elm.cp_offset() + elm.length(); 3306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3308a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3309a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockbool TextNode::SkipPass(int int_pass, bool ignore_case) { 3310a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass); 3311a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (ignore_case) { 3312a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return pass == SIMPLE_CHARACTER_MATCH; 3313a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 3314a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH; 3315a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3316a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3317a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3318a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3319109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochTextNode* TextNode::CreateForCharacterRanges(Zone* zone, 3320109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* ranges, 3321109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool read_backward, 3322109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success) { 3323109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK_NOT_NULL(ranges); 3324109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone); 
3325109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch elms->Add( 3326109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, false)), 3327109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone); 3328109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return new (zone) TextNode(elms, read_backward, on_success); 3329109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 3330109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 3331109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 3332109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochTextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead, 3333109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange trail, 3334109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool read_backward, 3335109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success) { 3336109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead); 3337109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail); 3338109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone); 3339109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch elms->Add(TextElement::CharClass( 3340109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch new (zone) RegExpCharacterClass(lead_ranges, false)), 3341109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone); 3342109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch elms->Add(TextElement::CharClass( 3343109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch new (zone) RegExpCharacterClass(trail_ranges, false)), 3344109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone); 3345109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return new (zone) TextNode(elms, read_backward, on_success); 3346109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben 
Murdoch} 3347109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 3348109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 3349a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// This generates the code to match a text node. A text node can contain 3350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// straight character sequences (possibly to be matched in a case-independent 3351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// way) and character classes. For efficiency we do not do this in a single 3352a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// pass from left to right. Instead we pass over the text node several times, 3353a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// emitting code for some character positions every time. See the comment on 3354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// TextEmitPass for details. 3355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block LimitResult limit_result = LimitVersions(compiler, trace); 3357a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (limit_result == DONE) return; 3358b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(limit_result == CONTINUE); 3359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { 3361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler->SetRegExpTooBig(); 3362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3365b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (compiler->one_byte()) { 3366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int dummy = 0; 3367b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, 
&dummy); 3368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool first_elt_done = false; 3371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int bound_checked_to = trace->cp_offset() - 1; 3372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bound_checked_to += trace->bound_checked_up_to(); 3373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If a character is preloaded into the current character register then 3375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // check that now. 3376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->characters_preloaded() == 1) { 3377a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { 3378a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!SkipPass(pass, compiler->ignore_case())) { 3379a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextEmitPass(compiler, 3380a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static_cast<TextEmitPassType>(pass), 3381a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block true, 3382a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace, 3383a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block false, 3384a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block &bound_checked_to); 3385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_elt_done = true; 3388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3389a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3390a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int pass = kFirstRealPass; pass <= kLastPass; pass++) { 3391a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!SkipPass(pass, compiler->ignore_case())) { 
3392a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextEmitPass(compiler, 3393a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static_cast<TextEmitPassType>(pass), 3394a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block false, 3395a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace, 3396a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_elt_done, 3397a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block &bound_checked_to); 3398a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3399a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3400a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3401a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace successor_trace(*trace); 3402014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // If we advance backward, we may end up at the start. 3403014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch successor_trace.AdvanceCurrentPositionInTrace( 3404014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch read_backward() ? -Length() : Length(), compiler); 3405014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch successor_trace.set_at_start(read_backward() ? 
Trace::UNKNOWN 3406014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch : Trace::FALSE_VALUE); 3407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RecursionCheck rc(compiler); 3408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, &successor_trace); 3409a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3410a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3411a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3412a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Trace::InvalidateCurrentCharacter() { 3413a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_preloaded_ = 0; 3414a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3415a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3416a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3417a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { 3418a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We don't have an instruction for shifting the current character register 3419a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // down or for using a shifted value for anything so lets just forget that 3420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // we preloaded any characters into it. 3421a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_preloaded_ = 0; 3422a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Adjust the offsets of the quick check performed information. This 3423a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // information is used to find out what we already determined about the 3424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // characters by means of mask and compare. 
3425b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch quick_check_performed_.Advance(by, compiler->one_byte()); 3426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cp_offset_ += by; 3427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) { 3428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler->SetRegExpTooBig(); 3429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cp_offset_ = 0; 3430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 3432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3434a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3435014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) { 3436014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int element_count = elements()->length(); 3437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < element_count; i++) { 3438014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextElement elm = elements()->at(i); 3439b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (elm.text_type() == TextElement::CHAR_CLASS) { 3440b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* cc = elm.char_class(); 34413ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // None of the standard character classes is different in the case 3442d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block // independent case and it slows us down if we don't know that. 
3443b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (cc->is_standard(zone())) continue; 3444b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 3445109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte); 3446a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3447a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3448a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3449a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3451014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochint TextNode::GreedyLoopTextLength() { return Length(); } 3452b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3453b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3454b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode( 3455b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCompiler* compiler) { 3456014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (read_backward()) return NULL; 3457014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (elements()->length() != 1) return NULL; 3458014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextElement elm = elements()->at(0); 3459b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (elm.text_type() != TextElement::CHAR_CLASS) return NULL; 3460b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* node = elm.char_class(); 3461b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = node->ranges(zone()); 3462109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Canonicalize(ranges); 3463b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (node->is_negated()) { 3464b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return ranges->length() == 0 ? 
on_success() : NULL; 3465b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3466b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (ranges->length() != 1) return NULL; 3467b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch uint32_t max_char; 3468b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (compiler->one_byte()) { 3469b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char = String::kMaxOneByteCharCode; 3470a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 3471b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char = String::kMaxUtf16CodeUnit; 3472a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3473b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return ranges->at(0).IsEverything(max_char) ? on_success() : NULL; 3474a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3475a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3476a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3477a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Finds the fixed match length of a sequence of nodes that goes from 3478a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// this alternative and back to this choice node. If there are variable 3479a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// length nodes or other complications in the way then return a sentinel 3480a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// value indicating that a greedy loop cannot be constructed. 
3481589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdochint ChoiceNode::GreedyLoopTextLengthForAlternative( 3482589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch GuardedAlternative* alternative) { 3483a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int length = 0; 3484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = alternative->node(); 3485a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Later we will generate code for all these text nodes using recursion 3486a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // so we have to limit the max number. 3487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int recursion_depth = 0; 3488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block while (node != this) { 3489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 3490a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return kNodeIsTooComplexForGreedyLoops; 3491a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3492a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int node_length = node->GreedyLoopTextLength(); 3493a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (node_length == kNodeIsTooComplexForGreedyLoops) { 3494a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return kNodeIsTooComplexForGreedyLoops; 3495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block length += node_length; 3497a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 3498a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node = seq_node->on_success(); 3499a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3500014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return read_backward() ? 
-length : length; 3501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3502a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3503a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3504a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { 3505014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_NULL(loop_node_); 3506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddAlternative(alt); 3507a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block loop_node_ = alt.node(); 3508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3511a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { 3512014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_NULL(continue_node_); 3513a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddAlternative(alt); 3514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block continue_node_ = alt.node(); 3515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3516a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3517a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3518a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3520a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->stop_node() == this) { 3521b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Back edge of greedy optimized loop node graph. 
3522589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int text_length = 3523589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch GreedyLoopTextLengthForAlternative(&(alternatives_->at(0))); 3524b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(text_length != kNodeIsTooComplexForGreedyLoops); 3525a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Update the counter-based backtracking info on the stack. This is an 3526a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // optimization for greedy loops (see below). 3527b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(trace->cp_offset() == text_length); 3528a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->AdvanceCurrentPosition(text_length); 3529a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->GoTo(trace->loop_label()); 3530a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3531a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3532014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_NULL(trace->stop_node()); 3533a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!trace->is_trivial()) { 3534a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 3535a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3536a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3537a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ChoiceNode::Emit(compiler, trace); 3538a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3539a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3540a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3541b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdochint ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, 3542b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int eats_at_least) { 3543b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int preload_characters = Min(4, eats_at_least); 3544a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if 
(compiler->macro_assembler()->CanReadUnaligned()) { 3545b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool one_byte = compiler->one_byte(); 3546b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (one_byte) { 3547a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (preload_characters > 4) preload_characters = 4; 3548a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We can't preload 3 characters because there is no machine instruction 3549a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to do that. We can't just load 4 because we could be reading 3550a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // beyond the end of the string, which could cause a memory fault. 3551a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (preload_characters == 3) preload_characters = 2; 3552a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 3553a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (preload_characters > 2) preload_characters = 2; 3554a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3555a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 3556a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (preload_characters > 1) preload_characters = 1; 3557a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3558a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return preload_characters; 3559a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 3560a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3561a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3562a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// This class is used when generating the alternatives in a choice node. It 3563a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// records the way the alternative is being code generated. 
3564a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass AlternativeGeneration: public Malloced { 3565a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 3566a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AlternativeGeneration() 3567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : possible_success(), 3568a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block expects_preload(false), 3569a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block after(), 3570a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block quick_check_details() { } 3571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label possible_success; 3572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool expects_preload; 3573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label after; 3574a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails quick_check_details; 3575a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 3576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3578b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Creates a list of AlternativeGenerations. If the list has a reasonable 3579b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// size then it is on the stack, otherwise the excess is on the heap. 
3580b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochclass AlternativeGenerationList { 3581b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch public: 3582b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGenerationList(int count, Zone* zone) 3583b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : alt_gens_(count, zone) { 3584b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < count && i < kAFew; i++) { 3585b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gens_.Add(a_few_alt_gens_ + i, zone); 3586b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3587b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = kAFew; i < count; i++) { 3588b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gens_.Add(new AlternativeGeneration(), zone); 3589b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3590b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3591b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ~AlternativeGenerationList() { 3592b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = kAFew; i < alt_gens_.length(); i++) { 3593b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch delete alt_gens_[i]; 3594b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gens_[i] = NULL; 3595b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3596b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3597b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3598b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGeneration* at(int i) { 3599b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return alt_gens_[i]; 3600b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3601b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3602b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch private: 3603b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kAFew = 10; 3604b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<AlternativeGeneration*> alt_gens_; 
3605b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGeneration a_few_alt_gens_[kAFew]; 3606b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch}; 3607b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3608b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3609109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochstatic const uc32 kRangeEndMarker = 0x110000; 3610109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 3611b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// The '2' variant is has inclusive from and exclusive to. 3612b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// This covers \s as defined in ECMA-262 5.1, 15.10.2.12, 3613b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// which include WhiteSpace (7.2) or LineTerminator (7.3) values. 3614109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochstatic const int kSpaceRanges[] = { 3615109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680, 0x1681, 3616109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, 3617109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker}; 3618b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic const int kSpaceRangeCount = arraysize(kSpaceRanges); 3619b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3620b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic const int kWordRanges[] = { 3621109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, kRangeEndMarker}; 3622b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic const int kWordRangeCount = arraysize(kWordRanges); 3623109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochstatic const int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker}; 3624b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic const int kDigitRangeCount = arraysize(kDigitRanges); 
3625109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochstatic const int kSurrogateRanges[] = { 3626109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker}; 3627b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic const int kSurrogateRangeCount = arraysize(kSurrogateRanges); 3628109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochstatic const int kLineTerminatorRanges[] = { 3629109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker}; 3630b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges); 3631b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3632b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid BoyerMoorePositionInfo::Set(int character) { 3633b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SetInterval(Interval(character, character)); 3634b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3635b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3636b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3637b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid BoyerMoorePositionInfo::SetInterval(const Interval& interval) { 3638b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval); 3639b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval); 3640b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval); 3641b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch surrogate_ = 3642b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval); 3643b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (interval.to() - interval.from() >= kMapSize - 1) { 3644b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (map_count_ != kMapSize) { 
3645b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch map_count_ = kMapSize; 3646b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < kMapSize; i++) map_->at(i) = true; 3647b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3648b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 3649b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3650b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = interval.from(); i <= interval.to(); i++) { 3651b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int mod_character = (i & kMask); 3652b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!map_->at(mod_character)) { 3653b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch map_count_++; 3654b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch map_->at(mod_character) = true; 3655b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3656b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (map_count_ == kMapSize) return; 3657b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3658b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3659b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3660b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3661b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid BoyerMoorePositionInfo::SetAll() { 3662b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch s_ = w_ = d_ = kLatticeUnknown; 3663b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (map_count_ != kMapSize) { 3664b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch map_count_ = kMapSize; 3665b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < kMapSize; i++) map_->at(i) = true; 3666b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3667b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3668b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3669b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3670b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochBoyerMooreLookahead::BoyerMooreLookahead( 
3671b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int length, RegExpCompiler* compiler, Zone* zone) 3672b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : length_(length), 3673b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler_(compiler) { 3674b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (compiler->one_byte()) { 3675b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char_ = String::kMaxOneByteCharCode; 3676b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 3677b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_char_ = String::kMaxUtf16CodeUnit; 3678b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3679b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bitmaps_ = new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); 3680b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < length; i++) { 3681b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bitmaps_->Add(new(zone) BoyerMoorePositionInfo(zone), zone); 3682b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3683b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3684b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3685b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3686b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Find the longest range of lookahead that has the fewest number of different 3687b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// characters that can occur at a given position. Since we are optimizing two 3688b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// different parameters at once this is a tradeoff. 
3689b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochbool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) { 3690b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int biggest_points = 0; 3691b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If more than 32 characters out of 128 can occur it is unlikely that we can 3692b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // be lucky enough to step forwards much of the time. 3693b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int kMaxMax = 32; 3694b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int max_number_of_chars = 4; 3695b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_number_of_chars < kMaxMax; 3696b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch max_number_of_chars *= 2) { 3697b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch biggest_points = 3698b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch FindBestInterval(max_number_of_chars, biggest_points, from, to); 3699b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3700b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (biggest_points == 0) return false; 3701b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return true; 3702b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3703b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3704b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3705b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Find the highest-points range between 0 and length_ where the character 3706b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// information is not too vague. 'Too vague' means that there are more than 3707b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// max_number_of_chars that can occur at this position. 
Calculates the number 3708b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// of points as the product of width-of-the-range and 3709b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// probability-of-finding-one-of-the-characters, where the probability is 3710b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// calculated using the frequency distribution of the sample subject string. 3711b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint BoyerMooreLookahead::FindBestInterval( 3712b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int max_number_of_chars, int old_biggest_points, int* from, int* to) { 3713b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int biggest_points = old_biggest_points; 3714b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kSize = RegExpMacroAssembler::kTableSize; 3715b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < length_; ) { 3716b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch while (i < length_ && Count(i) > max_number_of_chars) i++; 3717b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (i == length_) break; 3718b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int remembered_from = i; 3719b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool union_map[kSize]; 3720b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < kSize; j++) union_map[j] = false; 3721b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch while (i < length_ && Count(i) <= max_number_of_chars) { 3722b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMoorePositionInfo* map = bitmaps_->at(i); 3723b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j); 3724b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch i++; 3725b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3726b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int frequency = 0; 3727b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < kSize; j++) { 
3728b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (union_map[j]) { 3729b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Add 1 to the frequency to give a small per-character boost for 3730b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // the cases where our sampling is not good enough and many 3731b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // characters have a frequency of zero. This means the frequency 3732b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // can theoretically be up to 2*kSize though we treat it mostly as 3733b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // a fraction of kSize. 3734b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch frequency += compiler_->frequency_collator()->Frequency(j) + 1; 3735b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3736b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3737b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // We use the probability of skipping times the distance we are skipping to 3738b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // judge the effectiveness of this. Actually we have a cut-off: By 3739b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // dividing by 2 we switch off the skipping if the probability of skipping 3740b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // is less than 50%. This is because the multibyte mask-and-compare 3741b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // skipping in quickcheck is more likely to do well on this case. 3742b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool in_quickcheck_range = 3743b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ((i - remembered_from < 4) || 3744b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (compiler_->one_byte() ? 
remembered_from <= 4 : remembered_from <= 2)); 3745b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Called 'probability' but it is only a rough estimate and can actually 3746b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // be outside the 0-kSize range. 3747b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency; 3748b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int points = (i - remembered_from) * probability; 3749b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (points > biggest_points) { 3750b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *from = remembered_from; 3751b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch *to = i - 1; 3752b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch biggest_points = points; 3753b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3754b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3755b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return biggest_points; 3756b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3757b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3758b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3759b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Take all the characters that will not prevent a successful match if they 3760b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// occur in the subject string in the range between min_lookahead and 3761b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// max_lookahead (inclusive) measured from the current position. If the 3762b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// character at max_lookahead offset is not one of these characters, then we 3763b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// can safely skip forwards by the number of characters in the range. 
3764b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint BoyerMooreLookahead::GetSkipTable(int min_lookahead, 3765b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int max_lookahead, 3766b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<ByteArray> boolean_skip_table) { 3767b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int kSize = RegExpMacroAssembler::kTableSize; 3768b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3769b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int kSkipArrayEntry = 0; 3770b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int kDontSkipArrayEntry = 1; 3771b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3772b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < kSize; i++) { 3773b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch boolean_skip_table->set(i, kSkipArrayEntry); 3774b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3775b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int skip = max_lookahead + 1 - min_lookahead; 3776b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3777b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = max_lookahead; i >= min_lookahead; i--) { 3778b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMoorePositionInfo* map = bitmaps_->at(i); 3779b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < kSize; j++) { 3780b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (map->at(j)) { 3781b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch boolean_skip_table->set(j, kDontSkipArrayEntry); 3782b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3783a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3784a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3785b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3786b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return skip; 3787b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3788b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
3789b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3790b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// See comment above on the implementation of GetSkipTable. 3791b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) { 3792b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int kSize = RegExpMacroAssembler::kTableSize; 3793b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3794b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int min_lookahead = 0; 3795b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int max_lookahead = 0; 3796b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3797b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return; 3798b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3799b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool found_single_character = false; 3800b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int single_character = 0; 3801b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = max_lookahead; i >= min_lookahead; i--) { 3802b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMoorePositionInfo* map = bitmaps_->at(i); 3803b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (map->map_count() > 1 || 3804b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (found_single_character && map->map_count() != 0)) { 3805b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch found_single_character = false; 3806b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch break; 3807b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3808b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < kSize; j++) { 3809b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (map->at(j)) { 3810b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch found_single_character = true; 3811b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch single_character = j; 
3812b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch break; 3813b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3814a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3815a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3816a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3817b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int lookahead_width = max_lookahead + 1 - min_lookahead; 3818b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3819b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (found_single_character && lookahead_width == 1 && max_lookahead < 3) { 3820b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // The mask-compare can probably handle this better. 3821b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 3822a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 38233fb3ca8c7ca439d408449a395897395c0faae8d1Ben Murdoch 3824b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (found_single_character) { 3825b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label cont, again; 3826b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->Bind(&again); 3827b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3828b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (max_char_ > kSize) { 3829b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacterAfterAnd(single_character, 3830b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpMacroAssembler::kTableMask, 3831b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &cont); 3832b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 3833b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckCharacter(single_character, &cont); 3834b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3835b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->AdvanceCurrentPosition(lookahead_width); 3836b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->GoTo(&again); 
3837b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->Bind(&cont); 3838b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 3839b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3840b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3841014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Factory* factory = masm->isolate()->factory(); 3842b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED); 3843b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int skip_distance = GetSkipTable( 3844b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch min_lookahead, max_lookahead, boolean_skip_table); 3845b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(skip_distance != 0); 3846b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3847b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label cont, again; 3848b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->Bind(&again); 3849b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->LoadCurrentCharacter(max_lookahead, &cont, true); 3850b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->CheckBitInTable(boolean_skip_table, &cont); 3851b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->AdvanceCurrentPosition(skip_distance); 3852b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->GoTo(&again); 3853b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch masm->Bind(&cont); 3854b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3855a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3856a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3857a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block/* Code generation for choice nodes. 3858a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * 3859a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * We generate quick checks that do a mask and compare to eliminate a 3860a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * choice. 
If the quick check succeeds then it jumps to the continuation to 3861a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * do slow checks and check subsequent nodes. If it fails (the common case) 3862a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * it falls through to the next choice. 3863a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * 3864a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * Here is the desired flow graph. Nodes directly below each other imply 3865a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * fallthrough. Alternatives 1 and 2 have quick checks. Alternative 3866a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * 3 doesn't have a quick check so we have to call the slow check. 3867a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * Nodes are marked Qn for quick checks and Sn for slow checks. The entire 3868a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * regexp continuation is generated directly after the Sn node, up to the 3869a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * next GoTo if we decide to reuse some already generated code. Some 3870a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * nodes expect preload_characters to be preloaded into the current 3871a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * character register. R nodes do this preloading. Vertices are marked 3872a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * F for failures and S for success (possible success in the case of quick 3873a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * nodes). L, V, < and > are used as arrow heads. 
3874a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * 3875a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * ----------> R 3876a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | 3877a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * V 3878a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * Q1 -----> S1 3879a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | S / 3880a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * F| / 3881a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | F/ 3882a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | / 3883a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | R 3884a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | / 3885a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * V L 3886a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * Q2 -----> S2 3887a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | S / 3888a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * F| / 3889a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | F/ 3890a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | / 3891a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | R 3892a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | / 3893a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * V L 3894a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * S3 3895a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | 3896a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * F| 3897a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | 3898a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * R 3899a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * | 3900a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * backtrack V 3901a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * <----------Q4 3902a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * \ F | 3903a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * \ |S 3904a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * \ F V 
3905a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * \-----S4 3906a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * 3907b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * For greedy loops we push the current position, then generate the code that 3908b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * eats the input specially in EmitGreedyLoop. The other choice (the 3909b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * continuation) is generated by the normal code in EmitChoices, and steps back 3910b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * in the input to the starting position when it fails to match. The loop code 3911b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * looks like this (U is the unwind code that steps back in the greedy loop). 3912b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * 3913a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * _____ 3914a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * / \ 3915a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * V | 3916a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * ----------> S1 | 3917a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * /| | 3918a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * / |S | 3919a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * F/ \_____/ 3920a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block * / 3921b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * |<----- 3922b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * | \ 3923b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * V |S 3924b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * Q2 ---> U----->backtrack 3925b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * | F / 3926b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * S| / 3927b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * V F / 3928b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch * S2--/ 3929a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block */ 3930a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block 3931b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochGreedyLoopState::GreedyLoopState(bool not_at_start) { 3932b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch counter_backtrack_trace_.set_backtrack(&label_); 3933014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE); 3934b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3935a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3936b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3937b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) { 3938a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#ifdef DEBUG 3939b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int choice_count = alternatives_->length(); 3940a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < choice_count - 1; i++) { 3941a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alternative = alternatives_->at(i); 3942a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<Guard*>* guards = alternative.guards(); 3943a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int guard_count = (guards == NULL) ? 
0 : guards->length(); 3944a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int j = 0; j < guard_count; j++) { 3945b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(!trace->mentions_reg(guards->at(j)->reg())); 3946a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3947a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3948a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 3949b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3950b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3951b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3952b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, 3953b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* current_trace, 3954b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch PreloadState* state) { 3955b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) { 3956b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Save some time by looking at most one machine word ahead. 3957b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch state->eats_at_least_ = 3958b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EatsAtLeast(compiler->one_byte() ? 
4 : 2, kRecursionBudget, 3959b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch current_trace->at_start() == Trace::FALSE_VALUE); 3960b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 3961b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch state->preload_characters_ = 3962b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CalculatePreloadCharacters(compiler, state->eats_at_least_); 3963b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3964b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch state->preload_is_current_ = 3965b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch (current_trace->characters_preloaded() == state->preload_characters_); 3966b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch state->preload_has_checked_bounds_ = state->preload_is_current_; 3967b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 3968b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3969b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3970b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3971b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int choice_count = alternatives_->length(); 3972b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 3973109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (choice_count == 1 && alternatives_->at(0).guards() == NULL) { 3974109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch alternatives_->at(0).node()->Emit(compiler, trace); 3975109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return; 3976109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 3977109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 3978b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AssertGuardsMentionRegisters(trace); 3979a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3980a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block LimitResult limit_result = LimitVersions(compiler, trace); 3981a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (limit_result == DONE) return; 
3982b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(limit_result == CONTINUE); 3983a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3984b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for 3985b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // other choice nodes we only flush if we are out of code size budget. 3986a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (trace->flush_budget() == 0 && trace->actions() != NULL) { 3987a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 3988a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 3989a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 3990a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3991a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RecursionCheck rc(compiler); 3992a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3993b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch PreloadState preload; 3994b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload.init(); 3995b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GreedyLoopState greedy_loop_state(not_at_start()); 3996a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 3997b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0)); 3998b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGenerationList alt_gens(choice_count, zone()); 3999a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4000a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { 4001b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch trace = EmitGreedyLoop(compiler, 4002b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch trace, 4003b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &alt_gens, 4004b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &preload, 
4005b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &greedy_loop_state, 4006b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch text_length); 4007b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 4008b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // TODO(erikcorry): Delete this. We don't need this label, but it makes us 4009b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // match the traces produced pre-cleanup. 4010b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label second_choice; 4011b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler->macro_assembler()->Bind(&second_choice); 4012b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4013b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace); 4014b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4015b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitChoices(compiler, 4016b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &alt_gens, 4017b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 0, 4018b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch trace, 4019b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &preload); 4020b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4021b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4022b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // At this point we need to generate slow checks for the alternatives where 4023b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // the quick check was inlined. We can recognize these because the associated 4024b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // label was bound. 
4025b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int new_flush_budget = trace->flush_budget() / choice_count; 4026b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < choice_count; i++) { 4027b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGeneration* alt_gen = alt_gens.at(i); 4028b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace new_trace(*trace); 4029b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If there are actions to be flushed we have to limit how many times 4030b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // they are flushed. Take the budget of the parent trace and distribute 4031b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // it fairly amongst the children. 4032b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (new_trace.actions() != NULL) { 4033b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.set_flush_budget(new_flush_budget); 4034b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4035b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool next_expects_preload = 4036b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch i == choice_count - 1 ? 
false : alt_gens.at(i + 1)->expects_preload; 4037b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitOutOfLineContinuation(compiler, 4038b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch &new_trace, 4039b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(i), 4040b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gen, 4041b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload.preload_characters_, 4042b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch next_expects_preload); 4043a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4044b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 4045b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4046b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4047b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochTrace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, 4048b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* trace, 4049b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGenerationList* alt_gens, 4050b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch PreloadState* preload, 4051b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GreedyLoopState* greedy_loop_state, 4052b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int text_length) { 4053b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 4054b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Here we have special handling for greedy loops containing only text nodes 4055b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // and other simple nodes. These are handled by pushing the current 4056b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // position on the stack and then incrementing the current position each 4057b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // time around the switch. 
On backtrack we decrement the current position 4058b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // and check it against the pushed value. This avoids pushing backtrack 4059b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // information for each iteration of the loop, which could take up a lot of 4060b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // space. 4061b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(trace->stop_node() == NULL); 4062b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->PushCurrentPosition(); 4063b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label greedy_match_failed; 4064b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace greedy_match_trace; 4065014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); 4066b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch greedy_match_trace.set_backtrack(&greedy_match_failed); 4067b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Label loop_label; 4068b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->Bind(&loop_label); 4069b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch greedy_match_trace.set_stop_node(this); 4070b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch greedy_match_trace.set_loop_label(&loop_label); 4071b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace); 4072b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->Bind(&greedy_match_failed); 4073a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4074a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label second_choice; // For use in greedy matches. 
4075a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&second_choice); 4076a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4077b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* new_trace = greedy_loop_state->counter_backtrack_trace(); 4078b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4079b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EmitChoices(compiler, 4080b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gens, 4081b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 1, 4082b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace, 4083b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload); 4084b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4085b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->Bind(greedy_loop_state->label()); 4086b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If we have unwound to the bottom then backtrack. 4087b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->CheckGreedyLoop(trace->backtrack()); 4088b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Otherwise try the second priority at an earlier position. 
4089b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->AdvanceCurrentPosition(-text_length); 4090b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch macro_assembler->GoTo(&second_choice); 4091b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return new_trace; 4092b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 4093b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4094b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochint ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, 4095b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* trace) { 4096b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized; 4097b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (alternatives_->length() != 2) return eats_at_least; 4098b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4099b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GuardedAlternative alt1 = alternatives_->at(1); 4100b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (alt1.guards() != NULL && alt1.guards()->length() != 0) { 4101b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return eats_at_least; 4102b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4103b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpNode* eats_anything_node = alt1.node(); 4104b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) { 4105b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return eats_at_least; 4106b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4107b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4108b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Really we should be creating a new trace when we execute this function, 4109b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // but there is no need, because the code it generates cannot backtrack, and 4110b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // we always 
arrive here with a trivial trace (since it's the entry to a 4111b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // loop. That also implies that there are no preloaded characters, which is 4112b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // good, because it means we won't be violating any assumptions by 4113b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // overwriting those characters with new load instructions. 4114b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(trace->is_trivial()); 4115b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4116b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 4117014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Isolate* isolate = macro_assembler->isolate(); 4118b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // At this point we know that we are at a non-greedy loop that will eat 4119b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // any character one at a time. Any non-anchored regexp has such a 4120b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // loop prepended to it in order to find where it starts. We look for 4121b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // a pattern of the form ...abc... where we can look 6 characters ahead 4122b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // and step forwards 3 if the character is not one of abc. Abc need 4123b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // not be atoms, they can be any reasonably limited character class or 4124b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // small alternation. 
4125b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMooreLookahead* bm = bm_info(false); 4126b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (bm == NULL) { 4127b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch eats_at_least = Min(kMaxLookaheadForBoyerMoore, 4128b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch EatsAtLeast(kMaxLookaheadForBoyerMoore, 4129b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch kRecursionBudget, 4130b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch false)); 4131b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (eats_at_least >= 1) { 4132b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm = new(zone()) BoyerMooreLookahead(eats_at_least, 4133b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler, 4134b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zone()); 4135b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GuardedAlternative alt0 = alternatives_->at(0); 4136014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false); 4137b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4138b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4139b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (bm != NULL) { 4140b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->EmitSkipInstructions(macro_assembler); 4141b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4142b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return eats_at_least; 4143b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch} 4144a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4145a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4146b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid ChoiceNode::EmitChoices(RegExpCompiler* compiler, 4147b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGenerationList* alt_gens, 4148b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int first_choice, 4149b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace* 
trace, 4150b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch PreloadState* preload) { 4151b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 4152b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SetUpPreLoad(compiler, trace, preload); 4153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // For now we just call all choices one after the other. The idea ultimately 4155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // is to use the Dispatch table to try only the relevant ones. 4156b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int choice_count = alternatives_->length(); 4157b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4158b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int new_flush_budget = trace->flush_budget() / choice_count; 4159b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4160b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = first_choice; i < choice_count; i++) { 4161b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool is_last = i == choice_count - 1; 4162b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool fall_through_on_failure = !is_last; 4163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alternative = alternatives_->at(i); 4164b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AlternativeGeneration* alt_gen = alt_gens->at(i); 4165b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gen->quick_check_details.set_characters(preload->preload_characters_); 4166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<Guard*>* guards = alternative.guards(); 4167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int guard_count = (guards == NULL) ? 
0 : guards->length(); 4168b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Trace new_trace(*trace); 4169b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.set_characters_preloaded(preload->preload_is_current_ ? 4170b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload->preload_characters_ : 4171a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 0); 4172b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (preload->preload_has_checked_bounds_) { 4173b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.set_bound_checked_up_to(preload->preload_characters_); 4174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.quick_check_performed()->Clear(); 4176b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); 4177b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!is_last) { 4178b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.set_backtrack(&alt_gen->after); 4179b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4180b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch alt_gen->expects_preload = preload->preload_is_current_; 4181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool generate_full_check_inline = false; 4182958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier if (compiler->optimize() && 4183b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch try_to_emit_quick_check_for_alternative(i == 0) && 4184958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier alternative.node()->EmitQuickCheck( 4185958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier compiler, trace, &new_trace, preload->preload_has_checked_bounds_, 4186958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier &alt_gen->possible_success, &alt_gen->quick_check_details, 4187958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier fall_through_on_failure)) { 4188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Quick check was 
generated for this choice. 4189b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload->preload_is_current_ = true; 4190b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload->preload_has_checked_bounds_ = true; 4191b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // If we generated the quick check to fall through on possible success, 4192b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // we now need to generate the full check inline. 4193b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!fall_through_on_failure) { 4194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&alt_gen->possible_success); 4195a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.set_quick_check_performed(&alt_gen->quick_check_details); 4196b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.set_characters_preloaded(preload->preload_characters_); 4197b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new_trace.set_bound_checked_up_to(preload->preload_characters_); 4198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block generate_full_check_inline = true; 4199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (alt_gen->quick_check_details.cannot_match()) { 4201b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!fall_through_on_failure) { 4202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->GoTo(trace->backtrack()); 4203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block continue; 4205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4206a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // No quick check was generated. Put the full code here. 4207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If this is not the first choice then there could be slow checks from 4208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // previous cases that go here when they fail. 
There's no reason to 4209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // insist that they preload characters since the slow check we are about 4210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to generate probably can't use it. 4211b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (i != first_choice) { 4212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alt_gen->expects_preload = false; 4213e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke new_trace.InvalidateCurrentCharacter(); 4214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4215a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block generate_full_check_inline = true; 4216a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4217a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (generate_full_check_inline) { 4218a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (new_trace.actions() != NULL) { 4219a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.set_flush_budget(new_flush_budget); 4220a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4221a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int j = 0; j < guard_count; j++) { 4222a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GenerateGuard(macro_assembler, guards->at(j), &new_trace); 4223a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4224a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alternative.node()->Emit(compiler, &new_trace); 4225b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch preload->preload_is_current_ = false; 4226a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4227a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&alt_gen->after); 4228a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid 
ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, 4233a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace, 4234a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alternative, 4235a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AlternativeGeneration* alt_gen, 4236a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int preload_characters, 4237a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool next_expects_preload) { 4238a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!alt_gen->possible_success.is_linked()) return; 4239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 4241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&alt_gen->possible_success); 4242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace out_of_line_trace(*trace); 4243a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block out_of_line_trace.set_characters_preloaded(preload_characters); 4244a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); 4245b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE); 4246a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<Guard*>* guards = alternative.guards(); 4247a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int guard_count = (guards == NULL) ? 
0 : guards->length(); 4248a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (next_expects_preload) { 4249a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label reload_current_char; 4250a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block out_of_line_trace.set_backtrack(&reload_current_char); 4251a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int j = 0; j < guard_count; j++) { 4252a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); 4253a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4254a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alternative.node()->Emit(compiler, &out_of_line_trace); 4255a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->Bind(&reload_current_char); 4256a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Reload the current character, since the next quick check expects that. 4257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We don't need to check bounds here because we only get into this 4258a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // code through a quick check which already did the checked load. 
4259a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->LoadCurrentCharacter(trace->cp_offset(), 4260a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NULL, 4261a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block false, 4262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block preload_characters); 4263a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block macro_assembler->GoTo(&(alt_gen->after)); 4264a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block out_of_line_trace.set_backtrack(&(alt_gen->after)); 4266a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int j = 0; j < guard_count; j++) { 4267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); 4268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alternative.node()->Emit(compiler, &out_of_line_trace); 4270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 4275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 4276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block LimitResult limit_result = LimitVersions(compiler, trace); 4277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (limit_result == DONE) return; 4278b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(limit_result == CONTINUE); 4279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4280a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RecursionCheck rc(compiler); 4281a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4282b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch 
(action_type_) { 4283a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case STORE_POSITION: { 4284a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace::DeferredCapture 4285a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_capture(data_.u_position_register.reg, 4286a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data_.u_position_register.is_capture, 4287a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace); 4288a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace = *trace; 4289a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.add_action(&new_capture); 4290a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, &new_trace); 4291a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4292a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4293a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case INCREMENT_REGISTER: { 4294a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace::DeferredIncrementRegister 4295a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_increment(data_.u_increment_register.reg); 4296a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace = *trace; 4297a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.add_action(&new_increment); 4298a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, &new_trace); 4299a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4300a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4301a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case SET_REGISTER: { 4302a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace::DeferredSetRegister 4303a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_set(data_.u_store_register.reg, data_.u_store_register.value); 4304a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace = *trace; 4305a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.add_action(&new_set); 4306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block on_success()->Emit(compiler, &new_trace); 4307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4308a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4309a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case CLEAR_CAPTURES: { 4310a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace::DeferredClearCaptures 4311a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_capture(Interval(data_.u_clear_captures.range_from, 4312a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data_.u_clear_captures.range_to)); 4313a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace = *trace; 4314a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.add_action(&new_capture); 4315a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, &new_trace); 4316a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4317a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4318a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case BEGIN_SUBMATCH: 4319a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!trace->is_trivial()) { 4320a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 4321a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4322a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->WriteCurrentPositionToRegister( 4323a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data_.u_submatch.current_position_register, 0); 4324a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->WriteStackPointerToRegister( 4325a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data_.u_submatch.stack_pointer_register); 4326a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, trace); 4327a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4328a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4329a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case EMPTY_MATCH_CHECK: { 4330a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int 
start_pos_reg = data_.u_empty_match_check.start_register; 4331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int stored_pos = 0; 4332a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int rep_reg = data_.u_empty_match_check.repetition_register; 4333a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); 4334a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); 4335a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { 4336a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we know we haven't advanced and there is no minimum we 4337a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // can just backtrack immediately. 4338a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->GoTo(trace->backtrack()); 4339a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (know_dist && stored_pos < trace->cp_offset()) { 4340a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we know we've advanced we can generate the continuation 4341a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // immediately. 4342a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, trace); 4343a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (!trace->is_trivial()) { 4344a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 4345a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4346a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label skip_empty_check; 4347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we have a minimum number of repetitions we check the current 4348a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // number first and skip the empty check if it's not enough. 
4349a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (has_minimum) { 4350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int limit = data_.u_empty_match_check.repetition_limit; 4351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check); 4352a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4353a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the match is empty we bail out, otherwise we fall through 4354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to the on-success continuation. 4355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, 4356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->backtrack()); 4357a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(&skip_empty_check); 4358a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, trace); 4359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case POSITIVE_SUBMATCH_SUCCESS: { 4363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!trace->is_trivial()) { 4364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 4365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 4366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ReadCurrentPositionFromRegister( 4368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data_.u_submatch.current_position_register); 4369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ReadStackPointerFromRegister( 4370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data_.u_submatch.stack_pointer_register); 4371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_register_count = 
data_.u_submatch.clear_register_count; 4372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (clear_register_count == 0) { 4373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, trace); 4374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 4375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_registers_from = data_.u_submatch.clear_register_from; 4377a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label clear_registers_backtrack; 4378a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace new_trace = *trace; 4379a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_trace.set_backtrack(&clear_registers_backtrack); 4380a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, &new_trace); 4381a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4382a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Bind(&clear_registers_backtrack); 4383a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_registers_to = clear_registers_from + clear_register_count - 1; 4384a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->ClearRegisters(clear_registers_from, clear_registers_to); 4385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4386b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(trace->backtrack() == NULL); 4387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block assembler->Backtrack(); 4388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 4389a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4390a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 4391a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 4392a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4393a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4394a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4395a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
4396a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 4397a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpMacroAssembler* assembler = compiler->macro_assembler(); 4398a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!trace->is_trivial()) { 4399a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block trace->Flush(compiler, this); 4400a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 4401a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4402a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4403a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block LimitResult limit_result = LimitVersions(compiler, trace); 4404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (limit_result == DONE) return; 4405b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(limit_result == CONTINUE); 4406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RecursionCheck rc(compiler); 4408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4409b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(start_reg_ + 1, end_reg_); 4410a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (compiler->ignore_case()) { 4411109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch assembler->CheckNotBackReferenceIgnoreCase( 4412109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch start_reg_, read_backward(), compiler->unicode(), trace->backtrack()); 4413a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4414014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch assembler->CheckNotBackReference(start_reg_, read_backward(), 4415014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch trace->backtrack()); 4416a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4417014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // We are going to advance backward, so we may end up at the start. 
4418014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (read_backward()) trace->set_at_start(Trace::UNKNOWN); 4419109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4420109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Check that the back reference does not end inside a surrogate pair. 4421109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (compiler->unicode() && !compiler->one_byte()) { 4422109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack()); 4423109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 4424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success()->Emit(compiler, trace); 4425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ------------------------------------------------------------------- 4429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Dot/dotty output 4430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#ifdef DEBUG 4433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4434a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4435a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass DotPrinter: public NodeVisitor { 4436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 4437958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier DotPrinter(std::ostream& os, bool ignore_case) // NOLINT 4438b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : os_(os), 4439b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ignore_case_(ignore_case) {} 4440a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PrintNode(const char* label, RegExpNode* node); 4441a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Visit(RegExpNode* node); 
4442a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PrintAttributes(RegExpNode* from); 4443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PrintOnFailure(RegExpNode* from, RegExpNode* to); 4444a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define DECLARE_VISIT(Type) \ 4445a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Visit##Type(Type##Node* that); 4446a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockFOR_EACH_NODE_TYPE(DECLARE_VISIT) 4447a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#undef DECLARE_VISIT 4448a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 4449958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier std::ostream& os_; 4450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool ignore_case_; 4451a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 4452a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4453a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4454a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::PrintNode(const char* label, RegExpNode* node) { 4455b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "digraph G {\n graph [label=\""; 4456a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; label[i]; i++) { 4457a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (label[i]) { 4458a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case '\\': 4459b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "\\\\"; 4460a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4461a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case '"': 4462b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "\""; 4463a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4464a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 4465b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << label[i]; 4466a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4467a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 
4468a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4469b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "\"];\n"; 4470a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Visit(node); 4471958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier os_ << "}" << std::endl; 4472a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4473a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4474a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4475a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::Visit(RegExpNode* node) { 4476a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (node->info()->visited) return; 4477a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node->info()->visited = true; 4478a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node->Accept(this); 4479a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4480a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4481a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4482a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { 4483b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << from << " -> n" << on_failure << " [style=dotted];\n"; 4484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Visit(on_failure); 4485a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4486a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass TableEntryBodyPrinter { 4489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 4490958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier TableEntryBodyPrinter(std::ostream& os, ChoiceNode* choice) // NOLINT 4491b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : os_(os), 4492b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch choice_(choice) {} 4493a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Call(uc16 from, DispatchTable::Entry entry) { 
4494a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* out_set = entry.out_set(); 4495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { 4496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (out_set->Get(i)) { 4497b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << choice() << ":s" << from << "o" << i << " -> n" 4498b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << choice()->alternatives()->at(i).node() << ";\n"; 4499a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4500a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4502a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 4503a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ChoiceNode* choice() { return choice_; } 4504958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier std::ostream& os_; 4505a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ChoiceNode* choice_; 4506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 4507a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass TableEntryHeaderPrinter { 4510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 4511958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier explicit TableEntryHeaderPrinter(std::ostream& os) // NOLINT 4512b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : first_(true), 4513b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_(os) {} 4514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Call(uc16 from, DispatchTable::Entry entry) { 4515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (first_) { 4516a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_ = false; 4517a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4518b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "|"; 
4519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4520b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "|{"; 4521a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* out_set = entry.out_set(); 4522a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int priority = 0; 4523a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { 4524a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (out_set->Get(i)) { 4525b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (priority > 0) os_ << "|"; 4526b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "<s" << from << "o" << i << "> " << priority; 4527a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block priority++; 4528a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4529a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4530b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "}}"; 4531a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 45323fb3ca8c7ca439d408449a395897395c0faae8d1Ben Murdoch 4533a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 4534a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool first_; 4535958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier std::ostream& os_; 4536a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 4537a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4538a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4539a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass AttributePrinter { 4540a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 4541958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier explicit AttributePrinter(std::ostream& os) // NOLINT 4542b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : os_(os), 4543b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch first_(true) {} 4544a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PrintSeparator() { 
4545a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (first_) { 4546a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_ = false; 4547a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4548b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "|"; 4549a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4550a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4551a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PrintBit(const char* name, bool value) { 4552a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!value) return; 4553a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintSeparator(); 4554b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "{" << name << "}"; 4555a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4556a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PrintPositive(const char* name, int value) { 4557a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (value < 0) return; 4558a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintSeparator(); 4559b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "{" << name << "|" << value << "}"; 4560a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4561b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4562a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 4563958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier std::ostream& os_; 4564a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool first_; 4565a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 4566a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4568a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::PrintAttributes(RegExpNode* that) { 4569b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, " 4570b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << "margin=0.1, fontsize=10, label=\"{"; 
4571b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AttributePrinter printer(os_); 4572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo* info = that->info(); 4573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block printer.PrintBit("NI", info->follows_newline_interest); 4574a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block printer.PrintBit("WI", info->follows_word_interest); 4575a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block printer.PrintBit("SI", info->follows_start_interest); 4576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* label = that->label(); 4577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (label->is_bound()) 4578a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block printer.PrintPositive("@", label->pos()); 4579b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "}\"];\n" 4580b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << " a" << that << " -> n" << that 4581b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << " [style=dashed, color=grey, arrowhead=none];\n"; 4582a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4583a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4584a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic const bool kPrintDispatchTable = false; 4586a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::VisitChoice(ChoiceNode* that) { 4587a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (kPrintDispatchTable) { 4588b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " [shape=Mrecord, label=\""; 4589b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch TableEntryHeaderPrinter header_printer(os_); 4590a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->GetTable(ignore_case_)->ForEach(&header_printer); 4591b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "\"]\n"; 4592a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintAttributes(that); 
4593b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch TableEntryBodyPrinter body_printer(os_, that); 4594a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->GetTable(ignore_case_)->ForEach(&body_printer); 4595a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4596b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n"; 4597a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < that->alternatives()->length(); i++) { 4598a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alt = that->alternatives()->at(i); 4599b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " -> n" << alt.node(); 4600a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4601a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4602a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < that->alternatives()->length(); i++) { 4603a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alt = that->alternatives()->at(i); 4604a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alt.node()->Accept(this); 4605a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4606a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4607a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4608a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4609a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::VisitText(TextNode* that) { 4610b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone = that->zone(); 4611b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " [label=\""; 4612a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < that->elements()->length(); i++) { 4613b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (i > 0) os_ << " "; 4614a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextElement elm = that->elements()->at(i); 4615b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch 
(elm.text_type()) { 4616a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case TextElement::ATOM: { 4617b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Vector<const uc16> data = elm.atom()->data(); 4618b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < data.length(); i++) { 4619b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << static_cast<char>(data[i]); 4620b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 4621a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4622a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4623a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case TextElement::CHAR_CLASS: { 4624b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* node = elm.char_class(); 4625b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "["; 4626b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (node->is_negated()) os_ << "^"; 4627b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < node->ranges(zone)->length(); j++) { 4628b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterRange range = node->ranges(zone)->at(j); 4629b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << AsUC16(range.from()) << "-" << AsUC16(range.to()); 4630a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4631b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "]"; 4632a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4633a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4634a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 4635a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 4636a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4637a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4638b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "\", shape=box, peripheries=2];\n"; 4639a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintAttributes(that); 4640b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " 
n" << that << " -> n" << that->on_success() << ";\n"; 4641a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Visit(that->on_success()); 4642a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4643a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4644a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4645a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::VisitBackReference(BackReferenceNode* that) { 4646b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " [label=\"$" << that->start_register() << "..$" 4647b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << that->end_register() << "\", shape=doubleoctagon];\n"; 4648a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintAttributes(that); 4649b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " -> n" << that->on_success() << ";\n"; 4650a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Visit(that->on_success()); 4651a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4652a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4653a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4654a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::VisitEnd(EndNode* that) { 4655b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " [style=bold, shape=point];\n"; 4656a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintAttributes(that); 4657a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4658a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4659a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4660a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::VisitAssertion(AssertionNode* that) { 4661b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " ["; 4662b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch (that->assertion_type()) { 4663a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AssertionNode::AT_END: 4664b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
os_ << "label=\"$\", shape=septagon"; 4665a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4666a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AssertionNode::AT_START: 4667b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"^\", shape=septagon"; 4668a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4669a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AssertionNode::AT_BOUNDARY: 4670b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"\\b\", shape=septagon"; 4671a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4672a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AssertionNode::AT_NON_BOUNDARY: 4673b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"\\B\", shape=septagon"; 4674a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4675a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case AssertionNode::AFTER_NEWLINE: 4676b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"(?<=\\n)\", shape=septagon"; 4677e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke break; 4678a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4679b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "];\n"; 4680a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintAttributes(that); 4681a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* successor = that->on_success(); 4682b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " -> n" << successor << ";\n"; 4683a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Visit(successor); 4684a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4685a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4686a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4687a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DotPrinter::VisitAction(ActionNode* that) { 4688b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " ["; 4689b8a8cc1952d61a2f3a2568848933943a543b5d3eBen 
Murdoch switch (that->action_type_) { 4690a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::SET_REGISTER: 4691b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"$" << that->data_.u_store_register.reg 4692b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << ":=" << that->data_.u_store_register.value << "\", shape=octagon"; 4693a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4694a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::INCREMENT_REGISTER: 4695b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"$" << that->data_.u_increment_register.reg 4696b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << "++\", shape=octagon"; 4697a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4698a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::STORE_POSITION: 4699b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"$" << that->data_.u_position_register.reg 4700b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << ":=$pos\", shape=octagon"; 4701a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4702a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::BEGIN_SUBMATCH: 4703b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"$" << that->data_.u_submatch.current_position_register 4704b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << ":=$pos,begin\", shape=septagon"; 4705a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4706a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::POSITIVE_SUBMATCH_SUCCESS: 4707b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"escape\", shape=septagon"; 4708a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4709a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::EMPTY_MATCH_CHECK: 4710b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"$" << that->data_.u_empty_match_check.start_register 
4711b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << "=$pos?,$" << that->data_.u_empty_match_check.repetition_register 4712b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << "<" << that->data_.u_empty_match_check.repetition_limit 4713b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << "?\", shape=septagon"; 4714a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4715a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case ActionNode::CLEAR_CAPTURES: { 4716b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from 4717b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << " to $" << that->data_.u_clear_captures.range_to 4718b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch << "\", shape=septagon"; 4719a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 4720a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4721a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4722b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "];\n"; 4723a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block PrintAttributes(that); 4724a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* successor = that->on_success(); 4725b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << " n" << that << " -> n" << successor << ";\n"; 4726a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Visit(successor); 4727a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4728a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4729a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4730a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass DispatchTableDumper { 4731a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 4732958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier explicit DispatchTableDumper(std::ostream& os) : os_(os) {} 4733a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Call(uc16 key, DispatchTable::Entry entry); 4734a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
private: 4735958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier std::ostream& os_; 4736a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 4737a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4738a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4739a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) { 4740b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {"; 4741a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* set = entry.out_set(); 4742a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool first = true; 4743a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (unsigned i = 0; i < OutSet::kFirstLimit; i++) { 4744a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (set->Get(i)) { 4745a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (first) { 4746a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first = false; 4747a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 4748b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << ", "; 4749a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4750b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << i; 4751a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4752a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4753b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch os_ << "}\n"; 4754a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4755a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4756a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4757a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTable::Dump() { 4758b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch OFStream os(stderr); 4759b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DispatchTableDumper dumper(os); 4760a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block tree()->ForEach(&dumper); 4761a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 
4762a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4763a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4764a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid RegExpEngine::DotPrint(const char* label, 4765a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node, 4766a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool ignore_case) { 4767b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch OFStream os(stdout); 4768b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DotPrinter printer(os, ignore_case); 4769a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block printer.PrintNode(label, node); 4770a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4771a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4772a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4773a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif // DEBUG 4774a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4775a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4776a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ------------------------------------------------------------------- 4777a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Tree to graph conversion 4778a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4779a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler, 4780a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 4781b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<TextElement>* elms = 4782b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone()); 4783b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch elms->Add(TextElement::Atom(this), compiler->zone()); 4784014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return new (compiler->zone()) 4785014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextNode(elms, compiler->read_backward(), on_success); 4786a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
Block} 4787a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4788a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4789a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, 4790a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 4791014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return new (compiler->zone()) 4792014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextNode(elements(), compiler->read_backward(), on_success); 4793a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4794a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4795b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 4796a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, 4797b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int* special_class, 4798a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int length) { 4799109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch length--; // Remove final marker. 
4800109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(special_class[length] == kRangeEndMarker); 4801b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(ranges->length() != 0); 4802b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(length != 0); 4803b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(special_class[0] != 0); 4804a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (ranges->length() != (length >> 1) + 1) { 4805a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4806a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4807a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange range = ranges->at(0); 4808a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (range.from() != 0) { 4809a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4810a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4811a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < length; i += 2) { 4812a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (special_class[i] != (range.to() + 1)) { 4813a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4814a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4815a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block range = ranges->at((i >> 1) + 1); 4816b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (special_class[i+1] != range.from()) { 4817a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4818a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4819a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4820109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (range.to() != String::kMaxCodePoint) { 4821a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4822a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4823a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 4824a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4825a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
4826a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4827a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic bool CompareRanges(ZoneList<CharacterRange>* ranges, 4828b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch const int* special_class, 4829a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int length) { 4830109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch length--; // Remove final marker. 4831109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(special_class[length] == kRangeEndMarker); 4832a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (ranges->length() * 2 != length) { 4833a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4834a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4835a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < length; i += 2) { 4836a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange range = ranges->at(i >> 1); 4837b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (range.from() != special_class[i] || 4838b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch range.to() != special_class[i + 1] - 1) { 4839a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4840a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4841a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4842a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 4843a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4844a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4845a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4846b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochbool RegExpCharacterClass::is_standard(Zone* zone) { 4847a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // TODO(lrn): Remove need for this function, by not throwing away information 4848a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // along the way. 
4849a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (is_negated_) { 4850a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4851a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4852a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (set_.is_standard()) { 4853a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 4854a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4855b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { 4856a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block set_.set_standard_set_type('s'); 4857a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 4858a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4859b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) { 4860a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block set_.set_standard_set_type('S'); 4861a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 4862a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4863b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (CompareInverseRanges(set_.ranges(zone), 4864a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block kLineTerminatorRanges, 4865a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block kLineTerminatorRangeCount)) { 4866a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block set_.set_standard_set_type('.'); 4867a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return true; 4868a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 4869b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (CompareRanges(set_.ranges(zone), 4870e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kLineTerminatorRanges, 4871e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kLineTerminatorRangeCount)) { 4872e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke set_.set_standard_set_type('n'); 4873e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return 
true; 4874e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 4875b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { 4876e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke set_.set_standard_set_type('w'); 4877e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return true; 4878e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 4879b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) { 4880e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke set_.set_standard_set_type('W'); 4881e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return true; 4882e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 4883a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 4884a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 4885a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4886a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 4887109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochUnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone, 4888109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* base) 4889109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch : zone_(zone), 4890109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_(zone), 4891109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bmp_(nullptr), 4892109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch lead_surrogates_(nullptr), 4893109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch trail_surrogates_(nullptr), 4894109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch non_bmp_(nullptr) { 4895109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // The unicode range splitter categorizes given character ranges into: 4896109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // - Code points from the BMP representable by one code unit. 
4897109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // - Code points outside the BMP that need to be split into surrogate pairs. 4898109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // - Lone lead surrogates. 4899109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // - Lone trail surrogates. 4900109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Lone surrogates are valid code points, even though no actual characters. 4901109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // They require special matching to make sure we do not split surrogate pairs. 4902109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // We use the dispatch table to accomplish this. The base range is split up 4903109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // by the table by the overlay ranges, and the Call callback is used to 4904109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // filter and collect ranges for each category. 4905109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch for (int i = 0; i < base->length(); i++) { 4906109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.AddRange(base->at(i), kBase, zone_); 4907109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 4908109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Add overlay ranges. 
4909109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.AddRange(CharacterRange::Range(0, kLeadSurrogateStart - 1), 4910109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch kBmpCodePoints, zone_); 4911109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.AddRange(CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd), 4912109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch kLeadSurrogates, zone_); 4913109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.AddRange( 4914109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), 4915109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch kTrailSurrogates, zone_); 4916109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.AddRange( 4917109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(kTrailSurrogateEnd + 1, kNonBmpStart - 1), 4918109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch kBmpCodePoints, zone_); 4919109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.AddRange(CharacterRange::Range(kNonBmpStart, kNonBmpEnd), 4920109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch kNonBmpCodePoints, zone_); 4921109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch table_.ForEach(this); 4922109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 4923109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4924109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4925109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochvoid UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) { 4926109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch OutSet* outset = entry.out_set(); 4927109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (!outset->Get(kBase)) return; 4928109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>** target = NULL; 4929109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (outset->Get(kBmpCodePoints)) { 4930109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch target = &bmp_; 
4931109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else if (outset->Get(kLeadSurrogates)) { 4932109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch target = &lead_surrogates_; 4933109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else if (outset->Get(kTrailSurrogates)) { 4934109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch target = &trail_surrogates_; 4935109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 4936109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(outset->Get(kNonBmpCodePoints)); 4937109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch target = &non_bmp_; 4938109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 4939109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (*target == NULL) *target = new (zone_) ZoneList<CharacterRange>(2, zone_); 4940109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_); 4941109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 4942109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4943109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4944109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochvoid AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result, 4945109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success, UnicodeRangeSplitter* splitter) { 4946109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* bmp = splitter->bmp(); 4947109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (bmp == nullptr) return; 4948109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges( 4949109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler->zone(), bmp, compiler->read_backward(), on_success))); 4950109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 4951109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4952109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 4953109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben 
Murdochvoid AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result, 4954109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success, 4955109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch UnicodeRangeSplitter* splitter) { 4956109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* non_bmp = splitter->non_bmp(); 4957109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (non_bmp == nullptr) return; 4958109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(compiler->unicode()); 4959109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(!compiler->one_byte()); 4960109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 4961109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Canonicalize(non_bmp); 4962109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch for (int i = 0; i < non_bmp->length(); i++) { 4963109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Match surrogate pair. 4964109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // E.g. 
[\u10005-\u11005] becomes 4965109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // \ud800[\udc05-\udfff]| 4966109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // [\ud801-\ud803][\udc00-\udfff]| 4967109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // \ud804[\udc00-\udc05] 4968109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 from = non_bmp->at(i).from(); 4969109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 to = non_bmp->at(i).to(); 4970109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc16 from_l = unibrow::Utf16::LeadSurrogate(from); 4971109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc16 from_t = unibrow::Utf16::TrailSurrogate(from); 4972109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc16 to_l = unibrow::Utf16::LeadSurrogate(to); 4973109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc16 to_t = unibrow::Utf16::TrailSurrogate(to); 4974109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (from_l == to_l) { 4975109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // The lead surrogate is the same. 
4976109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative( 4977109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch GuardedAlternative(TextNode::CreateForSurrogatePair( 4978109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Singleton(from_l), 4979109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(from_t, to_t), compiler->read_backward(), 4980109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch on_success))); 4981109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 4982109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (from_t != kTrailSurrogateStart) { 4983109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Add [from_l][from_t-\udfff] 4984109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative( 4985109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch GuardedAlternative(TextNode::CreateForSurrogatePair( 4986109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Singleton(from_l), 4987109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(from_t, kTrailSurrogateEnd), 4988109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler->read_backward(), on_success))); 4989109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch from_l++; 4990109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 4991109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (to_t != kTrailSurrogateEnd) { 4992109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Add [to_l][\udc00-to_t] 4993109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative( 4994109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch GuardedAlternative(TextNode::CreateForSurrogatePair( 4995109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Singleton(to_l), 4996109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(kTrailSurrogateStart, to_t), 4997109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler->read_backward(), on_success))); 
4998109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch to_l--; 4999109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5000109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (from_l <= to_l) { 5001109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Add [from_l-to_l][\udc00-\udfff] 5002109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative( 5003109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch GuardedAlternative(TextNode::CreateForSurrogatePair( 5004109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Range(from_l, to_l), 5005109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd), 5006109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler->read_backward(), on_success))); 5007109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5008109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5009109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5010109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5011109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5012109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5013109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochRegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch( 5014109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind, 5015109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* match, RegExpNode* on_success, 5016109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool read_backward) { 5017109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 5018109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* match_node = TextNode::CreateForCharacterRanges( 5019109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, match, read_backward, on_success); 5020109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int stack_register = compiler->UnicodeLookaroundStackRegister(); 
5021109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int position_register = compiler->UnicodeLookaroundPositionRegister(); 5022109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpLookaround::Builder lookaround(false, match_node, stack_register, 5023109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register); 5024109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* negative_match = TextNode::CreateForCharacterRanges( 5025109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, lookbehind, !read_backward, lookaround.on_match_success()); 5026109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return lookaround.ForMatch(negative_match); 5027109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5028109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5029109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5030109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochRegExpNode* MatchAndNegativeLookaroundInReadDirection( 5031109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpCompiler* compiler, ZoneList<CharacterRange>* match, 5032109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* lookahead, RegExpNode* on_success, 5033109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool read_backward) { 5034109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 5035109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int stack_register = compiler->UnicodeLookaroundStackRegister(); 5036109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int position_register = compiler->UnicodeLookaroundPositionRegister(); 5037109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpLookaround::Builder lookaround(false, on_success, stack_register, 5038109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register); 5039109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* negative_match = TextNode::CreateForCharacterRanges( 5040109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, lookahead, 
read_backward, lookaround.on_match_success()); 5041109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return TextNode::CreateForCharacterRanges( 5042109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, match, read_backward, lookaround.ForMatch(negative_match)); 5043109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5044109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5045109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5046109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochvoid AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result, 5047109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success, 5048109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch UnicodeRangeSplitter* splitter) { 5049109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates(); 5050109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (lead_surrogates == nullptr) return; 5051109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 5052109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]). 5053109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( 5054109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); 5055109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5056109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* match; 5057109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (compiler->read_backward()) { 5058109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Reading backward. Assert that reading forward, there is no trail 5059109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // surrogate, and then backward match the lead surrogate. 
5060109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch match = NegativeLookaroundAgainstReadDirectionAndMatch( 5061109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler, trail_surrogates, lead_surrogates, on_success, true); 5062109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5063109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Reading forward. Forward match the lead surrogate and assert that 5064109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // no trail surrogate follows. 5065109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch match = MatchAndNegativeLookaroundInReadDirection( 5066109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler, lead_surrogates, trail_surrogates, on_success, false); 5067109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5068109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative(GuardedAlternative(match)); 5069109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5070109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5071109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5072109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochvoid AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result, 5073109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success, 5074109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch UnicodeRangeSplitter* splitter) { 5075109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates(); 5076109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (trail_surrogates == nullptr) return; 5077109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 5078109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // E.g. 
\udc01 becomes (?<![\ud800-\udbff])\udc01 5079109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( 5080109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); 5081109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5082109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* match; 5083109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (compiler->read_backward()) { 5084109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Reading backward. Backward match the trail surrogate and assert that no 5085109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // lead surrogate precedes it. 5086109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch match = MatchAndNegativeLookaroundInReadDirection( 5087109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler, trail_surrogates, lead_surrogates, on_success, true); 5088109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5089109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Reading forward. Assert that reading backward, there is no lead 5090109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // surrogate, and then forward match the trail surrogate. 
5091109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch match = NegativeLookaroundAgainstReadDirectionAndMatch( 5092109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch compiler, lead_surrogates, trail_surrogates, on_success, false); 5093109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5094109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result->AddAlternative(GuardedAlternative(match)); 5095109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5096109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5097109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochRegExpNode* UnanchoredAdvance(RegExpCompiler* compiler, 5098109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success) { 5099109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // This implements ES2015 21.2.5.2.3, AdvanceStringIndex. 5100109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(!compiler->read_backward()); 5101109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 5102109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Advance any character. If the character happens to be a lead surrogate and 5103109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // we advanced into the middle of a surrogate pair, it will work out, as 5104109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // nothing will match from there. We will have to advance again, consuming 5105109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // the associated trail surrogate. 
5106109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* range = CharacterRange::List( 5107109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit)); 5108109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return TextNode::CreateForCharacterRanges(zone, range, false, on_success); 5109109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5110109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5111109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5112109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochvoid AddUnicodeCaseEquivalents(RegExpCompiler* compiler, 5113109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* ranges) { 5114109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch#ifdef V8_I18N_SUPPORT 5115109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Use ICU to compute the case fold closure over the ranges. 5116109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(compiler->unicode()); 5117109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(compiler->ignore_case()); 511862ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch icu::UnicodeSet set; 5119109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch for (int i = 0; i < ranges->length(); i++) { 512062ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch set.add(ranges->at(i).from(), ranges->at(i).to()); 5121109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5122109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges->Clear(); 512362ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch set.closeOver(USET_CASE_INSENSITIVE); 5124109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Full case mapping map single characters to multiple characters. 5125109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Those are represented as strings in the set. Remove them so that 5126109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // we end up with only simple and common case mappings. 
512762ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch set.removeAllStrings(); 5128109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 512962ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch for (int i = 0; i < set.getRangeCount(); i++) { 513062ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)), 513162ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch zone); 5132109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5133109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // No errors and everything we collected have been ranges. 5134109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch#else 5135109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Fallback if ICU is not included. 5136109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(), 5137109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges, compiler->one_byte()); 5138109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch#endif // V8_I18N_SUPPORT 5139109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Canonicalize(ranges); 5140109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5141109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5142109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5143a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, 5144a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5145109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch set_.Canonicalize(); 5146109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 5147109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* ranges = this->ranges(zone); 5148109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (compiler->unicode() && compiler->ignore_case()) { 5149109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 
AddUnicodeCaseEquivalents(compiler, ranges); 5150109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5151109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (compiler->unicode() && !compiler->one_byte()) { 5152109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (is_negated()) { 5153109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* negated = 5154109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch new (zone) ZoneList<CharacterRange>(2, zone); 5155109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Negate(ranges, negated, zone); 5156109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges = negated; 5157109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5158109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (ranges->length() == 0) { 5159bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8Ben Murdoch ranges->Add(CharacterRange::Everything(), zone); 5160bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8Ben Murdoch RegExpCharacterClass* fail = 5161bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8Ben Murdoch new (zone) RegExpCharacterClass(ranges, true); 5162bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8Ben Murdoch return new (zone) TextNode(fail, compiler->read_backward(), on_success); 5163109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5164109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (standard_type() == '*') { 5165109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return UnanchoredAdvance(compiler, on_success); 5166109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5167109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ChoiceNode* result = new (zone) ChoiceNode(2, zone); 5168109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch UnicodeRangeSplitter splitter(zone, ranges); 5169109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddBmpCharacters(compiler, result, on_success, &splitter); 5170109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter); 
5171109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddLoneLeadSurrogates(compiler, result, on_success, &splitter); 5172109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddLoneTrailSurrogates(compiler, result, on_success, &splitter); 5173109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return result; 5174109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5175109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5176109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return new (zone) TextNode(this, compiler->read_backward(), on_success); 5177109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5178014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 5179014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5180014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5181014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochint CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { 5182014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* atom1 = (*a)->AsAtom(); 5183014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* atom2 = (*b)->AsAtom(); 5184014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc16 character1 = atom1->data().at(0); 5185014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uc16 character2 = atom2->data().at(0); 5186014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (character1 < character2) return -1; 5187014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (character1 > character2) return 1; 5188014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return 0; 5189014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 5190014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5191014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5192014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochstatic unibrow::uchar Canonical( 5193014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize, 5194014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar c) { 
5195014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth]; 5196014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int length = canonicalize->get(c, '\0', chars); 5197014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_LE(length, 1); 5198014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar canonical = c; 5199014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (length == 1) canonical = chars[0]; 5200014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return canonical; 5201014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 5202014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5203014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5204014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochint CompareFirstCharCaseIndependent( 5205014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize, 5206014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* const* a, RegExpTree* const* b) { 5207014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* atom1 = (*a)->AsAtom(); 5208014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* atom2 = (*b)->AsAtom(); 5209014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar character1 = atom1->data().at(0); 5210014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar character2 = atom2->data().at(0); 5211014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (character1 == character2) return 0; 5212014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (character1 >= 'a' || character2 >= 'a') { 5213014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch character1 = Canonical(canonicalize, character1); 5214014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch character2 = Canonical(canonicalize, character2); 5215014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5216014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return static_cast<int>(character1) - 
static_cast<int>(character2); 5217014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 5218014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5219014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5220014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// We can stable sort runs of atoms, since the order does not matter if they 5221014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// start with different characters. 5222014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Returns true if any consecutive atoms were found. 5223014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochbool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { 5224014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<RegExpTree*>* alternatives = this->alternatives(); 5225014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int length = alternatives->length(); 5226014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool found_consecutive_atoms = false; 5227014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int i = 0; i < length; i++) { 5228014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch while (i < length) { 5229014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* alternative = alternatives->at(i); 5230014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (alternative->IsAtom()) break; 5231014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5232014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5233014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // i is length or it is the index of an atom. 
5234014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (i == length) break; 5235014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int first_atom = i; 5236014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5237014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch while (i < length) { 5238014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* alternative = alternatives->at(i); 5239014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!alternative->IsAtom()) break; 5240014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5241014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5242014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Sort atoms to get ones with common prefixes together. 5243014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // This step is more tricky if we are in a case-independent regexp, 5244014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // because it would change /is|I/ to /I|is/, and order matters when 5245014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // the regexp parts don't match only disjoint starting points. To fix 5246014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // this we have a version of CompareFirstChar that uses case- 5247014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // independent character classes for comparison. 
5248014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_LT(first_atom, alternatives->length()); 5249014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_LE(i, alternatives->length()); 5250014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_LE(first_atom, i); 5251014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (compiler->ignore_case()) { 5252014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = 5253014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->isolate()->regexp_macro_assembler_canonicalize(); 5254014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch auto compare_closure = 5255014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch [canonicalize](RegExpTree* const* a, RegExpTree* const* b) { 5256014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return CompareFirstCharCaseIndependent(canonicalize, a, b); 5257014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch }; 5258014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->StableSort(compare_closure, first_atom, i - first_atom); 5259014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 5260014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom); 5261014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5262014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (i - first_atom > 1) found_consecutive_atoms = true; 5263014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5264014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return found_consecutive_atoms; 5265014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 5266014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5267014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5268014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Optimizes ab|ac|az to a(?:b|c|d). 
5269014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { 5270014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Zone* zone = compiler->zone(); 5271014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<RegExpTree*>* alternatives = this->alternatives(); 5272014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int length = alternatives->length(); 5273014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5274014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int write_posn = 0; 5275014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int i = 0; 5276014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch while (i < length) { 5277014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* alternative = alternatives->at(i); 5278014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!alternative->IsAtom()) { 5279014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = alternatives->at(i); 5280014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5281014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch continue; 5282014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5283014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* atom = alternative->AsAtom(); 5284014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar common_prefix = atom->data().at(0); 5285014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int first_with_prefix = i; 5286014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int prefix_length = atom->length(); 5287014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5288014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch while (i < length) { 5289014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternative = alternatives->at(i); 5290014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!alternative->IsAtom()) break; 5291014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch atom = alternative->AsAtom(); 
5292014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::uchar new_prefix = atom->data().at(0); 5293014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (new_prefix != common_prefix) { 5294014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!compiler->ignore_case()) break; 5295014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = 5296014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->isolate()->regexp_macro_assembler_canonicalize(); 5297014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch new_prefix = Canonical(canonicalize, new_prefix); 5298014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch common_prefix = Canonical(canonicalize, common_prefix); 5299014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (new_prefix != common_prefix) break; 5300014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5301014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch prefix_length = Min(prefix_length, atom->length()); 5302014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5303014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5304014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (i > first_with_prefix + 2) { 5305014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Found worthwhile run of alternatives with common prefix of at least one 5306014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // character. The sorting function above did not sort on more than one 5307014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // character for reasons of correctness, but there may still be a longer 5308014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // common prefix if the terms were similar or presorted in the input. 5309014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Find out how long the common prefix is. 
5310014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int run_length = i - first_with_prefix; 5311014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch atom = alternatives->at(first_with_prefix)->AsAtom(); 5312014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int j = 1; j < run_length && prefix_length > 1; j++) { 5313014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* old_atom = 5314014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(j + first_with_prefix)->AsAtom(); 5315014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int k = 1; k < prefix_length; k++) { 5316014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (atom->data().at(k) != old_atom->data().at(k)) { 5317014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch prefix_length = k; 5318014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch break; 5319014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5320014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5321014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5322014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* prefix = 5323014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length)); 5324014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone); 5325014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch pair->Add(prefix, zone); 5326014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<RegExpTree*>* suffixes = 5327014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch new (zone) ZoneList<RegExpTree*>(run_length, zone); 5328014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int j = 0; j < run_length; j++) { 5329014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* old_atom = 5330014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(j + first_with_prefix)->AsAtom(); 5331014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int len = old_atom->length(); 
5332014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (len == prefix_length) { 5333014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch suffixes->Add(new (zone) RegExpEmpty(), zone); 5334014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 5335014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* suffix = new (zone) RegExpAtom( 5336014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch old_atom->data().SubVector(prefix_length, old_atom->length())); 5337014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch suffixes->Add(suffix, zone); 5338014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5339014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5340014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch pair->Add(new (zone) RegExpDisjunction(suffixes), zone); 5341014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair); 5342014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 5343014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Just copy any non-worthwhile alternatives. 5344014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int j = first_with_prefix; j < i; j++) { 5345014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = alternatives->at(j); 5346014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5347014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5348014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5349014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->Rewind(write_posn); // Trim end of array. 5350014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 5351014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5352014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5353014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch// Optimizes b|c|z to [bcz]. 
5354014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid RegExpDisjunction::FixSingleCharacterDisjunctions( 5355014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpCompiler* compiler) { 5356014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Zone* zone = compiler->zone(); 5357014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<RegExpTree*>* alternatives = this->alternatives(); 5358014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int length = alternatives->length(); 5359014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5360014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int write_posn = 0; 5361014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int i = 0; 5362014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch while (i < length) { 5363014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpTree* alternative = alternatives->at(i); 5364014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!alternative->IsAtom()) { 5365014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = alternatives->at(i); 5366014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5367014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch continue; 5368014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5369014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* atom = alternative->AsAtom(); 5370014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (atom->length() != 1) { 5371014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = alternatives->at(i); 5372014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5373014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch continue; 5374014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5375014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int first_in_run = i; 5376014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5377014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch while (i < length) { 5378014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 
alternative = alternatives->at(i); 5379014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!alternative->IsAtom()) break; 5380014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch atom = alternative->AsAtom(); 5381014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (atom->length() != 1) break; 5382014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch i++; 5383014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5384014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (i > first_in_run + 1) { 5385014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Found non-trivial run of single-character alternatives. 5386014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch int run_length = i - first_in_run; 5387014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<CharacterRange>* ranges = 5388014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch new (zone) ZoneList<CharacterRange>(2, zone); 5389014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int j = 0; j < run_length; j++) { 5390014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom(); 5391014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_EQ(old_atom->length(), 1); 5392014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); 5393014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5394014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = 5395014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch new (zone) RegExpCharacterClass(ranges, false); 5396014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 5397014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Just copy any trivial alternatives. 
5398014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int j = first_in_run; j < i; j++) { 5399014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->at(write_posn++) = alternatives->at(j); 5400014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5401014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5402014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5403014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternatives->Rewind(write_posn); // Trim end of array. 5404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5405a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, 5408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5409a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<RegExpTree*>* alternatives = this->alternatives(); 5410014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5411014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (alternatives->length() > 2) { 5412014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool found_consecutive_atoms = SortConsecutiveAtoms(compiler); 5413014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler); 5414014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch FixSingleCharacterDisjunctions(compiler); 5415014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (alternatives->length() == 1) { 5416014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return alternatives->at(0)->ToNode(compiler, on_success); 5417014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5418014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5419014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 5420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int length = alternatives->length(); 5421014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 
5422b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ChoiceNode* result = 5423b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(compiler->zone()) ChoiceNode(length, compiler->zone()); 5424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < length; i++) { 5425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, 5426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success)); 5427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->AddAlternative(alternative); 5428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 5430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler, 5434a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5435a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return ToNode(min(), 5436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block max(), 5437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block is_greedy(), 5438a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block body(), 5439a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler, 5440a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block on_success); 5441a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5442a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5444257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch// Scoped object to keep track of how much we unroll quantifier loops in the 5445257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch// regexp graph generator. 
5446257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdochclass RegExpExpansionLimiter { 5447257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch public: 5448257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch static const int kMaxExpansionFactor = 6; 5449257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch RegExpExpansionLimiter(RegExpCompiler* compiler, int factor) 5450257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch : compiler_(compiler), 5451257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch saved_expansion_factor_(compiler->current_expansion_factor()), 5452257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) { 5453b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(factor > 0); 5454257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (ok_to_expand_) { 5455257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (factor > kMaxExpansionFactor) { 5456257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // Avoid integer overflow of the current expansion factor. 
5457257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch ok_to_expand_ = false; 5458257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch compiler->set_current_expansion_factor(kMaxExpansionFactor + 1); 5459257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } else { 5460257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch int new_factor = saved_expansion_factor_ * factor; 5461257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch ok_to_expand_ = (new_factor <= kMaxExpansionFactor); 5462257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch compiler->set_current_expansion_factor(new_factor); 5463257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 5464257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 5465257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 5466257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 5467257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch ~RegExpExpansionLimiter() { 5468257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch compiler_->set_current_expansion_factor(saved_expansion_factor_); 5469257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 5470257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 5471257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch bool ok_to_expand() { return ok_to_expand_; } 5472257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 5473257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch private: 5474257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch RegExpCompiler* compiler_; 5475257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch int saved_expansion_factor_; 5476257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch bool ok_to_expand_; 5477257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 5478257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); 5479257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch}; 5480257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 5481257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 5482a7e24c173cf37484693b9abb38e494fa7bd7baebSteve 
BlockRegExpNode* RegExpQuantifier::ToNode(int min, 5483a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max, 5484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_greedy, 5485a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpTree* body, 5486a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 5487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success, 5488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 5489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // x{f, t} becomes this: 5490a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // 5491a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // (r++)<-. 5492a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // | ` 5493a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // | (x) 5494a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // v ^ 5495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // (r=0)-->(?)---/ [if r < t] 5496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // | 5497a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // [if r >= f] \----> ... 5498a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // 5499a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5500a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // 15.10.2.5 RepeatMatcher algorithm. 5501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The parser has already eliminated the case where max is 0. In the case 5502a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // where max_match is zero the parser has removed the quantifier if min was 5503a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // > 0 and removed the atom if min was 0. See AddQuantifierToAtom. 
5504a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5505a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we know that we cannot match zero length then things are a little 5506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // simpler since we don't need to make the special zero length match check 5507a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // from step 2.1. If the min and max are small we can unroll a little in 5508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // this case. 5509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,} 5510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3} 5511a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (max == 0) return on_success; // This can happen due to recursion. 5512a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool body_can_be_empty = (body->min_match() == 0); 5513a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int body_start_reg = RegExpCompiler::kNoRegister; 5514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval capture_registers = body->CaptureRegisters(); 5515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool needs_capture_clearing = !capture_registers.is_empty(); 5516b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone = compiler->zone(); 5517b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 5518a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (body_can_be_empty) { 5519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block body_start_reg = compiler->AllocateRegister(); 5520958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } else if (compiler->optimize() && !needs_capture_clearing) { 5521a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Only unroll if there are no captures and the body can't be 5522a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // empty. 
5523257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch { 5524257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch RegExpExpansionLimiter limiter( 5525257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch compiler, min + ((max != min) ? 1 : 0)); 5526257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) { 5527257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch int new_max = (max == kInfinity) ? max : max - min; 5528257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // Recurse once to get the loop or optional matches after the fixed 5529257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // ones. 5530257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch RegExpNode* answer = ToNode( 5531257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch 0, new_max, is_greedy, body, compiler, on_success, true); 5532257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // Unroll the forced matches from 0 to min. This can cause chains of 5533257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // TextNodes (which the parser does not generate). These should be 5534257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // combined if it turns out they hinder good code generation. 5535257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch for (int i = 0; i < min; i++) { 5536257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch answer = body->ToNode(compiler, answer); 5537257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 5538257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch return answer; 5539a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5540a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5541257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (max <= kMaxUnrolledMaxMatches && min == 0) { 5542b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(max > 0); // Due to the 'if' above. 
5543257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch RegExpExpansionLimiter limiter(compiler, max); 5544257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (limiter.ok_to_expand()) { 5545257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch // Unroll the optional matches up to max. 5546257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch RegExpNode* answer = on_success; 5547257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch for (int i = 0; i < max; i++) { 5548b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ChoiceNode* alternation = new(zone) ChoiceNode(2, zone); 5549257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch if (is_greedy) { 5550257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch alternation->AddAlternative( 5551257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch GuardedAlternative(body->ToNode(compiler, answer))); 5552257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch alternation->AddAlternative(GuardedAlternative(on_success)); 5553257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } else { 5554257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch alternation->AddAlternative(GuardedAlternative(on_success)); 5555257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch alternation->AddAlternative( 5556257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch GuardedAlternative(body->ToNode(compiler, answer))); 5557257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch } 5558257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch answer = alternation; 5559014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (not_at_start && !compiler->read_backward()) { 5560014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alternation->set_not_at_start(); 5561014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5562a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5563257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch return answer; 5564a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5565a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 
5566a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool has_min = min > 0; 5568a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool has_max = max < RegExpTree::kInfinity; 5569a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool needs_counter = has_min || has_max; 5570a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg_ctr = needs_counter 5571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ? compiler->AllocateRegister() 5572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : RegExpCompiler::kNoRegister; 5573014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch LoopChoiceNode* center = new (zone) 5574014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone); 5575014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (not_at_start && !compiler->read_backward()) center->set_not_at_start(); 5576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* loop_return = needs_counter 5577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) 5578a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : static_cast<RegExpNode*>(center); 5579a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (body_can_be_empty) { 5580a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the body can be empty we need to check if it was and then 5581a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // backtrack. 
5582a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block loop_return = ActionNode::EmptyMatchCheck(body_start_reg, 5583a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block reg_ctr, 5584a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block min, 5585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block loop_return); 5586a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5587a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* body_node = body->ToNode(compiler, loop_return); 5588a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (body_can_be_empty) { 5589a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the body can be empty we need to store the start position 5590a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // so we can bail out if it was empty. 5591a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block body_node = ActionNode::StorePosition(body_start_reg, false, body_node); 5592a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5593a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (needs_capture_clearing) { 5594a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Before entering the body of this loop we need to clear captures. 
5595a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block body_node = ActionNode::ClearCaptures(capture_registers, body_node); 5596a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5597a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative body_alt(body_node); 5598a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (has_max) { 5599b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Guard* body_guard = 5600b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(zone) Guard(reg_ctr, Guard::LT, max); 5601b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch body_alt.AddGuard(body_guard, zone); 5602a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5603a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative rest_alt(on_success); 5604a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (has_min) { 5605b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Guard* rest_guard = new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min); 5606b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch rest_alt.AddGuard(rest_guard, zone); 5607a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5608a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (is_greedy) { 5609a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block center->AddLoopAlternative(body_alt); 5610a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block center->AddContinueAlternative(rest_alt); 5611a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 5612a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block center->AddContinueAlternative(rest_alt); 5613a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block center->AddLoopAlternative(body_alt); 5614a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5615a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (needs_counter) { 5616a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return ActionNode::SetRegister(reg_ctr, 0, center); 5617a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 
5618a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return center; 5619a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5620a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5621a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5622a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5623a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 5624a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5625a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo info; 5626b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone = compiler->zone(); 5627b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 5628b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch (assertion_type()) { 5629a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case START_OF_LINE: 5630a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return AssertionNode::AfterNewline(on_success); 5631a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case START_OF_INPUT: 5632a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return AssertionNode::AtStart(on_success); 5633a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case BOUNDARY: 5634a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return AssertionNode::AtBoundary(on_success); 5635a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case NON_BOUNDARY: 5636a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return AssertionNode::AtNonBoundary(on_success); 5637a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case END_OF_INPUT: 5638a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return AssertionNode::AtEnd(on_success); 5639a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case END_OF_LINE: { 5640a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Compile $ in multiline regexps as an alternation with a positive 5641a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // lookahead in one side and an end-of-input on the other side. 
5642a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We need two registers for the lookahead. 5643a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int stack_pointer_register = compiler->AllocateRegister(); 5644a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int position_register = compiler->AllocateRegister(); 5645a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The ChoiceNode to distinguish between a newline and end-of-input. 5646b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ChoiceNode* result = new(zone) ChoiceNode(2, zone); 5647a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Create a newline atom. 5648a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<CharacterRange>* newline_ranges = 5649b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch new(zone) ZoneList<CharacterRange>(3, zone); 5650b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterRange::AddClassEscape('n', newline_ranges, zone); 5651014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n'); 5652014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch TextNode* newline_matcher = new (zone) TextNode( 5653014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch newline_atom, false, ActionNode::PositiveSubmatchSuccess( 5654014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch stack_pointer_register, position_register, 5655014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 0, // No captures inside. 5656014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch -1, // Ignored if no captures. 5657014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch on_success)); 5658a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Create an end-of-input matcher. 
5659a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* end_of_line = ActionNode::BeginSubmatch( 5660a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block stack_pointer_register, 5661a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block position_register, 5662a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block newline_matcher); 5663a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Add the two alternatives to the ChoiceNode. 5664a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative eol_alternative(end_of_line); 5665a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->AddAlternative(eol_alternative); 5666a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success)); 5667a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->AddAlternative(end_alternative); 5668a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 5669a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5670a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 5671a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 5672a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5673a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return on_success; 5674a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5675a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5676a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5677a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 5678a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5679014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return new (compiler->zone()) 5680b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BackReferenceNode(RegExpCapture::StartRegister(index()), 5681b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCapture::EndRegister(index()), 5682014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 
compiler->read_backward(), on_success); 5683a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5684a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5685a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5686a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 5687a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5688a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return on_success; 5689a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5690a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5691a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5692109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochRegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success, 5693109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int stack_pointer_register, 5694109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int position_register, 5695109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int capture_register_count, 5696109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int capture_register_start) 5697109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch : is_positive_(is_positive), 5698109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch on_success_(on_success), 5699109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch stack_pointer_register_(stack_pointer_register), 5700109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register_(position_register) { 5701109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (is_positive_) { 5702109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch on_match_success_ = ActionNode::PositiveSubmatchSuccess( 5703109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch stack_pointer_register, position_register, capture_register_count, 5704109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch capture_register_start, on_success_); 5705109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5706109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben 
Murdoch Zone* zone = on_success_->zone(); 5707109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch on_match_success_ = new (zone) NegativeSubmatchSuccess( 5708109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch stack_pointer_register, position_register, capture_register_count, 5709109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch capture_register_start, zone); 5710109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5711109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5712109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5713109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5714109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochRegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) { 5715109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (is_positive_) { 5716109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return ActionNode::BeginSubmatch(stack_pointer_register_, 5717109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register_, match); 5718109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5719109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = on_success_->zone(); 5720109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // We use a ChoiceNode to represent the negative lookaround. The first 5721109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // alternative is the negative match. On success, the end node backtracks. 5722109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // On failure, the second alternative is tried and leads to success. 5723109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // NegativeLookaheadChoiceNode is a special ChoiceNode that ignores the 5724109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // first exit when calculating quick checks. 
5725109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode( 5726109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch GuardedAlternative(match), GuardedAlternative(on_success_), zone); 5727109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return ActionNode::BeginSubmatch(stack_pointer_register_, 5728109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register_, choice_node); 5729109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5730109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 5731109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5732109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 5733014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben MurdochRegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler, 5734014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpNode* on_success) { 5735a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int stack_pointer_register = compiler->AllocateRegister(); 5736a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int position_register = compiler->AllocateRegister(); 5737a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5738a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const int registers_per_capture = 2; 5739a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const int register_of_first_capture = 2; 5740a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int register_count = capture_count_ * registers_per_capture; 5741a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int register_start = 5742a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block register_of_first_capture + capture_from_ * registers_per_capture; 5743a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5744014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpNode* result; 5745014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool was_reading_backward = compiler->read_backward(); 5746014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->set_read_backward(type() 
== LOOKBEHIND); 5747109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Builder builder(is_positive(), on_success, stack_pointer_register, 5748109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register, register_count, register_start); 5749109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* match = body_->ToNode(compiler, builder.on_match_success()); 5750109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch result = builder.ForMatch(match); 5751014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch compiler->set_read_backward(was_reading_backward); 5752014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return result; 5753a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5754a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5755a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5756a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 5757a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5758a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return ToNode(body(), index(), compiler, on_success); 5759a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5760a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5761a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5762a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpCapture::ToNode(RegExpTree* body, 5763a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int index, 5764a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 5765a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5766014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK_NOT_NULL(body); 5767a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int start_reg = RegExpCapture::StartRegister(index); 5768a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int end_reg = RegExpCapture::EndRegister(index); 5769014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if 
(compiler->read_backward()) std::swap(start_reg, end_reg); 5770a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); 5771a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* body_node = body->ToNode(compiler, store_end); 5772a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return ActionNode::StorePosition(start_reg, true, body_node); 5773a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5774a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5775a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5776a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockRegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 5777a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) { 5778a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<RegExpTree*>* children = nodes(); 5779a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* current = on_success; 5780014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (compiler->read_backward()) { 5781014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int i = 0; i < children->length(); i++) { 5782014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch current = children->at(i)->ToNode(compiler, current); 5783014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5784014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 5785014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int i = children->length() - 1; i >= 0; i--) { 5786014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch current = children->at(i)->ToNode(compiler, current); 5787014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 5788a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5789a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return current; 5790a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5791a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5792a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 
5793b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void AddClass(const int* elmv, 5794a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int elmc, 5795b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges, 5796b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone) { 5797b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch elmc--; 5798109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(elmv[elmc] == kRangeEndMarker); 5799a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < elmc; i += 2) { 5800b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(elmv[i] < elmv[i + 1]); 5801109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges->Add(CharacterRange::Range(elmv[i], elmv[i + 1] - 1), zone); 5802a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5803a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5804a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5805a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5806b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochstatic void AddClassNegated(const int *elmv, 5807a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int elmc, 5808b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges, 5809b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone) { 5810b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch elmc--; 5811109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(elmv[elmc] == kRangeEndMarker); 5812b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(elmv[0] != 0x0000); 5813109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(elmv[elmc - 1] != String::kMaxCodePoint); 5814a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 last = 0x0000; 5815a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < elmc; i += 2) { 5816b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(last <= elmv[i] - 1); 5817b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 
DCHECK(elmv[i] < elmv[i + 1]); 5818109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges->Add(CharacterRange::Range(last, elmv[i] - 1), zone); 5819b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch last = elmv[i + 1]; 5820a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5821109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone); 5822a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5823a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5824a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5825a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid CharacterRange::AddClassEscape(uc16 type, 5826b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges, 5827b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone) { 5828a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block switch (type) { 5829a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 's': 5830b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone); 5831a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5832a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 'S': 5833b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone); 5834a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5835a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 'w': 5836b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AddClass(kWordRanges, kWordRangeCount, ranges, zone); 5837a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5838a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 'W': 5839b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone); 5840a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5841a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 'd': 5842b8a8cc1952d61a2f3a2568848933943a543b5d3eBen 
Murdoch AddClass(kDigitRanges, kDigitRangeCount, ranges, zone); 5843a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5844a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 'D': 5845b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone); 5846a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5847a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case '.': 5848a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddClassNegated(kLineTerminatorRanges, 5849a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block kLineTerminatorRangeCount, 5850b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 5851b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zone); 5852a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5853a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This is not a character range as defined by the spec but a 5854a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // convenient shorthand for a character class that matches any 5855a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // character. 5856a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case '*': 5857b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges->Add(CharacterRange::Everything(), zone); 5858a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5859a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This is the set of characters matched by the $ and ^ symbols 5860a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // in multiline mode. 
5861a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case 'n': 5862a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddClass(kLineTerminatorRanges, 5863a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block kLineTerminatorRangeCount, 5864b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges, 5865b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zone); 5866a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 5867a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: 5868a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 5869a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5870a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5871a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5872a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5873b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochVector<const int> CharacterRange::GetWordBounds() { 5874b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return Vector<const int>(kWordRanges, kWordRangeCount - 1); 5875a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5876a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5877a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5878014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone, 5879014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ZoneList<CharacterRange>* ranges, 5880014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool is_one_byte) { 5881bcf72ee8e3b26f1d0726869c7ddb3921c68b09a8Ben Murdoch CharacterRange::Canonicalize(ranges); 5882109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int range_count = ranges->length(); 5883109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch for (int i = 0; i < range_count; i++) { 5884109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange range = ranges->at(i); 5885109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 bottom = range.from(); 5886109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (bottom 
> String::kMaxUtf16CodeUnit) return; 5887109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit); 5888109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // Nothing to be done for surrogates. 5889109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return; 5890109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (is_one_byte && !RangeContainsLatin1Equivalents(range)) { 5891109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (bottom > String::kMaxOneByteCharCode) return; 5892109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; 5893109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5894109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5895109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (top == bottom) { 5896109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // If this is a singleton we just expand the one character. 5897109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5898f2e3994fa5148cc3d9946666f0b0596290192b0eBen Murdoch for (int i = 0; i < length; i++) { 5899109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 chr = chars[i]; 5900109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (chr != bottom) { 5901109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges->Add(CharacterRange::Singleton(chars[i]), zone); 59028389745919cae02139ddc085a63c00d024269cf2Ben Murdoch } 59038389745919cae02139ddc085a63c00d024269cf2Ben Murdoch } 5904109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5905109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // If this is a range we expand the characters block by block, expanding 5906109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // contiguous subranges (blocks) one at a time. 
The approach is as 5907109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // follows. For a given start character we look up the remainder of the 5908109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // block that contains it (represented by the end point), for instance we 5909109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // find 'z' if the character is 'c'. A block is characterized by the 5910109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // property that all characters uncanonicalize in the same way, except 5911109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // that each entry in the result is incremented by the distance from the 5912109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // first element. So a-z is a block because 'a' uncanonicalizes to ['a', 5913109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. Once 5914109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // we've found the end point we look up its uncanonicalization and 5915109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // produce a range for each element. For instance for [c-f] we look up 5916109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // ['z', 'Z'] and produce [c-f] and [C-F]. We then only add a range if 5917109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // it is not already contained in the input, so [c-f] will be skipped but 5918109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // [C-F] will be added. If this range is not completely contained in a 5919109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // block we do this for all the blocks covered by the range (handling 5920109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // characters that is not in a block as a "singleton block"). 
5921109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5922109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int pos = bottom; 5923109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch while (pos <= top) { 5924109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int length = 5925109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch isolate->jsregexp_canonrange()->get(pos, '\0', equivalents); 5926109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 block_end; 5927109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (length == 0) { 5928109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch block_end = pos; 5929109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else { 5930109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK_EQ(1, length); 5931109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch block_end = equivalents[0]; 5932109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5933109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int end = (block_end > top) ? 
top : block_end; 5934109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', 5935109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch equivalents); 5936109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch for (int i = 0; i < length; i++) { 5937109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 c = equivalents[i]; 5938109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 range_from = c - (block_end - pos); 5939109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 range_to = c - (block_end - end); 5940109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (!(bottom <= range_from && range_to <= top)) { 5941109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ranges->Add(CharacterRange::Range(range_from, range_to), zone); 5942109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5943109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5944109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch pos = end + 1; 5945109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 5946a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5947d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block } 5948d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block} 5949d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block 5950d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block 5951e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkebool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { 5952b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_NOT_NULL(ranges); 5953e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int n = ranges->length(); 5954e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (n <= 1) return true; 5955e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int max = ranges->at(0).to(); 5956e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke for (int i = 1; i < n; i++) { 5957e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange next_range = ranges->at(i); 
5958e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (next_range.from() <= max + 1) return false; 5959e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke max = next_range.to(); 5960e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 5961e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return true; 5962e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 5963e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 5964e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 5965b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) { 5966a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (ranges_ == NULL) { 5967b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ranges_ = new(zone) ZoneList<CharacterRange>(2, zone); 5968b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterRange::AddClassEscape(standard_set_type_, ranges_, zone); 5969a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 5970a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return ranges_; 5971a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 5972a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5973a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 5974e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke// Move a number of elements in a zonelist to another position 5975e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke// in the same list. Handles overlapping source and target areas. 5976e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkestatic void MoveRanges(ZoneList<CharacterRange>* list, 5977e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int from, 5978e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int to, 5979e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int count) { 5980e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Ranges are potentially overlapping. 
5981e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (from < to) { 5982e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke for (int i = count - 1; i >= 0; i--) { 5983e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke list->at(to + i) = list->at(from + i); 5984e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 5985e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } else { 5986e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke for (int i = 0; i < count; i++) { 5987e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke list->at(to + i) = list->at(from + i); 5988e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 5989e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 5990e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 5991e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 5992e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 5993e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkestatic int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, 5994e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int count, 5995e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange insert) { 5996e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Inserts a range into list[0..count[, which must be sorted 5997e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // by from value and non-overlapping and non-adjacent, using at most 5998e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // list[0..count] for the result. Returns the number of resulting 5999e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // canonicalized ranges. Inserting a range may collapse existing ranges into 6000e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // fewer ranges, so the return value can be anything in the range 1..count+1. 
6001109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 from = insert.from(); 6002109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 to = insert.to(); 6003e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int start_pos = 0; 6004e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int end_pos = count; 6005e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke for (int i = count - 1; i >= 0; i--) { 6006e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange current = list->at(i); 6007e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (current.from() > to + 1) { 6008e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke end_pos = i; 6009e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } else if (current.to() + 1 < from) { 6010e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke start_pos = i + 1; 6011e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke break; 6012e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6013e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6014e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6015e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Inserted range overlaps, or is adjacent to, ranges at positions 6016e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are 6017e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // not affected by the insertion. 6018e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // If start_pos == end_pos, the range must be inserted before start_pos. 6019e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // if start_pos < end_pos, the entire range from start_pos to end_pos 6020e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // must be merged with the insert range. 
6021e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6022e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (start_pos == end_pos) { 6023e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Insert between existing ranges at position start_pos. 6024e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (start_pos < count) { 6025e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke MoveRanges(list, start_pos, start_pos + 1, count - start_pos); 6026e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6027e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke list->at(start_pos) = insert; 6028e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return count + 1; 6029e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6030e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (start_pos + 1 == end_pos) { 6031e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Replace single existing range at position start_pos. 6032e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange to_replace = list->at(start_pos); 6033e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int new_from = Min(to_replace.from(), from); 6034e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int new_to = Max(to_replace.to(), to); 6035109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch list->at(start_pos) = CharacterRange::Range(new_from, new_to); 6036e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return count; 6037e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6038e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Replace a number of existing ranges from start_pos to end_pos - 1. 6039e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Move the remaining ranges down. 
6040e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6041e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int new_from = Min(list->at(start_pos).from(), from); 6042e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int new_to = Max(list->at(end_pos - 1).to(), to); 6043e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (end_pos < count) { 6044e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke MoveRanges(list, end_pos, start_pos + 1, count - end_pos); 6045e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6046109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch list->at(start_pos) = CharacterRange::Range(new_from, new_to); 6047e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return count - (end_pos - start_pos) + 1; 6048e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 6049e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6050e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6051e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkevoid CharacterSet::Canonicalize() { 6052e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Special/default classes are always considered canonical. The result 6053e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // of calling ranges() will be sorted. 
6054e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (ranges_ == NULL) return; 6055e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange::Canonicalize(ranges_); 6056e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 6057e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6058e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6059e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkevoid CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) { 6060e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (character_ranges->length() <= 1) return; 6061e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Check whether ranges are already canonical (increasing, non-overlapping, 6062e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // non-adjacent). 6063e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int n = character_ranges->length(); 6064e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int max = character_ranges->at(0).to(); 6065e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int i = 1; 6066e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke while (i < n) { 6067e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange current = character_ranges->at(i); 6068e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (current.from() <= max + 1) { 6069e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke break; 6070e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6071e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke max = current.to(); 6072e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke i++; 6073e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6074e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Canonical until the i'th range. If that's all of them, we are done. 
6075e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (i == n) return; 6076e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6077e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // The ranges at index i and forward are not canonicalized. Make them so by 6078e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // doing the equivalent of insertion sort (inserting each into the previous 6079e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // list, in order). 6080e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Notice that inserting a range can reduce the number of ranges in the 6081e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // result due to combining of adjacent and overlapping ranges. 6082e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int read = i; // Range to insert. 6083e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int num_canonical = i; // Length of canonicalized part of list. 6084e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke do { 6085e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke num_canonical = InsertRangeInCanonicalList(character_ranges, 6086e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke num_canonical, 6087e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke character_ranges->at(read)); 6088e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke read++; 6089e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } while (read < n); 6090e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke character_ranges->Rewind(num_canonical); 6091e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6092b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(CharacterRange::IsCanonical(character_ranges)); 6093e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 6094e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6095e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6096e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkevoid CharacterRange::Negate(ZoneList<CharacterRange>* ranges, 
6097b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* negated_ranges, 6098b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* zone) { 6099b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(CharacterRange::IsCanonical(ranges)); 6100b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(0, negated_ranges->length()); 6101e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int range_count = ranges->length(); 6102109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch uc32 from = 0; 6103e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int i = 0; 6104e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (range_count > 0 && ranges->at(0).from() == 0) { 6105109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch from = ranges->at(0).to() + 1; 6106e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke i = 1; 6107e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6108e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke while (i < range_count) { 6109e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke CharacterRange range = ranges->at(i); 6110109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone); 6111109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch from = range.to() + 1; 6112e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke i++; 6113e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6114109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (from < String::kMaxCodePoint) { 6115109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint), 6116b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch zone); 6117a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6118a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6119a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6120a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6121a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 
------------------------------------------------------------------- 6122a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Splay tree 6123a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6124a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6125b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochOutSet* OutSet::Extend(unsigned value, Zone* zone) { 6126a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (Get(value)) 6127a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return this; 6128b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (successors(zone) != NULL) { 6129b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < successors(zone)->length(); i++) { 6130b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch OutSet* successor = successors(zone)->at(i); 6131a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (successor->Get(value)) 6132a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return successor; 6133a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6135b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch successors_ = new(zone) ZoneList<OutSet*>(2, zone); 6136a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6137b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch OutSet* result = new(zone) OutSet(first_, remaining_); 6138b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch result->Set(value, zone); 6139b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch successors(zone)->Add(result, zone); 6140a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 6141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6142a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6143a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6144b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid OutSet::Set(unsigned value, Zone *zone) { 6145a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (value < kFirstLimit) { 
6146a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_ |= (1 << value); 6147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (remaining_ == NULL) 6149b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch remaining_ = new(zone) ZoneList<unsigned>(1, zone); 6150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (remaining_->is_empty() || !remaining_->Contains(value)) 6151b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch remaining_->Add(value, zone); 6152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6156b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochbool OutSet::Get(unsigned value) const { 6157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (value < kFirstLimit) { 6158a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return (first_ & (1 << value)) != 0; 6159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else if (remaining_ == NULL) { 6160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 6161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return remaining_->Contains(value); 6163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6167109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdochconst uc32 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar; 6168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6170b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdochvoid DispatchTable::AddRange(CharacterRange full_range, int value, 6171b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch Zone* 
zone) { 6172a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange current = full_range; 6173a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (tree()->is_empty()) { 6174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If this is the first range we just insert into the table. 6175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator loc; 6176014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool inserted = tree()->Insert(current.from(), &loc); 6177014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(inserted); 6178014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(inserted); 6179b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch loc.set_value(Entry(current.from(), current.to(), 6180b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch empty()->Extend(value, zone))); 6181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 6182a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // First see if there is a range to the left of this one that 6184a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // overlaps. 6185a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator loc; 6186a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (tree()->FindGreatestLessThan(current.from(), &loc)) { 6187a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Entry* entry = &loc.value(); 6188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If we've found a range that overlaps with this one, and it 6189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // starts strictly to the left of this one, we have to fix it 6190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // because the following code only handles ranges that start on 6191a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // or after the start point of the range we're adding. 
6192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (entry->from() < current.from() && entry->to() >= current.from()) { 6193a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Snap the overlapping range in half around the start point of 6194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the range we're adding. 6195109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange left = 6196109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange::Range(entry->from(), current.from() - 1); 6197109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch CharacterRange right = CharacterRange::Range(current.from(), entry->to()); 6198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The left part of the overlapping range doesn't overlap. 6199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Truncate the whole entry to be just the left part. 6200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block entry->set_to(left.to()); 6201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The right part is the one that overlaps. We add this part 6202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to the map and let the next step deal with merging it with 6203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the range we're adding. 
6204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator loc; 6205014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool inserted = tree()->Insert(right.from(), &loc); 6206014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(inserted); 6207014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(inserted); 6208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block loc.set_value(Entry(right.from(), 6209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block right.to(), 6210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block entry->out_set())); 6211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block while (current.is_valid()) { 6214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (tree()->FindLeastGreaterThan(current.from(), &loc) && 6215a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block (loc.value().from() <= current.to()) && 6216a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block (loc.value().to() >= current.from())) { 6217a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Entry* entry = &loc.value(); 6218a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // We have overlap. If there is space between the start point of 6219a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the range we're adding and where the overlapping range starts 6220a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // then we have to add a range covering just that space. 
6221a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (current.from() < entry->from()) { 6222a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator ins; 6223014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool inserted = tree()->Insert(current.from(), &ins); 6224014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(inserted); 6225014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(inserted); 6226a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ins.set_value(Entry(current.from(), 6227a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block entry->from() - 1, 6228b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch empty()->Extend(value, zone))); 6229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block current.set_from(entry->from()); 6230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6231b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(current.from(), entry->from()); 6232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the overlapping range extends beyond the one we want to add 6233a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // we have to snap the right part off and add it separately. 
6234a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (entry->to() > current.to()) { 6235a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator ins; 6236014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool inserted = tree()->Insert(current.to() + 1, &ins); 6237014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(inserted); 6238014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(inserted); 6239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ins.set_value(Entry(current.to() + 1, 6240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block entry->to(), 6241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block entry->out_set())); 6242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block entry->set_to(current.to()); 6243a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6244b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(entry->to() <= current.to()); 6245a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The overlapping range is now completely contained by the range 6246a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // we're adding so we can just update it and move the start point 6247a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // of the range we're adding just past it. 
6248b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch entry->AddValue(value, zone); 6249b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK(entry->to() + 1 > current.from()); 6250a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block current.set_from(entry->to() + 1); 6251a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6252a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // There is no overlap so we can just add the range 6253a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator ins; 6254014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch bool inserted = tree()->Insert(current.from(), &ins); 6255014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(inserted); 6256014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch USE(inserted); 6257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ins.set_value(Entry(current.from(), 6258a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block current.to(), 6259b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch empty()->Extend(value, zone))); 6260a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 6261a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6263a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6264a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6266109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochOutSet* DispatchTable::Get(uc32 value) { 6267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>::Locator loc; 6268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!tree()->FindGreatestLessThan(value, &loc)) 6269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return empty(); 6270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Entry* entry = &loc.value(); 6271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (value <= entry->to()) 6272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return 
entry->out_set(); 6273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block else 6274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return empty(); 6275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ------------------------------------------------------------------- 6279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Analysis 6280a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6281a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6282a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::EnsureAnalyzed(RegExpNode* that) { 6283014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch StackLimitCheck check(isolate()); 6284a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (check.HasOverflowed()) { 6285a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block fail("Stack overflow"); 6286a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 6287a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6288a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (that->info()->been_analyzed || that->info()->being_analyzed) 6289a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 6290a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->info()->being_analyzed = true; 6291a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->Accept(this); 6292a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->info()->being_analyzed = false; 6293a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->info()->been_analyzed = true; 6294a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6295a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6296a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6297a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitEnd(EndNode* that) { 6298a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // nothing to do 
6299a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6300a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6301a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6302a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid TextNode::CalculateOffsets() { 6303a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int element_count = elements()->length(); 6304a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Set up the offsets of the elements relative to the start. This is a fixed 6305a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // quantity since a TextNode can only contain fixed-width things. 6306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset = 0; 6307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < element_count; i++) { 6308a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextElement& elm = elements()->at(i); 6309b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch elm.set_cp_offset(cp_offset); 6310b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch cp_offset += elm.length(); 6311a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6312a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6313a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6314a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6315a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitText(TextNode* that) { 6316109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (ignore_case()) { 6317014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch that->MakeCaseIndependent(isolate(), is_one_byte_); 6318a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6319a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(that->on_success()); 6320a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!has_failed()) { 6321a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->CalculateOffsets(); 6322a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6323a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 
6324a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6325a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6326a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitAction(ActionNode* that) { 6327a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* target = that->on_success(); 6328a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(target); 6329a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!has_failed()) { 6330a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the next node is interested in what it follows then this node 6331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // has to be interested too so it can pass the information on. 6332a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block that->info()->AddFromFollowing(target->info()); 6333a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6334a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6335a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6336a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6337a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitChoice(ChoiceNode* that) { 6338a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo* info = that->info(); 6339a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < that->alternatives()->length(); i++) { 6340a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = that->alternatives()->at(i).node(); 6341a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(node); 6342a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (has_failed()) return; 6343a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Anything the following nodes need to know has to be known by 6344a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // this node also, so it can pass it on. 
6345a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block info->AddFromFollowing(node->info()); 6346a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6348a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6349a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitLoopChoice(LoopChoiceNode* that) { 6351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo* info = that->info(); 6352a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < that->alternatives()->length(); i++) { 6353a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = that->alternatives()->at(i).node(); 6354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (node != that->loop_node()) { 6355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(node); 6356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (has_failed()) return; 6357a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block info->AddFromFollowing(node->info()); 6358a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Check the loop last since it may need the value of this node 6361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to get a correct result. 
6362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(that->loop_node()); 6363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (!has_failed()) { 6364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block info->AddFromFollowing(that->loop_node()->info()); 6365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitBackReference(BackReferenceNode* that) { 6370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(that->on_success()); 6371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid Analysis::VisitAssertion(AssertionNode* that) { 6375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EnsureAnalyzed(that->on_success()); 6376e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 6377e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6378e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6379014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 6380b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch BoyerMooreLookahead* bm, 6381b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bool not_at_start) { 6382b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Working out the set of characters that a backreference can match is too 6383b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // hard, so we just say that any character can match. 
6384b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->SetRest(offset); 6385b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SaveBMInfo(bm, not_at_start, offset); 6386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6389b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochSTATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == 6390b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpMacroAssembler::kTableSize); 6391e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6392e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6393014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, 6394014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BoyerMooreLookahead* bm, bool not_at_start) { 6395b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<GuardedAlternative>* alts = alternatives(); 6396b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch budget = (budget - 1) / alts->length(); 6397b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < alts->length(); i++) { 6398b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch GuardedAlternative& alt = alts->at(i); 6399b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (alt.guards() != NULL && alt.guards()->length() != 0) { 6400b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->SetRest(offset); // Give up trying to fill in info. 
6401b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SaveBMInfo(bm, not_at_start, offset); 6402b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 6403e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6404014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start); 6405e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6406b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch SaveBMInfo(bm, not_at_start, offset); 6407e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 6408e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6409e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6410014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, 6411014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch BoyerMooreLookahead* bm, bool not_at_start) { 6412b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (initial_offset >= bm->length()) return; 6413b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int offset = initial_offset; 6414b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int max_char = bm->max_char(); 6415b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = 0; i < elements()->length(); i++) { 6416b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (offset >= bm->length()) { 6417b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (initial_offset == 0) set_bm_info(not_at_start, bm); 6418b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 6419e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6420b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch TextElement text = elements()->at(i); 6421b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (text.text_type() == TextElement::ATOM) { 6422b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpAtom* atom = text.atom(); 6423b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < atom->length(); j++, offset++) { 
6424b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (offset >= bm->length()) { 6425b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (initial_offset == 0) set_bm_info(not_at_start, bm); 6426b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 6427b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6428b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch uc16 character = atom->data()[j]; 6429b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (bm->compiler()->ignore_case()) { 6430b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 6431b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int length = GetCaseIndependentLetters( 6432014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch isolate, character, bm->max_char() == String::kMaxOneByteCharCode, 6433b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch chars); 6434b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int j = 0; j < length; j++) { 6435b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->Set(offset, chars[j]); 6436b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6437b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } else { 6438b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (character <= max_char) bm->Set(offset, character); 64396ded16be15dd865a9b21ea304d5273c8be299c87Steve Block } 6440e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6441e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } else { 6442b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); 6443b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* char_class = text.char_class(); 6444b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); 6445e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke if (char_class->is_negated()) { 6446b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->SetAll(offset); 
6447e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } else { 6448b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int k = 0; k < ranges->length(); k++) { 6449b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch CharacterRange& range = ranges->at(k); 6450b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (range.from() > max_char) continue; 6451b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int to = Min(max_char, static_cast<int>(range.to())); 6452b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch bm->SetInterval(offset, Interval(range.from(), to)); 6453b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6454e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6455b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch offset++; 6456e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6457e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 6458b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (offset >= bm->length()) { 6459b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (initial_offset == 0) set_bm_info(not_at_start, bm); 6460b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch return; 6461b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6462014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, 6463b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch true); // Not at start after a text node. 
6464b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (initial_offset == 0) set_bm_info(not_at_start, bm); 6465e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke} 6466e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6467e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6468a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// ------------------------------------------------------------------- 6469a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Dispatch table construction 6470a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6471a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6472a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::VisitEnd(EndNode* that) { 6473a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddRange(CharacterRange::Everything()); 6474a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6475a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6476a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6477a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::BuildTable(ChoiceNode* node) { 6478a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node->set_being_calculated(true); 6479a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<GuardedAlternative>* alternatives = node->alternatives(); 6480a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < alternatives->length(); i++) { 6481a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block set_choice_index(i); 6482a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block alternatives->at(i).node()->Accept(this); 6483a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node->set_being_calculated(false); 6485a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6486a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass AddDispatchRange { 
6489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 6490a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit AddDispatchRange(DispatchTableConstructor* constructor) 6491a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : constructor_(constructor) { } 6492a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Call(uc32 from, DispatchTable::Entry entry); 6493a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 6494a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTableConstructor* constructor_; 6495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 6496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6497a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6498a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid AddDispatchRange::Call(uc32 from, DispatchTable::Entry entry) { 6499109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch constructor_->AddRange(CharacterRange::Range(from, entry.to())); 6500a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6502a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6503a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::VisitChoice(ChoiceNode* node) { 6504a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (node->being_calculated()) 6505a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 6506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTable* table = node->GetTable(ignore_case_); 6507a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddDispatchRange adder(this); 6508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block table->ForEach(&adder); 6509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6511a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6512a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) { 
6513a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // TODO(160): Find the node that we refer back to and propagate its start 6514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // set back to here. For now we just accept anything. 6515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddRange(CharacterRange::Everything()); 6516a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6517a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6518a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::VisitAssertion(AssertionNode* that) { 6520a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* target = that->on_success(); 6521a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block target->Accept(this); 6522a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6523a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6524a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6525a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstatic int CompareRangeByFrom(const CharacterRange* a, 6526a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const CharacterRange* b) { 6527a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Compare<uc16>(a->from(), b->from()); 6528a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6529a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6530a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6531a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { 6532a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ranges->Sort(CompareRangeByFrom); 6533a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 last = 0; 6534a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < ranges->length(); i++) { 6535a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange range = ranges->at(i); 6536a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (last < 
range.from()) 6537109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddRange(CharacterRange::Range(last, range.from() - 1)); 6538a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (range.to() >= last) { 6539109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (range.to() == String::kMaxCodePoint) { 6540a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 6541a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6542a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block last = range.to() + 1; 6543a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6544a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6545a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6546109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddRange(CharacterRange::Range(last, String::kMaxCodePoint)); 6547a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6548a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6549a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6550a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::VisitText(TextNode* that) { 6551a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextElement elm = that->elements()->at(0); 6552b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch switch (elm.text_type()) { 6553a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case TextElement::ATOM: { 6554b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch uc16 c = elm.atom()->data()[0]; 6555109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch AddRange(CharacterRange::Range(c, c)); 6556a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 6557a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6558a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block case TextElement::CHAR_CLASS: { 6559b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch RegExpCharacterClass* tree = elm.char_class(); 6560b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ZoneList<CharacterRange>* ranges = tree->ranges(that->zone()); 
6561a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (tree->is_negated()) { 6562a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddInverse(ranges); 6563a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6564a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block for (int i = 0; i < ranges->length(); i++) 6565a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddRange(ranges->at(i)); 6566a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block break; 6568a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6569a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block default: { 6570a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNIMPLEMENTED(); 6571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6574a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6575a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockvoid DispatchTableConstructor::VisitAction(ActionNode* that) { 6577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* target = that->on_success(); 6578a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block target->Accept(this); 6579a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6580a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6581a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6582109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben MurdochRegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler, 6583109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* on_success) { 6584109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // If the regexp matching starts within a surrogate pair, step back 6585109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch // to the lead surrogate and start matching from there. 
6586109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch DCHECK(!compiler->read_backward()); 6587109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Zone* zone = compiler->zone(); 6588109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List( 6589109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd)); 6590109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List( 6591109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd)); 6592109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 6593109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone); 6594109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 6595109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int stack_register = compiler->UnicodeLookaroundStackRegister(); 6596109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch int position_register = compiler->UnicodeLookaroundPositionRegister(); 6597109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* step_back = TextNode::CreateForCharacterRanges( 6598109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, lead_surrogates, true, on_success); 6599109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpLookaround::Builder builder(true, step_back, stack_register, 6600109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch position_register); 6601109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpNode* match_trail = TextNode::CreateForCharacterRanges( 6602109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch zone, trail_surrogates, false, builder.on_match_success()); 6603109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 6604109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch optional_step_back->AddAlternative( 
6605109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch GuardedAlternative(builder.ForMatch(match_trail))); 6606109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch optional_step_back->AddAlternative(GuardedAlternative(on_success)); 6607109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 6608109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch return optional_step_back; 6609109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch} 6610109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 6611109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch 6612b8a8cc1952d61a2f3a2568848933943a543b5d3eBen MurdochRegExpEngine::CompilationResult RegExpEngine::Compile( 6613109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Isolate* isolate, Zone* zone, RegExpCompileData* data, 6614109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch JSRegExp::Flags flags, Handle<String> pattern, 6615014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<String> sample_subject, bool is_one_byte) { 6616a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { 6617014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return IrregexpRegExpTooBig(isolate); 6618b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6619109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool ignore_case = flags & JSRegExp::kIgnoreCase; 6620109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool is_sticky = flags & JSRegExp::kSticky; 6621109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool is_global = flags & JSRegExp::kGlobal; 6622109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch bool is_unicode = flags & JSRegExp::kUnicode; 6623109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpCompiler compiler(isolate, zone, data->capture_count, flags, 6624014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch is_one_byte); 6625b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 6626014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (compiler.optimize()) 
compiler.set_optimize(!TooMuchRegExpCode(pattern)); 6627958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 6628b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Sample some characters from the middle of the string. 6629b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch static const int kSampleSize = 128; 6630b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 6631b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch sample_subject = String::Flatten(sample_subject); 6632b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int chars_sampled = 0; 6633b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch int half_way = (sample_subject->length() - kSampleSize) / 2; 6634b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch for (int i = Max(0, half_way); 6635b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch i < sample_subject->length() && chars_sampled < kSampleSize; 6636b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch i++, chars_sampled++) { 6637b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch compiler.frequency_collator()->CountCharacter(sample_subject->Get(i)); 6638a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6639b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 6640a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Wrap the body of the regexp in capture #0. 
6641a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 6642a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 0, 6643a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block &compiler, 6644a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block compiler.accept()); 6645a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node = captured_body; 6646f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch bool is_end_anchored = data->tree->IsAnchoredAtEnd(); 6647f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch bool is_start_anchored = data->tree->IsAnchoredAtStart(); 6648f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch int max_length = data->tree->max_match(); 6649b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (!is_start_anchored && !is_sticky) { 6650a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Add a .*? at the beginning, outside the body capture, unless 6651b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // this expression is anchored at the beginning or sticky. 6652014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpNode* loop_node = RegExpQuantifier::ToNode( 6653014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'), 6654014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch &compiler, captured_body, data->contains_anchor); 6655a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6656a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (data->contains_anchor) { 6657a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Unroll loop once, to take care of the case that might start 6658a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // at the start of input. 
6659b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); 6660a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block first_step_node->AddAlternative(GuardedAlternative(captured_body)); 6661014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( 6662014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch new (zone) RegExpCharacterClass('*'), false, loop_node))); 6663a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node = first_step_node; 6664a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } else { 6665a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node = loop_node; 6666a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6667a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6668b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (is_one_byte) { 6669b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 6670b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // Do it again to propagate the new nodes to places where they were not 6671b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch // put because they had not been calculated yet. 
6672b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (node != NULL) { 6673b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); 6674b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6675109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else if (compiler.unicode() && (is_global || is_sticky)) { 6676109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch node = OptionallyStepBackToLeadSurrogate(&compiler, node); 6677b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6678b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 6679b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); 6680a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data->node = node; 6681109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch Analysis analysis(isolate, flags, is_one_byte); 6682a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block analysis.EnsureAnalyzed(node); 6683a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (analysis.has_failed()) { 6684a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const char* error_message = analysis.error_message(); 6685014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return CompilationResult(isolate, error_message); 6686a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 6687a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6688a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Create the correct assembler for the architecture. 66896ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#ifndef V8_INTERPRETED_REGEXP 6690a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Native regexp implementation. 6691a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6692a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NativeRegExpMacroAssembler::Mode mode = 6693b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch is_one_byte ? 
NativeRegExpMacroAssembler::LATIN1 6694b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch : NativeRegExpMacroAssembler::UC16; 6695a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6696a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#if V8_TARGET_ARCH_IA32 6697014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerIA32 macro_assembler(isolate, zone, mode, 6698014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 6699a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#elif V8_TARGET_ARCH_X64 6700014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerX64 macro_assembler(isolate, zone, mode, 6701014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 6702a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#elif V8_TARGET_ARCH_ARM 6703014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerARM macro_assembler(isolate, zone, mode, 6704014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 6705b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#elif V8_TARGET_ARCH_ARM64 6706014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerARM64 macro_assembler(isolate, zone, mode, 6707014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 67083b9bc31999c9787eb726ecdbfd5796bfdec32a18Ben Murdoch#elif V8_TARGET_ARCH_S390 67093b9bc31999c9787eb726ecdbfd5796bfdec32a18Ben Murdoch RegExpMacroAssemblerS390 macro_assembler(isolate, zone, mode, 67103b9bc31999c9787eb726ecdbfd5796bfdec32a18Ben Murdoch (data->capture_count + 1) * 2); 6711014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#elif V8_TARGET_ARCH_PPC 6712014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerPPC macro_assembler(isolate, zone, mode, 6713014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 671444f0eee88ff00398ff7f715fab053374d808c90dSteve Block#elif V8_TARGET_ARCH_MIPS 
6715014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode, 6716014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 6717b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#elif V8_TARGET_ARCH_MIPS64 6718014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode, 6719014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 6720b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#elif V8_TARGET_ARCH_X87 6721014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerX87 macro_assembler(isolate, zone, mode, 6722014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch (data->capture_count + 1) * 2); 6723b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#else 6724b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#error "Unsupported architecture" 6725a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 6726a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 67276ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#else // V8_INTERPRETED_REGEXP 6728a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Interpreted regexp implementation. 
6729a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EmbeddedVector<byte, 1024> codes; 6730014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch RegExpMacroAssemblerIrregexp macro_assembler(isolate, codes, zone); 67316ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#endif // V8_INTERPRETED_REGEXP 6732a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6733958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier macro_assembler.set_slow_safe(TooMuchRegExpCode(pattern)); 6734958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier 6735f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch // Inserted here, instead of in Assembler, because it depends on information 6736f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch // in the AST that isn't replicated in the Node structure. 6737f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch static const int kMaxBacksearchLimit = 1024; 673862ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch if (is_end_anchored && !is_start_anchored && !is_sticky && 6739f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch max_length < kMaxBacksearchLimit) { 6740f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch macro_assembler.SetCurrentPositionFromEnd(max_length); 6741f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch } 6742f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch 6743b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch if (is_global) { 6744109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL; 6745109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch if (data->tree->min_match() > 0) { 6746109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK; 6747109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } else if (is_unicode) { 6748109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch mode = RegExpMacroAssembler::GLOBAL_UNICODE; 6749109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch } 
6750109988c7ccb6f3fd1a58574fa3dfb88beaef6632Ben Murdoch macro_assembler.set_global_mode(mode); 6751b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch } 6752b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 6753a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return compiler.Assemble(¯o_assembler, 6754a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node, 6755a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block data->capture_count, 6756a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block pattern); 6757a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} 6758a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 6759e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 6760958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernierbool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) { 6761958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier Heap* heap = pattern->GetHeap(); 6762958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize; 676362ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch if (heap->isolate()->total_regexp_code_generated() > 676462ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch RegExpImpl::kRegExpCompiledLimit && 676562ed631aa0ff23db68a47fd423efa9c019ff2c9eBen Murdoch heap->CommittedMemoryExecutable() > 6766958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier RegExpImpl::kRegExpExecutableMemoryLimit) { 6767958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier too_much = true; 6768958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier } 6769958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier return too_much; 6770958fae7ec3f466955f8e5b50fa5b8d38b9e91675Emily Bernier} 6771014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6772014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6773014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben MurdochObject* RegExpResultsCache::Lookup(Heap* heap, String* key_string, 6774014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Object* key_pattern, 
6775014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch FixedArray** last_match_cache, 6776014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ResultsCacheType type) { 6777014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch FixedArray* cache; 6778c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (!key_string->IsInternalizedString()) return Smi::kZero; 6779014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (type == STRING_SPLIT_SUBSTRINGS) { 6780014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(key_pattern->IsString()); 6781c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (!key_pattern->IsInternalizedString()) return Smi::kZero; 6782014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache = heap->string_split_cache(); 6783014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 6784014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(type == REGEXP_MULTIPLE_INDICES); 6785014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(key_pattern->IsFixedArray()); 6786014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache = heap->regexp_multiple_cache(); 6787014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6788014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6789014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uint32_t hash = key_string->Hash(); 6790014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) & 6791014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ~(kArrayEntriesPerCacheEntry - 1)); 6792014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (cache->get(index + kStringOffset) != key_string || 6793014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->get(index + kPatternOffset) != key_pattern) { 6794014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch index = 6795014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1)); 6796014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if 
(cache->get(index + kStringOffset) != key_string || 6797014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->get(index + kPatternOffset) != key_pattern) { 6798c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch return Smi::kZero; 6799014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6800014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6801014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6802014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch *last_match_cache = FixedArray::cast(cache->get(index + kLastMatchOffset)); 6803014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch return cache->get(index + kArrayOffset); 6804014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 6805014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6806014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6807014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string, 6808014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<Object> key_pattern, 6809014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<FixedArray> value_array, 6810014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<FixedArray> last_match_cache, 6811014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ResultsCacheType type) { 6812014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Factory* factory = isolate->factory(); 6813014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<FixedArray> cache; 6814014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!key_string->IsInternalizedString()) return; 6815014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (type == STRING_SPLIT_SUBSTRINGS) { 6816014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(key_pattern->IsString()); 6817014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (!key_pattern->IsInternalizedString()) return; 6818014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache = factory->string_split_cache(); 6819014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben 
Murdoch } else { 6820014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(type == REGEXP_MULTIPLE_INDICES); 6821014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch DCHECK(key_pattern->IsFixedArray()); 6822014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache = factory->regexp_multiple_cache(); 6823014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6824014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6825014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uint32_t hash = key_string->Hash(); 6826014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) & 6827014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ~(kArrayEntriesPerCacheEntry - 1)); 6828c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (cache->get(index + kStringOffset) == Smi::kZero) { 6829014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kStringOffset, *key_string); 6830014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kPatternOffset, *key_pattern); 6831014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kArrayOffset, *value_array); 6832014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kLastMatchOffset, *last_match_cache); 6833014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 6834014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch uint32_t index2 = 6835014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1)); 6836c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch if (cache->get(index2 + kStringOffset) == Smi::kZero) { 6837014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index2 + kStringOffset, *key_string); 6838014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index2 + kPatternOffset, *key_pattern); 6839014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index2 + kArrayOffset, *value_array); 6840014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben 
Murdoch cache->set(index2 + kLastMatchOffset, *last_match_cache); 6841014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } else { 6842c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch cache->set(index2 + kStringOffset, Smi::kZero); 6843c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch cache->set(index2 + kPatternOffset, Smi::kZero); 6844c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch cache->set(index2 + kArrayOffset, Smi::kZero); 6845c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch cache->set(index2 + kLastMatchOffset, Smi::kZero); 6846014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kStringOffset, *key_string); 6847014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kPatternOffset, *key_pattern); 6848014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kArrayOffset, *value_array); 6849014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch cache->set(index + kLastMatchOffset, *last_match_cache); 6850014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6851014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6852014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // If the array is a reasonably short list of substrings, convert it into a 6853014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // list of internalized strings. 
6854014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) { 6855014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int i = 0; i < value_array->length(); i++) { 6856014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<String> str(String::cast(value_array->get(i)), isolate); 6857014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch Handle<String> internalized_str = factory->InternalizeString(str); 6858014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch value_array->set(i, *internalized_str); 6859014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6860014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6861014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch // Convert backing store to a copy-on-write array. 6862c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch value_array->set_map_no_write_barrier(isolate->heap()->fixed_cow_array_map()); 6863014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 6864014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6865014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6866014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochvoid RegExpResultsCache::Clear(FixedArray* cache) { 6867014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch for (int i = 0; i < kRegExpResultsCacheSize; i++) { 6868c8c1d9e03f4babd16833b0f8ccf6aab5fa6e8c7aBen Murdoch cache->set(i, Smi::kZero); 6869014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch } 6870014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} 6871014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch 6872014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} // namespace internal 6873014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch} // namespace v8 6874