13ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// Copyright 2012 the V8 project authors. All rights reserved. 2a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Redistribution and use in source and binary forms, with or without 3a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// modification, are permitted provided that the following conditions are 4a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// met: 5a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 6a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// * Redistributions of source code must retain the above copyright 7a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// notice, this list of conditions and the following disclaimer. 8a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// * Redistributions in binary form must reproduce the above 9a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// copyright notice, this list of conditions and the following 10a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// disclaimer in the documentation and/or other materials provided 11a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// with the distribution. 12a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// * Neither the name of Google Inc. nor the names of its 13a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// contributors may be used to endorse or promote products derived 14a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// from this software without specific prior written permission. 15a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 16a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 28a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#ifndef V8_JSREGEXP_H_ 29a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define V8_JSREGEXP_H_ 30a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 31257744e915dfc84d6d07a6b2accf8402d9ffc708Ben Murdoch#include "allocation.h" 323ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch#include "assembler.h" 336ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#include "zone-inl.h" 343ce2e2076e8e3e60cf1810eec160ea2d8557e9e7Steve Block 35a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace v8 { 36a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blocknamespace internal { 37a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 383ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochclass NodeVisitor; 393ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochclass RegExpCompiler; 40a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass RegExpMacroAssembler; 413ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochclass RegExpNode; 423ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochclass RegExpTree; 43a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 44a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass RegExpImpl { 45a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 46a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Whether V8 is compiled with native regexp support or not. 47a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static bool UsesNativeRegExp() { 486ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#ifdef V8_INTERPRETED_REGEXP 49a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return false; 506ded16be15dd865a9b21ea304d5273c8be299c87Steve Block#else 516ded16be15dd865a9b21ea304d5273c8be299c87Steve Block return true; 52a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif 53a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 54a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 55a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Creates a regular expression literal in the old space. 56a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This function calls the garbage collector if necessary. 57a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor, 58a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern, 59a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> flags, 60a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool* has_pending_exception); 61a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 62a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns a string representation of a regular expression. 63a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4. 64a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This function calls the garbage collector if necessary. 65a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Handle<String> ToString(Handle<Object> value); 66a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 67a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Parses the RegExp pattern and prepares the JSRegExp object with 68a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // generic data and choice of implementation - as well as what 69a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the implementation wants to store in the data field. 70a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns false if compilation fails. 71a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Handle<Object> Compile(Handle<JSRegExp> re, 72a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern, 73a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> flags); 74a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 75a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // See ECMA-262 section 15.10.6.2. 76a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This function calls the garbage collector if necessary. 77a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Handle<Object> Exec(Handle<JSRegExp> regexp, 78a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> subject, 79a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int index, 80a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<JSArray> lastMatchInfo); 81a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 82a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Prepares a JSRegExp object with Irregexp-specific data. 836ded16be15dd865a9b21ea304d5273c8be299c87Steve Block static void IrregexpInitialize(Handle<JSRegExp> re, 846ded16be15dd865a9b21ea304d5273c8be299c87Steve Block Handle<String> pattern, 856ded16be15dd865a9b21ea304d5273c8be299c87Steve Block JSRegExp::Flags flags, 866ded16be15dd865a9b21ea304d5273c8be299c87Steve Block int capture_register_count); 87a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 88a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 89a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void AtomCompile(Handle<JSRegExp> re, 90a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern, 91a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block JSRegExp::Flags flags, 92a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> match_pattern); 93a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 94a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Handle<Object> AtomExec(Handle<JSRegExp> regexp, 95a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> subject, 96a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int index, 97a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<JSArray> lastMatchInfo); 98a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 996ded16be15dd865a9b21ea304d5273c8be299c87Steve Block enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 }; 1006ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 1016ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // Prepare a RegExp for being executed one or more times (using 1026ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // IrregexpExecOnce) on the subject. 1036ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // This ensures that the regexp is compiled for the subject, and that 1046ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // the subject is flat. 1056ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // Returns the number of integer spaces required by IrregexpExecOnce 1066ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // as its "registers" argument. If the regexp cannot be compiled, 1076ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // an exception is set as pending, and this function returns negative. 1086ded16be15dd865a9b21ea304d5273c8be299c87Steve Block static int IrregexpPrepare(Handle<JSRegExp> regexp, 1096ded16be15dd865a9b21ea304d5273c8be299c87Steve Block Handle<String> subject); 1106ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 1116ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // Execute a regular expression once on the subject, starting from 1126ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // character "index". 1136ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // If successful, returns RE_SUCCESS and set the capture positions 1146ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // in the first registers. 1156ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // If matching fails, returns RE_FAILURE. 1166ded16be15dd865a9b21ea304d5273c8be299c87Steve Block // If execution fails, sets a pending exception and returns RE_EXCEPTION. 1176ded16be15dd865a9b21ea304d5273c8be299c87Steve Block static IrregexpResult IrregexpExecOnce(Handle<JSRegExp> regexp, 1186ded16be15dd865a9b21ea304d5273c8be299c87Steve Block Handle<String> subject, 1196ded16be15dd865a9b21ea304d5273c8be299c87Steve Block int index, 120b8e0da25ee8efac3bb05cd6b2730aafbd96119f4Ben Murdoch Vector<int> registers); 1216ded16be15dd865a9b21ea304d5273c8be299c87Steve Block 122a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Execute an Irregexp bytecode pattern. 123a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // On a successful match, the result is a JSArray containing 124a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // captured positions. On a failure, the result is the null value. 125a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns an empty handle in case of an exception. 126a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, 127a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> subject, 128a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int index, 129a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<JSArray> lastMatchInfo); 130a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 131e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Array index in the lastMatchInfo array. 132a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kLastCaptureCount = 0; 133a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kLastSubject = 1; 134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kLastInput = 2; 135a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kFirstCapture = 3; 136a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kLastMatchOverhead = 3; 137a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 138e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Direct offset into the lastMatchInfo array. 139e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kLastCaptureCountOffset = 140e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize; 141e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kLastSubjectOffset = 142e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke FixedArray::kHeaderSize + kLastSubject * kPointerSize; 143e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kLastInputOffset = 144e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke FixedArray::kHeaderSize + kLastInput * kPointerSize; 145e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kFirstCaptureOffset = 146e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke FixedArray::kHeaderSize + kFirstCapture * kPointerSize; 147e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Used to access the lastMatchInfo array. 149a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int GetCapture(FixedArray* array, int index) { 150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Smi::cast(array->get(index + kFirstCapture))->value(); 151a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void SetLastCaptureCount(FixedArray* array, int to) { 154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block array->set(kLastCaptureCount, Smi::FromInt(to)); 155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 156a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void SetLastSubject(FixedArray* array, String* to) { 158a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block array->set(kLastSubject, to); 159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void SetLastInput(FixedArray* array, String* to) { 162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block array->set(kLastInput, to); 163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void SetCapture(FixedArray* array, int index, int to) { 166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block array->set(index + kFirstCapture, Smi::FromInt(to)); 167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int GetLastCaptureCount(FixedArray* array) { 170a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Smi::cast(array->get(kLastCaptureCount))->value(); 171a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 172a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 173a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // For acting on the JSRegExp data FixedArray. 174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int IrregexpMaxRegisterCount(FixedArray* re); 175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); 176a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int IrregexpNumberOfCaptures(FixedArray* re); 177a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int IrregexpNumberOfRegisters(FixedArray* re); 178a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii); 179a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii); 180a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 181053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block // Limit the space regexps take up on the heap. In order to limit this we 182053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block // would like to keep track of the amount of regexp code on the heap. This 183053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block // is not tracked, however. As a conservative approximation we track the 184053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block // total regexp code compiled including code that has subsequently been freed 185053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block // and the total executable memory at any point. 186053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block static const int kRegExpExecutableMemoryLimit = 16 * MB; 187053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block static const int kRegWxpCompiledLimit = 1 * MB; 188053d10c438f14580aaf4ab1b2aad93a5a4fe8b82Steve Block 189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static String* last_ascii_string_; 191a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static String* two_byte_cached_string_; 192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 193a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static bool CompileIrregexp(Handle<JSRegExp> re, bool is_ascii); 194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); 195a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 196a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 197a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Set the subject cache. The previous string buffer is not deleted, so the 198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // caller should ensure that it doesn't leak. 199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void SetSubjectCache(String* subject, 200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block char* utf8_subject, 201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int uft8_length, 202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int character_position, 203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int utf8_position); 204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A one element cache of the last utf8_subject string and its length. The 206a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // subject JS String object is cached in the heap. We also cache a 207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // translation between position and utf8 position. 208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static char* utf8_subject_cache_; 209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int utf8_length_cache_; 210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int utf8_position_; 211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static int character_position_; 212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 215e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke// Represents the location of one element relative to the intersection of 216e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke// two sets. Corresponds to the four areas of a Venn diagram. 217e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkeenum ElementInSetsRelation { 218e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInsideNone = 0, 219e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInsideFirst = 1, 220e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInsideSecond = 2, 221e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInsideBoth = 3 222e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke}; 223e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 224e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 225e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke// Represents the relation of two sets. 226e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke// Sets can be either disjoint, partially or fully overlapping, or equal. 227e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkeclass SetRelation BASE_EMBEDDED { 228e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke public: 229e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Relation is represented by a bit saying whether there are elements in 230e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // one set that is not in the other, and a bit saying that there are elements 231e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // that are in both sets. 232e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 233e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Location of an element. Corresponds to the internal areas of 234e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // a Venn diagram. 235e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke enum { 236e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInFirst = 1 << kInsideFirst, 237e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInSecond = 1 << kInsideSecond, 238e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke kInBoth = 1 << kInsideBoth 239e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke }; 240e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke SetRelation() : bits_(0) {} 241e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ~SetRelation() {} 242e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Add the existence of objects in a particular 243e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke void SetElementsInFirstSet() { bits_ |= kInFirst; } 244e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke void SetElementsInSecondSet() { bits_ |= kInSecond; } 245e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke void SetElementsInBothSets() { bits_ |= kInBoth; } 246e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Check the currently known relation of the sets (common functions only, 247e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // for other combinations, use value() to get the bits and check them 248e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // manually). 249e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Sets are completely disjoint. 250e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke bool Disjoint() { return (bits_ & kInBoth) == 0; } 251e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Sets are equal. 252e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke bool Equals() { return (bits_ & (kInFirst | kInSecond)) == 0; } 253e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // First set contains second. 254e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke bool Contains() { return (bits_ & kInSecond) == 0; } 255e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Second set contains first. 256e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke bool ContainedIn() { return (bits_ & kInFirst) == 0; } 257e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke bool NonTrivialIntersection() { 258e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return (bits_ == (kInFirst | kInSecond | kInBoth)); 259e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 260e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int value() { return bits_; } 261589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 262e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke private: 263e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int bits_; 264e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke}; 265e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 266e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass CharacterRange { 268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange() : from_(0), to_(0) { } 270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // For compatibility with the CHECK_OK macro 271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT 272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { } 273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges); 274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Vector<const uc16> GetWordBounds(); 275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static inline CharacterRange Singleton(uc16 value) { 276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return CharacterRange(value, value); 277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static inline CharacterRange Range(uc16 from, uc16 to) { 279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ASSERT(from <= to); 280a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return CharacterRange(from, to); 281a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 282a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static inline CharacterRange Everything() { 283a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return CharacterRange(0, 0xFFFF); 284a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 285a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool Contains(uc16 i) { return from_ <= i && i <= to_; } 286a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 from() const { return from_; } 287a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_from(uc16 value) { from_ = value; } 288a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 to() const { return to_; } 289a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_to(uc16 value) { to_ = value; } 290a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_valid() { return from_ <= to_; } 291a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } 292a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool IsSingleton() { return (from_ == to_); } 293d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii); 294a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void Split(ZoneList<CharacterRange>* base, 295a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Vector<const uc16> overlay, 296a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<CharacterRange>** included, 297a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<CharacterRange>** excluded); 298e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Whether a range list is in canonical form: Ranges ordered by from value, 299e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // and ranges non-overlapping and non-adjacent. 300e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static bool IsCanonical(ZoneList<CharacterRange>* ranges); 301e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Convert range list to canonical form. The characters covered by the ranges 302e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // will still be the same, but no character is in more than one range, and 303e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // adjacent ranges are merged. The resulting list may be shorter than the 304e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // original, but cannot be longer. 305e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static void Canonicalize(ZoneList<CharacterRange>* ranges); 306e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Check how the set of characters defined by a CharacterRange list relates 307e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // to the set of word characters. List must be in canonical form. 308e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static SetRelation WordCharacterRelation(ZoneList<CharacterRange>* ranges); 309e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Takes two character range lists (representing character sets) in canonical 310e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // form and merges them. 311e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // The characters that are only covered by the first set are added to 312e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // first_set_only_out. the characters that are only in the second set are 313e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // added to second_set_only_out, and the characters that are in both are 314e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // added to both_sets_out. 315e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // The pointers to first_set_only_out, second_set_only_out and both_sets_out 316e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // should be to empty lists, but they need not be distinct, and may be NULL. 317e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // If NULL, the characters are dropped, and if two arguments are the same 318e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // pointer, the result is the union of the two sets that would be created 319e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // if the pointers had been distinct. 320e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // This way, the Merge function can compute all the usual set operations: 321e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // union (all three out-sets are equal), intersection (only both_sets_out is 322e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // non-NULL), and set difference (only first_set is non-NULL). 323e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static void Merge(ZoneList<CharacterRange>* first_set, 324e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* second_set, 325e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* first_set_only_out, 326e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* second_set_only_out, 327e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* both_sets_out); 328e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Negate the contents of a character range in canonical form. 329e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static void Negate(ZoneList<CharacterRange>* src, 330e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* dst); 331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kStartMarker = (1 << 24); 332a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kPayloadMask = (1 << 24) - 1; 333a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 334a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 335a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 from_; 336a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 to_; 337a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 338a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 339a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 340a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// A set of unsigned integers that behaves especially well on small 341a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// integers (< 32). May do zone-allocation. 342a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass OutSet: public ZoneObject { 343a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 344a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } 345a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* Extend(unsigned value); 346a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool Get(unsigned value); 347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const unsigned kFirstLimit = 32; 348a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 349a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Destructively set a value in this set. In most cases you want 351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // to use Extend instead to ensure that only one instance exists 352a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // that contains the same values. 353a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Set(unsigned value); 354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // The successors are a list of sets that contain the same values 356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // as this set and the one more value that is not present in this 357a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // set. 358a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<OutSet*>* successors() { return successors_; } 359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet(uint32_t first, ZoneList<unsigned>* remaining) 361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : first_(first), remaining_(remaining), successors_(NULL) { } 362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t first_; 363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<unsigned>* remaining_; 364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<OutSet*>* successors_; 365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block friend class Trace; 366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// A mapping from integers, specified as ranges, to a set of integers. 370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Used for mapping character ranges to choices. 371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass DispatchTable : public ZoneObject { 372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class Entry { 374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Entry() : from_(0), to_(0), out_set_(NULL) { } 376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Entry(uc16 from, uc16 to, OutSet* out_set) 377a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : from_(from), to_(to), out_set_(out_set) { } 378a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 from() { return from_; } 379a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 to() { return to_; } 380a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_to(uc16 value) { to_ = value; } 381a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddValue(int value) { out_set_ = out_set_->Extend(value); } 382a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* out_set() { return out_set_; } 383a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 384a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 from_; 385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 to_; 386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* out_set_; 387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 389a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class Config { 390a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 391a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block typedef uc16 Key; 392a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block typedef Entry Value; 393a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const uc16 kNoKey; 3943ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch static const Entry NoValue() { return Value(); } 395a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static inline int Compare(uc16 a, uc16 b) { 396a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (a == b) 397a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return 0; 398a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block else if (a < b) 399a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return -1; 400a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block else 401a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return 1; 402a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 403a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 405a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddRange(CharacterRange range, int value); 406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* Get(uc16 value); 407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Dump(); 408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 409a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block template <typename Callback> 410a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void ForEach(Callback* callback) { return tree()->ForEach(callback); } 411589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 412a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 413a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // There can't be a static empty set since it allocates its 414a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // successors in a zone and caches them. 415a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* empty() { return &empty_; } 416a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet empty_; 417a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config>* tree() { return &tree_; } 418a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneSplayTree<Config> tree_; 419a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 421a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 422a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define FOR_EACH_NODE_TYPE(VISIT) \ 423a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(End) \ 424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Action) \ 425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Choice) \ 426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(BackReference) \ 427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Assertion) \ 428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Text) 429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ 432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Disjunction) \ 433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Alternative) \ 434a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Assertion) \ 435a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(CharacterClass) \ 436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Atom) \ 437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Quantifier) \ 438a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Capture) \ 439a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Lookahead) \ 440a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(BackReference) \ 441a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Empty) \ 442a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block VISIT(Text) 443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 444a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 445a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define FORWARD_DECLARE(Name) class RegExp##Name; 446a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockFOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) 447a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#undef FORWARD_DECLARE 448a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 449a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass TextElement { 451a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 452a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum Type {UNINITIALIZED, ATOM, CHAR_CLASS}; 453a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextElement() : type(UNINITIALIZED) { } 454a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit TextElement(Type t) : type(t), cp_offset(-1) { } 455a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static TextElement Atom(RegExpAtom* atom); 456a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static TextElement CharClass(RegExpCharacterClass* char_class); 457a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int length(); 458a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Type type; 459a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block union { 460a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpAtom* u_atom; 461a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCharacterClass* u_char_class; 462a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } data; 463a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset; 464a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 465a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 466a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 467a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass Trace; 468a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 469a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 470a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstruct NodeInfo { 471a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo() 472a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : being_analyzed(false), 473a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block been_analyzed(false), 474a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_word_interest(false), 475a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_newline_interest(false), 476a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_start_interest(false), 477a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block at_end(false), 478a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block visited(false) { } 479a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 480a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns true if the interests and assumptions of this node 481a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // matches the given one. 482a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool Matches(NodeInfo* that) { 483a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return (at_end == that->at_end) && 484a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block (follows_word_interest == that->follows_word_interest) && 485a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block (follows_newline_interest == that->follows_newline_interest) && 486a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block (follows_start_interest == that->follows_start_interest); 487a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 488a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 489a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Updates the interests of this node given the interests of the 490a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // node preceding it. 491a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddFromPreceding(NodeInfo* that) { 492a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block at_end |= that->at_end; 493a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_word_interest |= that->follows_word_interest; 494a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_newline_interest |= that->follows_newline_interest; 495a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_start_interest |= that->follows_start_interest; 496a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 497a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 498a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool HasLookbehind() { 499a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return follows_word_interest || 500a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_newline_interest || 501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_start_interest; 502a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 503a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 504a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Sets the interests of this node to include the interests of the 505a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // following node. 506a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddFromFollowing(NodeInfo* that) { 507a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_word_interest |= that->follows_word_interest; 508a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_newline_interest |= that->follows_newline_interest; 509a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block follows_start_interest |= that->follows_start_interest; 510a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 511a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 512a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void ResetCompilationState() { 513a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block being_analyzed = false; 514a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block been_analyzed = false; 515a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 516a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 517a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool being_analyzed: 1; 518a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool been_analyzed: 1; 519a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 520a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // These bits are set of this node has to know what the preceding 521a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // character was. 522a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool follows_word_interest: 1; 523a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool follows_newline_interest: 1; 524a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool follows_start_interest: 1; 525a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 526a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool at_end: 1; 527a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool visited: 1; 528a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 529a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 530a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 531a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass SiblingList { 532a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 533a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SiblingList() : list_(NULL) { } 534a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int length() { 535a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return list_ == NULL ? 0 : list_->length(); 536a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 537a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Ensure(RegExpNode* parent) { 538a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (list_ == NULL) { 539a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block list_ = new ZoneList<RegExpNode*>(2); 540a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block list_->Add(parent); 541a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 542a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 543a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Add(RegExpNode* node) { list_->Add(node); } 544a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* Get(int index) { return list_->at(index); } 545a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 546a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<RegExpNode*>* list_; 547a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 548a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 549a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 550a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Details of a quick mask-compare check that can look ahead in the 551a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// input stream. 552a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass QuickCheckDetails { 553a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 554a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails() 555a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : characters_(0), 556a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block mask_(0), 557a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value_(0), 558a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cannot_match_(false) { } 559a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit QuickCheckDetails(int characters) 560a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : characters_(characters), 561a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block mask_(0), 562a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value_(0), 563a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cannot_match_(false) { } 564a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool Rationalize(bool ascii); 565a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Merge in the information from another branch of an alternation. 566a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Merge(QuickCheckDetails* other, int from_index); 567a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Advance the current position by some amount. 568a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Advance(int by, bool ascii); 569a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Clear(); 570a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool cannot_match() { return cannot_match_; } 571a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_cannot_match() { cannot_match_ = true; } 572a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct Position { 573a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Position() : mask(0), value(0), determines_perfectly(false) { } 574a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 mask; 575a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uc16 value; 576a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool determines_perfectly; 577a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 578a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters() { return characters_; } 579a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_characters(int characters) { characters_ = characters; } 580a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Position* positions(int index) { 581a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ASSERT(index >= 0); 582a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ASSERT(index < characters_); 583a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return positions_ + index; 584a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 585a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t mask() { return mask_; } 586a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t value() { return value_; } 587a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 588a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 589a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // How many characters do we have quick check information from. This is 590a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the same for all branches of a choice node. 591a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_; 592a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Position positions_[4]; 593a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // These values are the condensate of the above array after Rationalize(). 594a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t mask_; 595a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block uint32_t value_; 596a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If set to true, there is no way this quick check can match at all. 597a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // E.g., if it requires to be at the start of the input, and isn't. 598a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool cannot_match_; 599a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 600a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 601a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 602a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass RegExpNode: public ZoneObject { 603a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 604e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke RegExpNode() : first_character_set_(NULL), trace_count_(0) { } 605a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual ~RegExpNode(); 606a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor) = 0; 607a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Generates a goto to this node or actually generates the code at this point. 608a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; 609a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // How many characters must this node consume at a minimum in order to 610a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // succeed. If we have found at least 'still_to_find' characters that 611a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // must be consumed there is no need to ask any following nodes whether 612b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // they are sure to eat any more characters. The not_at_start argument is 613b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // used to indicate that we know we are not at the start of the input. In 614b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // this case anchored branches will always fail and can be ignored when 615b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch // determining how many characters are consumed on success. 616b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 617b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 618b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) = 0; 619a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Emits some quick code that checks whether the preloaded characters match. 620a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Falls through on certain failure, jumps to the label on possible success. 621a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If the node cannot make a quick check it does nothing and returns false. 622a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool EmitQuickCheck(RegExpCompiler* compiler, 623a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace, 624a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preload_has_checked_bounds, 625a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* on_possible_success, 626a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails* details_return, 627a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool fall_through_on_failure); 628a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // For a given number of characters this returns a mask and a value. The 629a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // next n characters are anded with the mask and compared with the value. 630a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A comparison failure indicates the node cannot match the next n characters. 631a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A comparison success indicates the node may match. 632a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 633a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 634a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 635a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) = 0; 636a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kNodeIsTooComplexForGreedyLoops = -1; 637a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 638a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* label() { return &label_; } 6393ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch // If non-generic code is generated for a node (i.e. the node is not at the 640a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // start of the trace) then it cannot be reused. This variable sets a limit 641a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // on how often we allow that to happen before we insist on starting a new 642a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // trace and generating generic code for a node that can be reused by flushing 643a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the deferred actions in the current trace and generating a goto. 644a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kMaxCopiesCodeGenerated = 10; 645a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 646a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo* info() { return &info_; } 647a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 648a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddSibling(RegExpNode* node) { siblings_.Add(node); } 649a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 650a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Static version of EnsureSibling that expresses the fact that the 651a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // result has the same type as the input. 652a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block template <class C> 653a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) { 654a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return static_cast<C*>(node->EnsureSibling(info, cloned)); 655a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 656a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 657a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SiblingList* siblings() { return &siblings_; } 658a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_siblings(SiblingList* other) { siblings_ = *other; } 659a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 660e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Return the set of possible next characters recognized by the regexp 661e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // (or a safe subset, potentially the set of all characters). 662e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* FirstCharacterSet(); 663e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 664e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Compute (if possible within the budget of traversed nodes) the 665e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // possible first characters of the input matched by this node and 666e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // its continuation. Returns the remaining budget after the computation. 667e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // If the budget is spent, the result is negative, and the cached 668e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // first_character_set_ value isn't set. 669e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 670e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 671e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Get and set the cached first character set value. 672e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* first_character_set() { 673e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke return first_character_set_; 674e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 675e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke void set_first_character_set(ZoneList<CharacterRange>* character_set) { 676e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke first_character_set_ = character_set; 677e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 678e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 679a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block protected: 680a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum LimitResult { DONE, CONTINUE }; 681e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kComputeFirstCharacterSetFail = -1; 682e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 683a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace); 684a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 685a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns a sibling of this node whose interests and assumptions 686a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // match the ones in the given node info. If no sibling exists NULL 687a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // is returned. 688a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* TryGetSibling(NodeInfo* info); 689a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 690a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns a sibling of this node whose interests match the ones in 691a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // the given node info. The info must not contain any assertions. 692a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If no node exists a new one will be created by cloning the current 693a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // node. The result will always be an instance of the same concrete 694a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // class as this node. 695a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned); 696a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 697a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns a clone of this node initialized using the copy constructor 698a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // of its concrete class. Note that the node may have to be pre- 699a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // processed before it is on a usable state. 700a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual RegExpNode* Clone() = 0; 701a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 702a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 703e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kFirstCharBudget = 10; 704a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label label_; 705a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NodeInfo info_; 706a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SiblingList siblings_; 707e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke ZoneList<CharacterRange>* first_character_set_; 708a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // This variable keeps track of how many times code has been generated for 709a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // this node (in different traces). We don't keep track of where the 710a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // generated code is located unless the code is generated at the start of 711a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // a trace, in which case it is generic and can be reused by flushing the 712a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // deferred operations in the current trace and generating a goto. 713a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int trace_count_; 714a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 715a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 716a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 717a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// A simple closed interval. 718a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass Interval { 719a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 720a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval() : from_(kNone), to_(kNone) { } 721a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval(int from, int to) : from_(from), to_(to) { } 722a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval Union(Interval that) { 723a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block if (that.from_ == kNone) 724a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return *this; 725a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block else if (from_ == kNone) 726a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return that; 727a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block else 728a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return Interval(Min(from_, that.from_), Max(to_, that.to_)); 729a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 730a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool Contains(int value) { 731a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return (from_ <= value) && (value <= to_); 732a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 733a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_empty() { return from_ == kNone; } 734a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int from() { return from_; } 735a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int to() { return to_; } 736a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static Interval Empty() { return Interval(); } 737a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kNone = -1; 738a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 739a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int from_; 740a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int to_; 741a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 742a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 743a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 744a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass SeqRegExpNode: public RegExpNode { 745a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 746a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit SeqRegExpNode(RegExpNode* on_success) 747a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : on_success_(on_success) { } 748a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success() { return on_success_; } 749a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_on_success(RegExpNode* node) { on_success_ = node; } 750a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 751a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success_; 752a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 753a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 754a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 755a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass ActionNode: public SeqRegExpNode { 756a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 757a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum Type { 758a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SET_REGISTER, 759a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block INCREMENT_REGISTER, 760a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block STORE_POSITION, 761a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block BEGIN_SUBMATCH, 762a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block POSITIVE_SUBMATCH_SUCCESS, 763a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block EMPTY_MATCH_CHECK, 764a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CLEAR_CAPTURES 765a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 766a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success); 767a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); 768a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* StorePosition(int reg, 769a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_capture, 770a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success); 771a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); 772a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* BeginSubmatch(int stack_pointer_reg, 773a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int position_reg, 774a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success); 775a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg, 776a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int restore_reg, 777a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_count, 778a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_from, 779a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success); 780a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static ActionNode* EmptyMatchCheck(int start_register, 781a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int repetition_register, 782a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int repetition_limit, 783a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success); 784a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 785a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 786b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 787b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 788b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 789a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 790a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 791a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int filled_in, 792a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 793a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return on_success()->GetQuickCheckDetails( 794a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block details, compiler, filled_in, not_at_start); 795a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 796a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Type type() { return type_; } 797a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // TODO(erikcorry): We should allow some action nodes in greedy loops. 798a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } 799a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual ActionNode* Clone() { return new ActionNode(*this); } 800e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 801589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 802a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 803a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block union { 804a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct { 805a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg; 806a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int value; 807a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } u_store_register; 808a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct { 809a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg; 810a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } u_increment_register; 811a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct { 812a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg; 813a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_capture; 814a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } u_position_register; 815a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct { 816a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int stack_pointer_register; 817a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int current_position_register; 818a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_register_count; 819a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_register_from; 820a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } u_submatch; 821a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct { 822a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int start_register; 823a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int repetition_register; 824a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int repetition_limit; 825a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } u_empty_match_check; 826a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct { 827a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int range_from; 828a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int range_to; 829a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } u_clear_captures; 830a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } data_; 831a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ActionNode(Type type, RegExpNode* on_success) 832a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : SeqRegExpNode(on_success), 833a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block type_(type) { } 834a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Type type_; 835a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block friend class DotPrinter; 836a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 837a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 838a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 839a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass TextNode: public SeqRegExpNode { 840a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 841a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextNode(ZoneList<TextElement>* elms, 842a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) 843a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : SeqRegExpNode(on_success), 844a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block elms_(elms) { } 845a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextNode(RegExpCharacterClass* that, 846a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) 847a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : SeqRegExpNode(on_success), 848a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block elms_(new ZoneList<TextElement>(1)) { 849a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block elms_->Add(TextElement::CharClass(that)); 850a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 851a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 852a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 853b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 854b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 855b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 856a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 857a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 858a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 859a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start); 860a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<TextElement>* elements() { return elms_; } 861d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block void MakeCaseIndependent(bool is_ascii); 862a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual int GreedyLoopTextLength(); 863a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual TextNode* Clone() { 864a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextNode* result = new TextNode(*this); 865a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block result->CalculateOffsets(); 866a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return result; 867a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 868a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void CalculateOffsets(); 869e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 870589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 871a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 872a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum TextEmitPassType { 873a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NON_ASCII_MATCH, // Check for characters that can't match. 874a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block SIMPLE_CHARACTER_MATCH, // Case-dependent single character check. 875a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs. 876a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CASE_CHARACTER_MATCH, // Case-independent single character check. 877a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CHARACTER_CLASS_MATCH // Character class. 878a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 879a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static bool SkipPass(int pass, bool ignore_case); 880a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH; 881a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static const int kLastPass = CHARACTER_CLASS_MATCH; 882a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void TextEmitPass(RegExpCompiler* compiler, 883a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TextEmitPassType pass, 884a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool preloaded, 885a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace, 886a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool first_element_checked, 887a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int* checked_up_to); 888a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int Length(); 889a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<TextElement>* elms_; 890a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 891a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 892a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 893a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass AssertionNode: public SeqRegExpNode { 894a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 895a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum AssertionNodeType { 896a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AT_END, 897a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AT_START, 898a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AT_BOUNDARY, 899a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AT_NON_BOUNDARY, 900e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke AFTER_NEWLINE, 901e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Types not directly expressible in regexp syntax. 902e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // Used for modifying a boundary node if its following character is 903e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke // known to be word and/or non-word. 904e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke AFTER_NONWORD_CHARACTER, 905e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke AFTER_WORD_CHARACTER 906a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 907a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static AssertionNode* AtEnd(RegExpNode* on_success) { 908a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return new AssertionNode(AT_END, on_success); 909a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 910a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static AssertionNode* AtStart(RegExpNode* on_success) { 911a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return new AssertionNode(AT_START, on_success); 912a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 913a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static AssertionNode* AtBoundary(RegExpNode* on_success) { 914a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return new AssertionNode(AT_BOUNDARY, on_success); 915a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 916a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static AssertionNode* AtNonBoundary(RegExpNode* on_success) { 917a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return new AssertionNode(AT_NON_BOUNDARY, on_success); 918a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 919a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static AssertionNode* AfterNewline(RegExpNode* on_success) { 920a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return new AssertionNode(AFTER_NEWLINE, on_success); 921a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 922a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 923a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 924b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 925b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 926b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 927a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 928a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 929a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int filled_in, 930a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start); 931e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 932a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual AssertionNode* Clone() { return new AssertionNode(*this); } 933a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AssertionNodeType type() { return type_; } 934e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke void set_type(AssertionNodeType type) { type_ = type; } 935589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 936a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 937a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AssertionNode(AssertionNodeType t, RegExpNode* on_success) 938a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : SeqRegExpNode(on_success), type_(t) { } 939a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AssertionNodeType type_; 940a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 941a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 942a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 943a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass BackReferenceNode: public SeqRegExpNode { 944a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 945a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block BackReferenceNode(int start_reg, 946a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int end_reg, 947a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* on_success) 948a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : SeqRegExpNode(on_success), 949a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block start_reg_(start_reg), 950a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block end_reg_(end_reg) { } 951a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 952a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int start_register() { return start_reg_; } 953a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int end_register() { return end_reg_; } 954a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 955b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 956b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 957b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 958a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 959a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 960a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 961a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 962a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return; 963a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 964a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } 965e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 966589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 967a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 968a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int start_reg_; 969a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int end_reg_; 970a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 971a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 972a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 973a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass EndNode: public RegExpNode { 974a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 975a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; 976a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit EndNode(Action action) : action_(action) { } 977a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 978a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 979b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 980b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 981b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start) { return 0; } 982a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 983a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 984a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 985a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start) { 986a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returning 0 from EatsAtLeast should ensure we never get here. 987a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNREACHABLE(); 988a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 989a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual EndNode* Clone() { return new EndNode(*this); } 990a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 991a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Action action_; 992a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 993a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 994a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 995a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass NegativeSubmatchSuccess: public EndNode { 996a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 997a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block NegativeSubmatchSuccess(int stack_pointer_reg, 998a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int position_reg, 999a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_count, 1000a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_start) 1001a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : EndNode(NEGATIVE_SUBMATCH_SUCCESS), 1002a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block stack_pointer_register_(stack_pointer_reg), 1003a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block current_position_register_(position_reg), 1004a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block clear_capture_count_(clear_capture_count), 1005a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block clear_capture_start_(clear_capture_start) { } 1006a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1007a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1008a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1009a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int stack_pointer_register_; 1010a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int current_position_register_; 1011a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_count_; 1012a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int clear_capture_start_; 1013a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1014a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1015a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1016a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass Guard: public ZoneObject { 1017a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1018a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum Relation { LT, GEQ }; 1019a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Guard(int reg, Relation op, int value) 1020a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : reg_(reg), 1021a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block op_(op), 1022a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value_(value) { } 1023a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg() { return reg_; } 1024a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Relation op() { return op_; } 1025a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int value() { return value_; } 1026a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1027a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1028a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg_; 1029a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Relation op_; 1030a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int value_; 1031a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1032a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1033a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1034a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass GuardedAlternative { 1035a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1036a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { } 1037a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddGuard(Guard* guard); 1038a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node() { return node_; } 1039a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_node(RegExpNode* node) { node_ = node; } 1040a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<Guard*>* guards() { return guards_; } 1041a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1042a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1043a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node_; 1044a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<Guard*>* guards_; 1045a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1046a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1047a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1048a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass AlternativeGeneration; 1049a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1050a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1051a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass ChoiceNode: public RegExpNode { 1052a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1053a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit ChoiceNode(int expected_size) 1054a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : alternatives_(new ZoneList<GuardedAlternative>(expected_size)), 1055a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block table_(NULL), 1056a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block not_at_start_(false), 1057a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block being_calculated_(false) { } 1058a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 1059a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } 1060a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } 1061a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTable* GetTable(bool ignore_case); 1062a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1063b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 1064b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 1065b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 1066a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int EatsAtLeastHelper(int still_to_find, 1067a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int recursion_depth, 1068b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch RegExpNode* ignore_this_node, 1069b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 1070a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1071a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 1072a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 1073a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start); 1074a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } 1075a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1076a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool being_calculated() { return being_calculated_; } 1077a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start() { return not_at_start_; } 1078a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_not_at_start() { not_at_start_ = true; } 1079a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_being_calculated(bool b) { being_calculated_ = b; } 1080a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } 1081a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1082a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block protected: 1083589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); 1084a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ZoneList<GuardedAlternative>* alternatives_; 1085a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1086a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1087a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block friend class DispatchTableConstructor; 1088a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block friend class Analysis; 1089a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void GenerateGuard(RegExpMacroAssembler* macro_assembler, 1090a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Guard* guard, 1091a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace); 1092b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start); 1093a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void EmitOutOfLineContinuation(RegExpCompiler* compiler, 1094a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace* trace, 1095a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative alternative, 1096a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AlternativeGeneration* alt_gen, 1097a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int preload_characters, 1098a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool next_expects_preload); 1099a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTable* table_; 1100a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // If true, this node is never checked at the start of the input. 1101a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Allows a new trace to start with at_start() set to false. 1102a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start_; 1103a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool being_calculated_; 1104a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1105a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1106a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1107a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass NegativeLookaheadChoiceNode: public ChoiceNode { 1108a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1109a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, 1110a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block GuardedAlternative then_do_this) 1111a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : ChoiceNode(2) { 1112a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddAlternative(this_must_fail); 1113a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block AddAlternative(then_do_this); 1114a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1115b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 1116b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 1117b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 1118a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1119a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 1120a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 1121a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start); 1122a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // For a negative lookahead we don't emit the quick check for the 1123a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // alternative that is expected to fail. This is because quick check code 1124a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // starts by loading enough characters for the alternative that takes fewest 1125a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // characters, but on a negative lookahead the negative branch did not take 1126a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // part in that calculation (EatsAtLeast) so the assumptions don't hold. 1127a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } 1128e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 1129a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1130a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1131a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1132a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass LoopChoiceNode: public ChoiceNode { 1133a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1134a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit LoopChoiceNode(bool body_can_be_zero_length) 1135a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : ChoiceNode(2), 1136a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block loop_node_(NULL), 1137a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block continue_node_(NULL), 1138a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block body_can_be_zero_length_(body_can_be_zero_length) { } 1139a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddLoopAlternative(GuardedAlternative alt); 1140a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddContinueAlternative(GuardedAlternative alt); 1141a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Emit(RegExpCompiler* compiler, Trace* trace); 1142b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch virtual int EatsAtLeast(int still_to_find, 1143b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch int recursion_depth, 1144b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch bool not_at_start); 1145a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void GetQuickCheckDetails(QuickCheckDetails* details, 1146a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompiler* compiler, 1147a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_filled_in, 1148a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool not_at_start); 1149e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke virtual int ComputeFirstCharacterSet(int budget); 1150a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } 1151a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* loop_node() { return loop_node_; } 1152a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* continue_node() { return continue_node_; } 1153a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool body_can_be_zero_length() { return body_can_be_zero_length_; } 1154a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Accept(NodeVisitor* visitor); 1155a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1156a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1157a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // AddAlternative is made private for loop nodes because alternatives 1158a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // should not be added freely, we need to keep track of which node 1159a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // goes back to the node itself. 1160a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddAlternative(GuardedAlternative node) { 1161a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ChoiceNode::AddAlternative(node); 1162a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1163a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1164a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* loop_node_; 1165a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* continue_node_; 1166a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool body_can_be_zero_length_; 1167a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1168a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1169a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1170a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// There are many ways to generate code for a node. This class encapsulates 1171a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// the current way we should be generating. In other words it encapsulates 1172a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// the current state of the code generator. The effect of this is that we 1173a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// generate code for paths that the matcher can take through the regular 1174a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// expression. A given node in the regexp can be code-generated several times 1175a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// as it can be part of several traces. For example for the regexp: 1176a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part 1177a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code 1178a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// to match foo is generated only once (the traces have a common prefix). The 1179a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// code to store the capture is deferred and generated (twice) after the places 1180a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// where baz has been matched. 1181a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass Trace { 1182a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1183a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A value for a property that is either known to be true, know to be false, 1184a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // or not known. 1185a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block enum TriBool { 1186a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block UNKNOWN = -1, FALSE = 0, TRUE = 1 1187a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1188a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1189a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class DeferredAction { 1190a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1191a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredAction(ActionNode::Type type, int reg) 1192a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : type_(type), reg_(reg), next_(NULL) { } 1193a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredAction* next() { return next_; } 1194a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool Mentions(int reg); 1195a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg() { return reg_; } 1196a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ActionNode::Type type() { return type_; } 1197a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1198a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ActionNode::Type type_; 1199a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int reg_; 1200a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredAction* next_; 1201a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block friend class Trace; 1202a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1203a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1204a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class DeferredCapture : public DeferredAction { 1205a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1206a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredCapture(int reg, bool is_capture, Trace* trace) 1207a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : DeferredAction(ActionNode::STORE_POSITION, reg), 1208a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cp_offset_(trace->cp_offset()), 1209a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block is_capture_(is_capture) { } 1210a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset() { return cp_offset_; } 1211a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_capture() { return is_capture_; } 1212a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1213a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset_; 1214a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_capture_; 1215a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } 1216a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1217a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1218a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class DeferredSetRegister : public DeferredAction { 1219a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1220a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredSetRegister(int reg, int value) 1221a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : DeferredAction(ActionNode::SET_REGISTER, reg), 1222a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block value_(value) { } 1223a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int value() { return value_; } 1224a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1225a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int value_; 1226a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1227a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1228a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class DeferredClearCaptures : public DeferredAction { 1229a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1230a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit DeferredClearCaptures(Interval range) 1231a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), 1232a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block range_(range) { } 1233a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval range() { return range_; } 1234a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1235a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Interval range_; 1236a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1237a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1238a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block class DeferredIncrementRegister : public DeferredAction { 1239a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1240a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit DeferredIncrementRegister(int reg) 1241a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } 1242a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1243a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1244a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Trace() 1245a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : cp_offset_(0), 1246a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block actions_(NULL), 1247a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block backtrack_(NULL), 1248a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block stop_node_(NULL), 1249a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block loop_label_(NULL), 1250a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_preloaded_(0), 1251a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bound_checked_up_to_(0), 1252a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block flush_budget_(100), 1253a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block at_start_(UNKNOWN) { } 1254a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1255a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // End the trace. This involves flushing the deferred actions in the trace 1256a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // and pushing a backtrack location onto the backtrack stack. Once this is 1257a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // done we can start a new trace or go to one that has already been 1258a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // generated. 1259a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void Flush(RegExpCompiler* compiler, RegExpNode* successor); 1260a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset() { return cp_offset_; } 1261a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredAction* actions() { return actions_; } 1262a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // A trivial trace is one that has no deferred actions or other state that 1263a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // affects the assumptions used when generating code. There is no recorded 1264a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // backtrack location in a trivial trace, so with a trivial trace we will 1265a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // generate code that, on a failure to match, gets the backtrack location 1266a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // from the backtrack stack rather than using a direct jump instruction. We 1267a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // always start code generation with a trivial trace and non-trivial traces 1268a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // are created as we emit code for nodes or add to the list of deferred 1269a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // actions in the trace. The location of the code generated for a node using 1270a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // a trivial trace is recorded in a label in the node so that gotos can be 1271a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // generated to that code. 1272a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_trivial() { 1273a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return backtrack_ == NULL && 1274a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block actions_ == NULL && 1275a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block cp_offset_ == 0 && 1276a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block characters_preloaded_ == 0 && 1277a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bound_checked_up_to_ == 0 && 1278a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block quick_check_performed_.characters() == 0 && 1279a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block at_start_ == UNKNOWN; 1280a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1281a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TriBool at_start() { return at_start_; } 1282a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; } 1283a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* backtrack() { return backtrack_; } 1284a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* loop_label() { return loop_label_; } 1285a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* stop_node() { return stop_node_; } 1286a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_preloaded() { return characters_preloaded_; } 1287a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int bound_checked_up_to() { return bound_checked_up_to_; } 1288a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int flush_budget() { return flush_budget_; } 1289a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; } 1290a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool mentions_reg(int reg); 1291a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // Returns true if a deferred position store exists to the specified 1292a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // register and stores the offset in the out-parameter. Otherwise 1293a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // returns false. 1294a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool GetStoredPosition(int reg, int* cp_offset); 1295a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // These set methods and AdvanceCurrentPositionInTrace should be used only on 1296a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block // new traces - the intention is that traces are immutable after creation. 1297a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void add_action(DeferredAction* new_action) { 1298a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ASSERT(new_action->next_ == NULL); 1299a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block new_action->next_ = actions_; 1300a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block actions_ = new_action; 1301a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1302a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_backtrack(Label* backtrack) { backtrack_ = backtrack; } 1303a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_stop_node(RegExpNode* node) { stop_node_ = node; } 1304a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_loop_label(Label* label) { loop_label_ = label; } 1305e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke void set_characters_preloaded(int count) { characters_preloaded_ = count; } 1306a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; } 1307a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_flush_budget(int to) { flush_budget_ = to; } 1308a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_quick_check_performed(QuickCheckDetails* d) { 1309a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block quick_check_performed_ = *d; 1310a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1311a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void InvalidateCurrentCharacter(); 1312a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); 1313589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 1314a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1315a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int FindAffectedRegisters(OutSet* affected_registers); 1316a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void PerformDeferredActions(RegExpMacroAssembler* macro, 1317a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max_register, 1318a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet& affected_registers, 1319a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* registers_to_pop, 1320a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet* registers_to_clear); 1321a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void RestoreAffectedRegisters(RegExpMacroAssembler* macro, 1322a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int max_register, 1323a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet& registers_to_pop, 1324a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block OutSet& registers_to_clear); 1325a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int cp_offset_; 1326a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DeferredAction* actions_; 1327a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* backtrack_; 1328a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* stop_node_; 1329a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Label* loop_label_; 1330a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int characters_preloaded_; 1331a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int bound_checked_up_to_; 1332a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block QuickCheckDetails quick_check_performed_; 1333a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int flush_budget_; 1334a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block TriBool at_start_; 1335a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1336a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1337a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1338a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass NodeVisitor { 1339a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1340a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual ~NodeVisitor() { } 1341a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define DECLARE_VISIT(Type) \ 1342a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Visit##Type(Type##Node* that) = 0; 1343a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockFOR_EACH_NODE_TYPE(DECLARE_VISIT) 1344a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#undef DECLARE_VISIT 1345a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } 1346a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1347a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1348a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1349a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Node visitor used to add the start set of the alternatives to the 1350a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// dispatch table of a choice node. 1351a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass DispatchTableConstructor: public NodeVisitor { 1352a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1353a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTableConstructor(DispatchTable* table, bool ignore_case) 1354a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : table_(table), 1355a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block choice_index_(-1), 1356a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ignore_case_(ignore_case) { } 1357a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1358a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void BuildTable(ChoiceNode* node); 1359a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1360a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddRange(CharacterRange range) { 1361a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block table()->AddRange(range, choice_index_); 1362a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1363a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1364a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void AddInverse(ZoneList<CharacterRange>* ranges); 1365a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1366a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define DECLARE_VISIT(Type) \ 1367a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Visit##Type(Type##Node* that); 1368a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockFOR_EACH_NODE_TYPE(DECLARE_VISIT) 1369a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#undef DECLARE_VISIT 1370a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1371a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTable* table() { return table_; } 1372a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void set_choice_index(int value) { choice_index_ = value; } 1373a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1374a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block protected: 1375a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DispatchTable* table_; 1376a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int choice_index_; 1377a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool ignore_case_; 1378a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1379a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1380a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1381a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// Assertion propagation moves information about assertions such as 1382a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// \b to the affected nodes. For instance, in /.\b./ information must 1383a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// be propagated to the first '.' that whatever follows needs to know 1384a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// if it matched a word or a non-word, and to the second '.' that it 1385a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// has to check if it succeeds a word or non-word. In this case the 1386a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// result will be something like: 1387a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// 1388a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// +-------+ +------------+ 1389a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// | . | | . | 1390a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// +-------+ ---> +------------+ 1391a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// | word? | | check word | 1392a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block// +-------+ +------------+ 1393a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass Analysis: public NodeVisitor { 1394a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1395d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block Analysis(bool ignore_case, bool is_ascii) 1396d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block : ignore_case_(ignore_case), 1397d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block is_ascii_(is_ascii), 1398d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block error_message_(NULL) { } 1399a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void EnsureAnalyzed(RegExpNode* node); 1400a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1401a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#define DECLARE_VISIT(Type) \ 1402a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void Visit##Type(Type##Node* that); 1403a7e24c173cf37484693b9abb38e494fa7bd7baebSteve BlockFOR_EACH_NODE_TYPE(DECLARE_VISIT) 1404a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#undef DECLARE_VISIT 1405a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block virtual void VisitLoopChoice(LoopChoiceNode* that); 1406a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1407a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool has_failed() { return error_message_ != NULL; } 1408a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const char* error_message() { 1409a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block ASSERT(error_message_ != NULL); 1410a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block return error_message_; 1411a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1412a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block void fail(const char* error_message) { 1413a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block error_message_ = error_message; 1414a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block } 1415589d6979ff2ef66fca2d8fa51404c369ca5e9250Ben Murdoch 1416a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block private: 1417a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool ignore_case_; 1418d0582a6c46733687d045e4188a1bcd0123c758a1Steve Block bool is_ascii_; 1419a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const char* error_message_; 1420a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1421a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); 1422a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1423a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1424a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1425a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockstruct RegExpCompileData { 1426a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpCompileData() 1427a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : tree(NULL), 1428a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block node(NULL), 1429a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block simple(true), 1430a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block contains_anchor(false), 1431a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block capture_count(0) { } 1432a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpTree* tree; 1433a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block RegExpNode* node; 1434a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool simple; 1435a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool contains_anchor; 1436a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> error; 1437a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int capture_count; 1438a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1439a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1440a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1441a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Blockclass RegExpEngine: public AllStatic { 1442a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block public: 1443a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block struct CompilationResult { 1444a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block explicit CompilationResult(const char* error_message) 1445a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : error_message(error_message), 144644f0eee88ff00398ff7f715fab053374d808c90dSteve Block code(HEAP->the_hole_value()), 1447a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block num_registers(0) {} 1448a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block CompilationResult(Object* code, int registers) 1449a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block : error_message(NULL), 1450a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block code(code), 1451a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block num_registers(registers) {} 1452a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block const char* error_message; 1453a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Object* code; 1454a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block int num_registers; 1455a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block }; 1456a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1457a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static CompilationResult Compile(RegExpCompileData* input, 1458a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool ignore_case, 1459a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool multiline, 1460a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block Handle<String> pattern, 1461a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block bool is_ascii); 1462a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1463a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); 1464a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block}; 1465a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1466a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1467e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarkeclass OffsetsVector { 1468e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke public: 14693ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch inline OffsetsVector(int num_registers, Isolate* isolate) 1470e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke : offsets_vector_length_(num_registers) { 147144f0eee88ff00398ff7f715fab053374d808c90dSteve Block if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { 1472e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke vector_ = NewArray<int>(offsets_vector_length_); 1473e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } else { 14743ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch vector_ = isolate->jsregexp_static_offsets_vector(); 1475e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 1476e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 1477e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke inline ~OffsetsVector() { 147844f0eee88ff00398ff7f715fab053374d808c90dSteve Block if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { 1479e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke DeleteArray(vector_); 1480e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke vector_ = NULL; 1481e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 1482e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 1483e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke inline int* vector() { return vector_; } 1484e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke inline int length() { return offsets_vector_length_; } 1485e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 1486e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke static const int kStaticOffsetsVectorSize = 50; 1487e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 1488e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke private: 148944f0eee88ff00398ff7f715fab053374d808c90dSteve Block static Address static_offsets_vector_address(Isolate* isolate) { 149044f0eee88ff00398ff7f715fab053374d808c90dSteve Block return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector()); 1491e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke } 1492e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 1493e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int* vector_; 1494e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke int offsets_vector_length_; 1495e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 1496e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke friend class ExternalReference; 1497e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke}; 1498e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 1499e46be819fca9468a0cd4e74859ce0f778eb8ca60Leon Clarke 1500a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block} } // namespace v8::internal 1501a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block 1502a7e24c173cf37484693b9abb38e494fa7bd7baebSteve Block#endif // V8_JSREGEXP_H_ 1503