regexp-macro-assembler.cc revision 44f0eee88ff00398ff7f715fab053374d808c90d
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28#include "v8.h" 29#include "ast.h" 30#include "assembler.h" 31#include "regexp-stack.h" 32#include "regexp-macro-assembler.h" 33#include "simulator.h" 34 35namespace v8 { 36namespace internal { 37 38RegExpMacroAssembler::RegExpMacroAssembler() { 39} 40 41 42RegExpMacroAssembler::~RegExpMacroAssembler() { 43} 44 45 46bool RegExpMacroAssembler::CanReadUnaligned() { 47#ifdef V8_HOST_CAN_READ_UNALIGNED 48 return true; 49#else 50 return false; 51#endif 52} 53 54 55#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. 56 57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() { 58} 59 60 61NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() { 62} 63 64 65bool NativeRegExpMacroAssembler::CanReadUnaligned() { 66#ifdef V8_TARGET_CAN_READ_UNALIGNED 67 return true; 68#else 69 return false; 70#endif 71} 72 73const byte* NativeRegExpMacroAssembler::StringCharacterPosition( 74 String* subject, 75 int start_index) { 76 // Not just flat, but ultra flat. 77 ASSERT(subject->IsExternalString() || subject->IsSeqString()); 78 ASSERT(start_index >= 0); 79 ASSERT(start_index <= subject->length()); 80 if (subject->IsAsciiRepresentation()) { 81 const byte* address; 82 if (StringShape(subject).IsExternal()) { 83 const char* data = ExternalAsciiString::cast(subject)->resource()->data(); 84 address = reinterpret_cast<const byte*>(data); 85 } else { 86 ASSERT(subject->IsSeqAsciiString()); 87 char* data = SeqAsciiString::cast(subject)->GetChars(); 88 address = reinterpret_cast<const byte*>(data); 89 } 90 return address + start_index; 91 } 92 const uc16* data; 93 if (StringShape(subject).IsExternal()) { 94 data = ExternalTwoByteString::cast(subject)->resource()->data(); 95 } else { 96 ASSERT(subject->IsSeqTwoByteString()); 97 data = SeqTwoByteString::cast(subject)->GetChars(); 98 } 99 return reinterpret_cast<const byte*>(data + start_index); 100} 101 102 103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( 104 Handle<Code> regexp_code, 105 
Handle<String> subject, 106 int* offsets_vector, 107 int offsets_vector_length, 108 int previous_index, 109 Isolate* isolate) { 110 111 ASSERT(subject->IsFlat()); 112 ASSERT(previous_index >= 0); 113 ASSERT(previous_index <= subject->length()); 114 115 // No allocations before calling the regexp, but we can't use 116 // AssertNoAllocation, since regexps might be preempted, and another thread 117 // might do allocation anyway. 118 119 String* subject_ptr = *subject; 120 // Character offsets into string. 121 int start_offset = previous_index; 122 int end_offset = subject_ptr->length(); 123 124 // The string has been flattened, so it it is a cons string it contains the 125 // full string in the first part. 126 if (StringShape(subject_ptr).IsCons()) { 127 ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length()); 128 subject_ptr = ConsString::cast(subject_ptr)->first(); 129 } 130 // Ensure that an underlying string has the same ascii-ness. 131 bool is_ascii = subject_ptr->IsAsciiRepresentation(); 132 ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString()); 133 // String is now either Sequential or External 134 int char_size_shift = is_ascii ? 0 : 1; 135 int char_length = end_offset - start_offset; 136 137 const byte* input_start = 138 StringCharacterPosition(subject_ptr, start_offset); 139 int byte_length = char_length << char_size_shift; 140 const byte* input_end = input_start + byte_length; 141 Result res = Execute(*regexp_code, 142 subject_ptr, 143 start_offset, 144 input_start, 145 input_end, 146 offsets_vector, 147 isolate); 148 return res; 149} 150 151 152NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute( 153 Code* code, 154 String* input, 155 int start_offset, 156 const byte* input_start, 157 const byte* input_end, 158 int* output, 159 Isolate* isolate) { 160 ASSERT(isolate == Isolate::Current()); 161 // Ensure that the minimum stack has been allocated. 
162 RegExpStackScope stack_scope(isolate); 163 Address stack_base = stack_scope.stack()->stack_base(); 164 165 int direct_call = 0; 166 int result = CALL_GENERATED_REGEXP_CODE(code->entry(), 167 input, 168 start_offset, 169 input_start, 170 input_end, 171 output, 172 stack_base, 173 direct_call, 174 isolate); 175 ASSERT(result <= SUCCESS); 176 ASSERT(result >= RETRY); 177 178 if (result == EXCEPTION && !isolate->has_pending_exception()) { 179 // We detected a stack overflow (on the backtrack stack) in RegExp code, 180 // but haven't created the exception yet. 181 isolate->StackOverflow(); 182 } 183 return static_cast<Result>(result); 184} 185 186 187const byte NativeRegExpMacroAssembler::word_character_map[] = { 188 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 189 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 190 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 191 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 192 193 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 194 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 195 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // '0' - '7' 196 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9' 197 198 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'A' - 'G' 199 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'H' - 'O' 200 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'P' - 'W' 201 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu, // 'X' - 'Z', '_' 202 203 0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'a' - 'g' 204 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'h' - 'o' 205 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, // 'p' - 'w' 206 0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z' 207}; 208 209 210int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( 211 Address byte_offset1, 212 Address byte_offset2, 213 size_t byte_length, 214 Isolate* 
isolate) { 215 ASSERT(isolate == Isolate::Current()); 216 unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = 217 isolate->regexp_macro_assembler_canonicalize(); 218 // This function is not allowed to cause a garbage collection. 219 // A GC might move the calling generated code and invalidate the 220 // return address on the stack. 221 ASSERT(byte_length % 2 == 0); 222 uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); 223 uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); 224 size_t length = byte_length >> 1; 225 226 for (size_t i = 0; i < length; i++) { 227 unibrow::uchar c1 = substring1[i]; 228 unibrow::uchar c2 = substring2[i]; 229 if (c1 != c2) { 230 unibrow::uchar s1[1] = { c1 }; 231 canonicalize->get(c1, '\0', s1); 232 if (s1[0] != c2) { 233 unibrow::uchar s2[1] = { c2 }; 234 canonicalize->get(c2, '\0', s2); 235 if (s1[0] != s2[0]) { 236 return 0; 237 } 238 } 239 } 240 } 241 return 1; 242} 243 244 245Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, 246 Address* stack_base, 247 Isolate* isolate) { 248 ASSERT(isolate == Isolate::Current()); 249 RegExpStack* regexp_stack = isolate->regexp_stack(); 250 size_t size = regexp_stack->stack_capacity(); 251 Address old_stack_base = regexp_stack->stack_base(); 252 ASSERT(old_stack_base == *stack_base); 253 ASSERT(stack_pointer <= old_stack_base); 254 ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size); 255 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2); 256 if (new_stack_base == NULL) { 257 return NULL; 258 } 259 *stack_base = new_stack_base; 260 intptr_t stack_content_size = old_stack_base - stack_pointer; 261 return new_stack_base - stack_content_size; 262} 263 264#endif // V8_INTERPRETED_REGEXP 265 266} } // namespace v8::internal 267