regexp-macro-assembler.cc revision c7cc028aaeedbbfa11c11d0b7b243b3d9e837ed9
1// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
31#include "regexp-stack.h"
32#include "regexp-macro-assembler.h"
33#include "simulator.h"
34
35namespace v8 {
36namespace internal {
37
38RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
46bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48  return true;
49#else
50  return false;
51#endif
52}
53
54
55#ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
56
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler()
58    : RegExpMacroAssembler() {
59}
60
61
62NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
63}
64
65
66bool NativeRegExpMacroAssembler::CanReadUnaligned() {
67#ifdef V8_TARGET_CAN_READ_UNALIGNED
68  return !slow_safe();
69#else
70  return false;
71#endif
72}
73
74const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
75    String* subject,
76    int start_index) {
77  // Not just flat, but ultra flat.
78  ASSERT(subject->IsExternalString() || subject->IsSeqString());
79  ASSERT(start_index >= 0);
80  ASSERT(start_index <= subject->length());
81  if (subject->IsAsciiRepresentation()) {
82    const byte* address;
83    if (StringShape(subject).IsExternal()) {
84      const char* data = ExternalAsciiString::cast(subject)->GetChars();
85      address = reinterpret_cast<const byte*>(data);
86    } else {
87      ASSERT(subject->IsSeqAsciiString());
88      char* data = SeqAsciiString::cast(subject)->GetChars();
89      address = reinterpret_cast<const byte*>(data);
90    }
91    return address + start_index;
92  }
93  const uc16* data;
94  if (StringShape(subject).IsExternal()) {
95    data = ExternalTwoByteString::cast(subject)->GetChars();
96  } else {
97    ASSERT(subject->IsSeqTwoByteString());
98    data = SeqTwoByteString::cast(subject)->GetChars();
99  }
100  return reinterpret_cast<const byte*>(data + start_index);
101}
102
103
104NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
105    Handle<Code> regexp_code,
106    Handle<String> subject,
107    int* offsets_vector,
108    int offsets_vector_length,
109    int previous_index,
110    Isolate* isolate) {
111
112  ASSERT(subject->IsFlat());
113  ASSERT(previous_index >= 0);
114  ASSERT(previous_index <= subject->length());
115
116  // No allocations before calling the regexp, but we can't use
117  // AssertNoAllocation, since regexps might be preempted, and another thread
118  // might do allocation anyway.
119
120  String* subject_ptr = *subject;
121  // Character offsets into string.
122  int start_offset = previous_index;
123  int char_length = subject_ptr->length() - start_offset;
124  int slice_offset = 0;
125
126  // The string has been flattened, so if it is a cons string it contains the
127  // full string in the first part.
128  if (StringShape(subject_ptr).IsCons()) {
129    ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
130    subject_ptr = ConsString::cast(subject_ptr)->first();
131  } else if (StringShape(subject_ptr).IsSliced()) {
132    SlicedString* slice = SlicedString::cast(subject_ptr);
133    subject_ptr = slice->parent();
134    slice_offset = slice->offset();
135  }
136  // Ensure that an underlying string has the same ASCII-ness.
137  bool is_ascii = subject_ptr->IsAsciiRepresentation();
138  ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
139  // String is now either Sequential or External
140  int char_size_shift = is_ascii ? 0 : 1;
141
142  const byte* input_start =
143      StringCharacterPosition(subject_ptr, start_offset + slice_offset);
144  int byte_length = char_length << char_size_shift;
145  const byte* input_end = input_start + byte_length;
146  Result res = Execute(*regexp_code,
147                       *subject,
148                       start_offset,
149                       input_start,
150                       input_end,
151                       offsets_vector,
152                       isolate);
153  return res;
154}
155
156
157NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
158    Code* code,
159    String* input,  // This needs to be the unpacked (sliced, cons) string.
160    int start_offset,
161    const byte* input_start,
162    const byte* input_end,
163    int* output,
164    Isolate* isolate) {
165  ASSERT(isolate == Isolate::Current());
166  // Ensure that the minimum stack has been allocated.
167  RegExpStackScope stack_scope(isolate);
168  Address stack_base = stack_scope.stack()->stack_base();
169
170  int direct_call = 0;
171  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
172                                          input,
173                                          start_offset,
174                                          input_start,
175                                          input_end,
176                                          output,
177                                          stack_base,
178                                          direct_call,
179                                          isolate);
180  ASSERT(result <= SUCCESS);
181  ASSERT(result >= RETRY);
182
183  if (result == EXCEPTION && !isolate->has_pending_exception()) {
184    // We detected a stack overflow (on the backtrack stack) in RegExp code,
185    // but haven't created the exception yet.
186    isolate->StackOverflow();
187  }
188  return static_cast<Result>(result);
189}
190
191
192const byte NativeRegExpMacroAssembler::word_character_map[] = {
193    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
197
198    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
201    0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
202
203    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
204    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
205    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
206    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
207
208    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
209    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
210    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
211    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
212};
213
214
215int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
216    Address byte_offset1,
217    Address byte_offset2,
218    size_t byte_length,
219    Isolate* isolate) {
220  ASSERT(isolate == Isolate::Current());
221  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
222      isolate->regexp_macro_assembler_canonicalize();
223  // This function is not allowed to cause a garbage collection.
224  // A GC might move the calling generated code and invalidate the
225  // return address on the stack.
226  ASSERT(byte_length % 2 == 0);
227  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
228  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
229  size_t length = byte_length >> 1;
230
231  for (size_t i = 0; i < length; i++) {
232    unibrow::uchar c1 = substring1[i];
233    unibrow::uchar c2 = substring2[i];
234    if (c1 != c2) {
235      unibrow::uchar s1[1] = { c1 };
236      canonicalize->get(c1, '\0', s1);
237      if (s1[0] != c2) {
238        unibrow::uchar s2[1] = { c2 };
239        canonicalize->get(c2, '\0', s2);
240        if (s1[0] != s2[0]) {
241          return 0;
242        }
243      }
244    }
245  }
246  return 1;
247}
248
249
250Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
251                                              Address* stack_base,
252                                              Isolate* isolate) {
253  ASSERT(isolate == Isolate::Current());
254  RegExpStack* regexp_stack = isolate->regexp_stack();
255  size_t size = regexp_stack->stack_capacity();
256  Address old_stack_base = regexp_stack->stack_base();
257  ASSERT(old_stack_base == *stack_base);
258  ASSERT(stack_pointer <= old_stack_base);
259  ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
260  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
261  if (new_stack_base == NULL) {
262    return NULL;
263  }
264  *stack_base = new_stack_base;
265  intptr_t stack_content_size = old_stack_base - stack_pointer;
266  return new_stack_base - stack_content_size;
267}
268
269#endif  // V8_INTERPRETED_REGEXP
270
271} }  // namespace v8::internal
272