regexp-macro-assembler.cc revision 44f0eee88ff00398ff7f715fab053374d808c90d
1// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
31#include "regexp-stack.h"
32#include "regexp-macro-assembler.h"
33#include "simulator.h"
34
35namespace v8 {
36namespace internal {
37
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
46bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48  return true;
49#else
50  return false;
51#endif
52}
53
54
55#ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
56
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
59
60
61NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
63
64
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67  return true;
68#else
69  return false;
70#endif
71}
72
73const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74    String* subject,
75    int start_index) {
76  // Not just flat, but ultra flat.
77  ASSERT(subject->IsExternalString() || subject->IsSeqString());
78  ASSERT(start_index >= 0);
79  ASSERT(start_index <= subject->length());
80  if (subject->IsAsciiRepresentation()) {
81    const byte* address;
82    if (StringShape(subject).IsExternal()) {
83      const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84      address = reinterpret_cast<const byte*>(data);
85    } else {
86      ASSERT(subject->IsSeqAsciiString());
87      char* data = SeqAsciiString::cast(subject)->GetChars();
88      address = reinterpret_cast<const byte*>(data);
89    }
90    return address + start_index;
91  }
92  const uc16* data;
93  if (StringShape(subject).IsExternal()) {
94    data = ExternalTwoByteString::cast(subject)->resource()->data();
95  } else {
96    ASSERT(subject->IsSeqTwoByteString());
97    data = SeqTwoByteString::cast(subject)->GetChars();
98  }
99  return reinterpret_cast<const byte*>(data + start_index);
100}
101
102
103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104    Handle<Code> regexp_code,
105    Handle<String> subject,
106    int* offsets_vector,
107    int offsets_vector_length,
108    int previous_index,
109    Isolate* isolate) {
110
111  ASSERT(subject->IsFlat());
112  ASSERT(previous_index >= 0);
113  ASSERT(previous_index <= subject->length());
114
115  // No allocations before calling the regexp, but we can't use
116  // AssertNoAllocation, since regexps might be preempted, and another thread
117  // might do allocation anyway.
118
119  String* subject_ptr = *subject;
120  // Character offsets into string.
121  int start_offset = previous_index;
122  int end_offset = subject_ptr->length();
123
124  // The string has been flattened, so it it is a cons string it contains the
125  // full string in the first part.
126  if (StringShape(subject_ptr).IsCons()) {
127    ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
128    subject_ptr = ConsString::cast(subject_ptr)->first();
129  }
130  // Ensure that an underlying string has the same ascii-ness.
131  bool is_ascii = subject_ptr->IsAsciiRepresentation();
132  ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
133  // String is now either Sequential or External
134  int char_size_shift = is_ascii ? 0 : 1;
135  int char_length = end_offset - start_offset;
136
137  const byte* input_start =
138      StringCharacterPosition(subject_ptr, start_offset);
139  int byte_length = char_length << char_size_shift;
140  const byte* input_end = input_start + byte_length;
141  Result res = Execute(*regexp_code,
142                       subject_ptr,
143                       start_offset,
144                       input_start,
145                       input_end,
146                       offsets_vector,
147                       isolate);
148  return res;
149}
150
151
152NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
153    Code* code,
154    String* input,
155    int start_offset,
156    const byte* input_start,
157    const byte* input_end,
158    int* output,
159    Isolate* isolate) {
160  ASSERT(isolate == Isolate::Current());
161  // Ensure that the minimum stack has been allocated.
162  RegExpStackScope stack_scope(isolate);
163  Address stack_base = stack_scope.stack()->stack_base();
164
165  int direct_call = 0;
166  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
167                                          input,
168                                          start_offset,
169                                          input_start,
170                                          input_end,
171                                          output,
172                                          stack_base,
173                                          direct_call,
174                                          isolate);
175  ASSERT(result <= SUCCESS);
176  ASSERT(result >= RETRY);
177
178  if (result == EXCEPTION && !isolate->has_pending_exception()) {
179    // We detected a stack overflow (on the backtrack stack) in RegExp code,
180    // but haven't created the exception yet.
181    isolate->StackOverflow();
182  }
183  return static_cast<Result>(result);
184}
185
186
187const byte NativeRegExpMacroAssembler::word_character_map[] = {
188    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
189    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
192
193    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
196    0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
197
198    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
199    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
200    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
201    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
202
203    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
204    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
205    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
206    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
207};
208
209
210int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
211    Address byte_offset1,
212    Address byte_offset2,
213    size_t byte_length,
214    Isolate* isolate) {
215  ASSERT(isolate == Isolate::Current());
216  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
217      isolate->regexp_macro_assembler_canonicalize();
218  // This function is not allowed to cause a garbage collection.
219  // A GC might move the calling generated code and invalidate the
220  // return address on the stack.
221  ASSERT(byte_length % 2 == 0);
222  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
223  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
224  size_t length = byte_length >> 1;
225
226  for (size_t i = 0; i < length; i++) {
227    unibrow::uchar c1 = substring1[i];
228    unibrow::uchar c2 = substring2[i];
229    if (c1 != c2) {
230      unibrow::uchar s1[1] = { c1 };
231      canonicalize->get(c1, '\0', s1);
232      if (s1[0] != c2) {
233        unibrow::uchar s2[1] = { c2 };
234        canonicalize->get(c2, '\0', s2);
235        if (s1[0] != s2[0]) {
236          return 0;
237        }
238      }
239    }
240  }
241  return 1;
242}
243
244
245Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
246                                              Address* stack_base,
247                                              Isolate* isolate) {
248  ASSERT(isolate == Isolate::Current());
249  RegExpStack* regexp_stack = isolate->regexp_stack();
250  size_t size = regexp_stack->stack_capacity();
251  Address old_stack_base = regexp_stack->stack_base();
252  ASSERT(old_stack_base == *stack_base);
253  ASSERT(stack_pointer <= old_stack_base);
254  ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
255  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
256  if (new_stack_base == NULL) {
257    return NULL;
258  }
259  *stack_base = new_stack_base;
260  intptr_t stack_content_size = old_stack_base - stack_pointer;
261  return new_stack_base - stack_content_size;
262}
263
264#endif  // V8_INTERPRETED_REGEXP
265
266} }  // namespace v8::internal
267