1// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "ast.h"
30#include "assembler.h"
31#include "regexp-stack.h"
32#include "regexp-macro-assembler.h"
33#include "simulator.h"
34
35namespace v8 {
36namespace internal {
37
38RegExpMacroAssembler::RegExpMacroAssembler() {
39}
40
41
42RegExpMacroAssembler::~RegExpMacroAssembler() {
43}
44
45
46bool RegExpMacroAssembler::CanReadUnaligned() {
47#ifdef V8_HOST_CAN_READ_UNALIGNED
48  return true;
49#else
50  return false;
51#endif
52}
53
54
55#ifdef V8_NATIVE_REGEXP  // Avoid unused code, e.g., on ARM.
56
57NativeRegExpMacroAssembler::NativeRegExpMacroAssembler() {
58}
59
60
61NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
62}
63
64
65bool NativeRegExpMacroAssembler::CanReadUnaligned() {
66#ifdef V8_TARGET_CAN_READ_UNALIGNED
67  return true;
68#else
69  return false;
70#endif
71}
72
73const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
74    String* subject,
75    int start_index) {
76  // Not just flat, but ultra flat.
77  ASSERT(subject->IsExternalString() || subject->IsSeqString());
78  ASSERT(start_index >= 0);
79  ASSERT(start_index <= subject->length());
80  if (subject->IsAsciiRepresentation()) {
81    const byte* address;
82    if (StringShape(subject).IsExternal()) {
83      const char* data = ExternalAsciiString::cast(subject)->resource()->data();
84      address = reinterpret_cast<const byte*>(data);
85    } else {
86      ASSERT(subject->IsSeqAsciiString());
87      char* data = SeqAsciiString::cast(subject)->GetChars();
88      address = reinterpret_cast<const byte*>(data);
89    }
90    return address + start_index;
91  }
92  const uc16* data;
93  if (StringShape(subject).IsExternal()) {
94    data = ExternalTwoByteString::cast(subject)->resource()->data();
95  } else {
96    ASSERT(subject->IsSeqTwoByteString());
97    data = SeqTwoByteString::cast(subject)->GetChars();
98  }
99  return reinterpret_cast<const byte*>(data + start_index);
100}
101
102
103NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
104    Handle<Code> regexp_code,
105    Handle<String> subject,
106    int* offsets_vector,
107    int offsets_vector_length,
108    int previous_index) {
109
110  ASSERT(subject->IsFlat());
111  ASSERT(previous_index >= 0);
112  ASSERT(previous_index <= subject->length());
113
114  // No allocations before calling the regexp, but we can't use
115  // AssertNoAllocation, since regexps might be preempted, and another thread
116  // might do allocation anyway.
117
118  String* subject_ptr = *subject;
119  // Character offsets into string.
120  int start_offset = previous_index;
121  int end_offset = subject_ptr->length();
122
123  bool is_ascii = subject->IsAsciiRepresentation();
124
125  // The string has been flattened, so it it is a cons string it contains the
126  // full string in the first part.
127  if (StringShape(subject_ptr).IsCons()) {
128    ASSERT_EQ(0, ConsString::cast(subject_ptr)->second()->length());
129    subject_ptr = ConsString::cast(subject_ptr)->first();
130  }
131  // Ensure that an underlying string has the same ascii-ness.
132  ASSERT(subject_ptr->IsAsciiRepresentation() == is_ascii);
133  ASSERT(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
134  // String is now either Sequential or External
135  int char_size_shift = is_ascii ? 0 : 1;
136  int char_length = end_offset - start_offset;
137
138  const byte* input_start =
139      StringCharacterPosition(subject_ptr, start_offset);
140  int byte_length = char_length << char_size_shift;
141  const byte* input_end = input_start + byte_length;
142  Result res = Execute(*regexp_code,
143                       subject_ptr,
144                       start_offset,
145                       input_start,
146                       input_end,
147                       offsets_vector);
148  return res;
149}
150
151
152NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
153    Code* code,
154    String* input,
155    int start_offset,
156    const byte* input_start,
157    const byte* input_end,
158    int* output) {
159  typedef int (*matcher)(String*, int, const byte*,
160                         const byte*, int*, Address, int);
161  matcher matcher_func = FUNCTION_CAST<matcher>(code->entry());
162
163  // Ensure that the minimum stack has been allocated.
164  RegExpStack stack;
165  Address stack_base = RegExpStack::stack_base();
166
167  int direct_call = 0;
168  int result = CALL_GENERATED_REGEXP_CODE(matcher_func,
169                                          input,
170                                          start_offset,
171                                          input_start,
172                                          input_end,
173                                          output,
174                                          stack_base,
175                                          direct_call);
176  ASSERT(result <= SUCCESS);
177  ASSERT(result >= RETRY);
178
179  if (result == EXCEPTION && !Top::has_pending_exception()) {
180    // We detected a stack overflow (on the backtrack stack) in RegExp code,
181    // but haven't created the exception yet.
182    Top::StackOverflow();
183  }
184  return static_cast<Result>(result);
185}
186
187
188static unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
189
190
191byte NativeRegExpMacroAssembler::word_character_map[] = {
192    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196
197    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
200    0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
201
202    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
203    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
204    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
205    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
206
207    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
208    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
209    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
210    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
211};
212
213
214int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
215    Address byte_offset1,
216    Address byte_offset2,
217    size_t byte_length) {
218  // This function is not allowed to cause a garbage collection.
219  // A GC might move the calling generated code and invalidate the
220  // return address on the stack.
221  ASSERT(byte_length % 2 == 0);
222  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
223  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
224  size_t length = byte_length >> 1;
225
226  for (size_t i = 0; i < length; i++) {
227    unibrow::uchar c1 = substring1[i];
228    unibrow::uchar c2 = substring2[i];
229    if (c1 != c2) {
230      unibrow::uchar s1[1] = { c1 };
231      canonicalize.get(c1, '\0', s1);
232      if (s1[0] != c2) {
233        unibrow::uchar s2[1] = { c2 };
234        canonicalize.get(c2, '\0', s2);
235        if (s1[0] != s2[0]) {
236          return 0;
237        }
238      }
239    }
240  }
241  return 1;
242}
243
244
245Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
246                                              Address* stack_base) {
247  size_t size = RegExpStack::stack_capacity();
248  Address old_stack_base = RegExpStack::stack_base();
249  ASSERT(old_stack_base == *stack_base);
250  ASSERT(stack_pointer <= old_stack_base);
251  ASSERT(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
252  Address new_stack_base = RegExpStack::EnsureCapacity(size * 2);
253  if (new_stack_base == NULL) {
254    return NULL;
255  }
256  *stack_base = new_stack_base;
257  intptr_t stack_content_size = old_stack_base - stack_pointer;
258  return new_stack_base - stack_content_size;
259}
260
261#endif  // V8_NATIVE_REGEXP
262} }  // namespace v8::internal
263