1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/v8.h"
6
7#include "src/assembler.h"
8#include "src/ast.h"
9#include "src/regexp-macro-assembler.h"
10#include "src/regexp-stack.h"
11#include "src/simulator.h"
12
13namespace v8 {
14namespace internal {
15
16RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
17  : slow_safe_compiler_(false),
18    global_mode_(NOT_GLOBAL),
19    zone_(zone) {
20}
21
22
23RegExpMacroAssembler::~RegExpMacroAssembler() {
24}
25
26
27#ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
28
29NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Zone* zone)
30    : RegExpMacroAssembler(zone) {
31}
32
33
34NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() {
35}
36
37
38bool NativeRegExpMacroAssembler::CanReadUnaligned() {
39  return FLAG_enable_unaligned_accesses && !slow_safe();
40}
41
42const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
43    String* subject,
44    int start_index) {
45  // Not just flat, but ultra flat.
46  DCHECK(subject->IsExternalString() || subject->IsSeqString());
47  DCHECK(start_index >= 0);
48  DCHECK(start_index <= subject->length());
49  if (subject->IsOneByteRepresentation()) {
50    const byte* address;
51    if (StringShape(subject).IsExternal()) {
52      const uint8_t* data = ExternalOneByteString::cast(subject)->GetChars();
53      address = reinterpret_cast<const byte*>(data);
54    } else {
55      DCHECK(subject->IsSeqOneByteString());
56      const uint8_t* data = SeqOneByteString::cast(subject)->GetChars();
57      address = reinterpret_cast<const byte*>(data);
58    }
59    return address + start_index;
60  }
61  const uc16* data;
62  if (StringShape(subject).IsExternal()) {
63    data = ExternalTwoByteString::cast(subject)->GetChars();
64  } else {
65    DCHECK(subject->IsSeqTwoByteString());
66    data = SeqTwoByteString::cast(subject)->GetChars();
67  }
68  return reinterpret_cast<const byte*>(data + start_index);
69}
70
71
72NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
73    Handle<Code> regexp_code,
74    Handle<String> subject,
75    int* offsets_vector,
76    int offsets_vector_length,
77    int previous_index,
78    Isolate* isolate) {
79
80  DCHECK(subject->IsFlat());
81  DCHECK(previous_index >= 0);
82  DCHECK(previous_index <= subject->length());
83
84  // No allocations before calling the regexp, but we can't use
85  // DisallowHeapAllocation, since regexps might be preempted, and another
86  // thread might do allocation anyway.
87
88  String* subject_ptr = *subject;
89  // Character offsets into string.
90  int start_offset = previous_index;
91  int char_length = subject_ptr->length() - start_offset;
92  int slice_offset = 0;
93
94  // The string has been flattened, so if it is a cons string it contains the
95  // full string in the first part.
96  if (StringShape(subject_ptr).IsCons()) {
97    DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
98    subject_ptr = ConsString::cast(subject_ptr)->first();
99  } else if (StringShape(subject_ptr).IsSliced()) {
100    SlicedString* slice = SlicedString::cast(subject_ptr);
101    subject_ptr = slice->parent();
102    slice_offset = slice->offset();
103  }
104  // Ensure that an underlying string has the same representation.
105  bool is_one_byte = subject_ptr->IsOneByteRepresentation();
106  DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
107  // String is now either Sequential or External
108  int char_size_shift = is_one_byte ? 0 : 1;
109
110  const byte* input_start =
111      StringCharacterPosition(subject_ptr, start_offset + slice_offset);
112  int byte_length = char_length << char_size_shift;
113  const byte* input_end = input_start + byte_length;
114  Result res = Execute(*regexp_code,
115                       *subject,
116                       start_offset,
117                       input_start,
118                       input_end,
119                       offsets_vector,
120                       offsets_vector_length,
121                       isolate);
122  return res;
123}
124
125
126NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
127    Code* code,
128    String* input,  // This needs to be the unpacked (sliced, cons) string.
129    int start_offset,
130    const byte* input_start,
131    const byte* input_end,
132    int* output,
133    int output_size,
134    Isolate* isolate) {
135  // Ensure that the minimum stack has been allocated.
136  RegExpStackScope stack_scope(isolate);
137  Address stack_base = stack_scope.stack()->stack_base();
138
139  int direct_call = 0;
140  int result = CALL_GENERATED_REGEXP_CODE(code->entry(),
141                                          input,
142                                          start_offset,
143                                          input_start,
144                                          input_end,
145                                          output,
146                                          output_size,
147                                          stack_base,
148                                          direct_call,
149                                          isolate);
150  DCHECK(result >= RETRY);
151
152  if (result == EXCEPTION && !isolate->has_pending_exception()) {
153    // We detected a stack overflow (on the backtrack stack) in RegExp code,
154    // but haven't created the exception yet.
155    isolate->StackOverflow();
156  }
157  return static_cast<Result>(result);
158}
159
160
161const byte NativeRegExpMacroAssembler::word_character_map[] = {
162    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
163    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
164    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
165    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
166
167    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
168    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
169    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // '0' - '7'
170    0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // '8' - '9'
171
172    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'A' - 'G'
173    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'H' - 'O'
174    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'P' - 'W'
175    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0xffu,  // 'X' - 'Z', '_'
176
177    0x00u, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'a' - 'g'
178    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'h' - 'o'
179    0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu, 0xffu,  // 'p' - 'w'
180    0xffu, 0xffu, 0xffu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,  // 'x' - 'z'
181    // Latin-1 range
182    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
183    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
184    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
185    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
186
187    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
188    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
189    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
190    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
191
192    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
193    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
194    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
195    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
196
197    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
198    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
199    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
200    0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
201};
202
203
204int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
205    Address byte_offset1,
206    Address byte_offset2,
207    size_t byte_length,
208    Isolate* isolate) {
209  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
210      isolate->regexp_macro_assembler_canonicalize();
211  // This function is not allowed to cause a garbage collection.
212  // A GC might move the calling generated code and invalidate the
213  // return address on the stack.
214  DCHECK(byte_length % 2 == 0);
215  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
216  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
217  size_t length = byte_length >> 1;
218
219  for (size_t i = 0; i < length; i++) {
220    unibrow::uchar c1 = substring1[i];
221    unibrow::uchar c2 = substring2[i];
222    if (c1 != c2) {
223      unibrow::uchar s1[1] = { c1 };
224      canonicalize->get(c1, '\0', s1);
225      if (s1[0] != c2) {
226        unibrow::uchar s2[1] = { c2 };
227        canonicalize->get(c2, '\0', s2);
228        if (s1[0] != s2[0]) {
229          return 0;
230        }
231      }
232    }
233  }
234  return 1;
235}
236
237
238Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
239                                              Address* stack_base,
240                                              Isolate* isolate) {
241  RegExpStack* regexp_stack = isolate->regexp_stack();
242  size_t size = regexp_stack->stack_capacity();
243  Address old_stack_base = regexp_stack->stack_base();
244  DCHECK(old_stack_base == *stack_base);
245  DCHECK(stack_pointer <= old_stack_base);
246  DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
247  Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
248  if (new_stack_base == NULL) {
249    return NULL;
250  }
251  *stack_base = new_stack_base;
252  intptr_t stack_content_size = old_stack_base - stack_pointer;
253  return new_stack_base - stack_content_size;
254}
255
256#endif  // V8_INTERPRETED_REGEXP
257
258} }  // namespace v8::internal
259