1// Copyright 2008-2009 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#include "v8.h"
29#include "ast.h"
30#include "bytecodes-irregexp.h"
31#include "regexp-macro-assembler.h"
32#include "regexp-macro-assembler-irregexp.h"
33#include "regexp-macro-assembler-irregexp-inl.h"
34
35
36namespace v8 {
37namespace internal {
38
39#ifdef V8_INTERPRETED_REGEXP
40
41RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer)
42    : buffer_(buffer),
43      pc_(0),
44      own_buffer_(false),
45      advance_current_end_(kInvalidPC) {
46}
47
48
49RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {
50  if (backtrack_.is_linked()) backtrack_.Unuse();
51  if (own_buffer_) buffer_.Dispose();
52}
53
54
55RegExpMacroAssemblerIrregexp::IrregexpImplementation
56RegExpMacroAssemblerIrregexp::Implementation() {
57  return kBytecodeImplementation;
58}
59
60
61void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
62  advance_current_end_ = kInvalidPC;
63  ASSERT(!l->is_bound());
64  if (l->is_linked()) {
65    int pos = l->pos();
66    while (pos != 0) {
67      int fixup = pos;
68      pos = *reinterpret_cast<int32_t*>(buffer_.start() + fixup);
69      *reinterpret_cast<uint32_t*>(buffer_.start() + fixup) = pc_;
70    }
71  }
72  l->bind_to(pc_);
73}
74
75
76void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
77  if (l == NULL) l = &backtrack_;
78  if (l->is_bound()) {
79    Emit32(l->pos());
80  } else {
81    int pos = 0;
82    if (l->is_linked()) {
83      pos = l->pos();
84    }
85    l->link_to(pc_);
86    Emit32(pos);
87  }
88}
89
90
91void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) {
92  ASSERT(register_index >= 0);
93  ASSERT(register_index <= kMaxRegister);
94  Emit(BC_POP_REGISTER, register_index);
95}
96
97
98void RegExpMacroAssemblerIrregexp::PushRegister(
99    int register_index,
100    StackCheckFlag check_stack_limit) {
101  ASSERT(register_index >= 0);
102  ASSERT(register_index <= kMaxRegister);
103  Emit(BC_PUSH_REGISTER, register_index);
104}
105
106
107void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
108    int register_index, int cp_offset) {
109  ASSERT(register_index >= 0);
110  ASSERT(register_index <= kMaxRegister);
111  Emit(BC_SET_REGISTER_TO_CP, register_index);
112  Emit32(cp_offset);  // Current position offset.
113}
114
115
116void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) {
117  ASSERT(reg_from <= reg_to);
118  for (int reg = reg_from; reg <= reg_to; reg++) {
119    SetRegister(reg, -1);
120  }
121}
122
123
124void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister(
125    int register_index) {
126  ASSERT(register_index >= 0);
127  ASSERT(register_index <= kMaxRegister);
128  Emit(BC_SET_CP_TO_REGISTER, register_index);
129}
130
131
132void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister(
133    int register_index) {
134  ASSERT(register_index >= 0);
135  ASSERT(register_index <= kMaxRegister);
136  Emit(BC_SET_REGISTER_TO_SP, register_index);
137}
138
139
140void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister(
141    int register_index) {
142  ASSERT(register_index >= 0);
143  ASSERT(register_index <= kMaxRegister);
144  Emit(BC_SET_SP_TO_REGISTER, register_index);
145}
146
147
148void RegExpMacroAssemblerIrregexp::SetCurrentPositionFromEnd(int by) {
149  ASSERT(is_uint24(by));
150  Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
151}
152
153
154void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) {
155  ASSERT(register_index >= 0);
156  ASSERT(register_index <= kMaxRegister);
157  Emit(BC_SET_REGISTER, register_index);
158  Emit32(to);
159}
160
161
162void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) {
163  ASSERT(register_index >= 0);
164  ASSERT(register_index <= kMaxRegister);
165  Emit(BC_ADVANCE_REGISTER, register_index);
166  Emit32(by);
167}
168
169
170void RegExpMacroAssemblerIrregexp::PopCurrentPosition() {
171  Emit(BC_POP_CP, 0);
172}
173
174
175void RegExpMacroAssemblerIrregexp::PushCurrentPosition() {
176  Emit(BC_PUSH_CP, 0);
177}
178
179
180void RegExpMacroAssemblerIrregexp::Backtrack() {
181  Emit(BC_POP_BT, 0);
182}
183
184
185void RegExpMacroAssemblerIrregexp::GoTo(Label* l) {
186  if (advance_current_end_ == pc_) {
187    // Combine advance current and goto.
188    pc_ = advance_current_start_;
189    Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
190    EmitOrLink(l);
191    advance_current_end_ = kInvalidPC;
192  } else {
193    // Regular goto.
194    Emit(BC_GOTO, 0);
195    EmitOrLink(l);
196  }
197}
198
199
200void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) {
201  Emit(BC_PUSH_BT, 0);
202  EmitOrLink(l);
203}
204
205
206void RegExpMacroAssemblerIrregexp::Succeed() {
207  Emit(BC_SUCCEED, 0);
208}
209
210
211void RegExpMacroAssemblerIrregexp::Fail() {
212  Emit(BC_FAIL, 0);
213}
214
215
216void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
217  ASSERT(by >= kMinCPOffset);
218  ASSERT(by <= kMaxCPOffset);
219  advance_current_start_ = pc_;
220  advance_current_offset_ = by;
221  Emit(BC_ADVANCE_CP, by);
222  advance_current_end_ = pc_;
223}
224
225
226void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
227      Label* on_tos_equals_current_position) {
228  Emit(BC_CHECK_GREEDY, 0);
229  EmitOrLink(on_tos_equals_current_position);
230}
231
232
233void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
234                                                        Label* on_failure,
235                                                        bool check_bounds,
236                                                        int characters) {
237  ASSERT(cp_offset >= kMinCPOffset);
238  ASSERT(cp_offset <= kMaxCPOffset);
239  int bytecode;
240  if (check_bounds) {
241    if (characters == 4) {
242      bytecode = BC_LOAD_4_CURRENT_CHARS;
243    } else if (characters == 2) {
244      bytecode = BC_LOAD_2_CURRENT_CHARS;
245    } else {
246      ASSERT(characters == 1);
247      bytecode = BC_LOAD_CURRENT_CHAR;
248    }
249  } else {
250    if (characters == 4) {
251      bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
252    } else if (characters == 2) {
253      bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
254    } else {
255      ASSERT(characters == 1);
256      bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
257    }
258  }
259  Emit(bytecode, cp_offset);
260  if (check_bounds) EmitOrLink(on_failure);
261}
262
263
264void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
265                                                    Label* on_less) {
266  Emit(BC_CHECK_LT, limit);
267  EmitOrLink(on_less);
268}
269
270
271void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
272                                                    Label* on_greater) {
273  Emit(BC_CHECK_GT, limit);
274  EmitOrLink(on_greater);
275}
276
277
278void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
279  if (c > MAX_FIRST_ARG) {
280    Emit(BC_CHECK_4_CHARS, 0);
281    Emit32(c);
282  } else {
283    Emit(BC_CHECK_CHAR, c);
284  }
285  EmitOrLink(on_equal);
286}
287
288
289void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) {
290  Emit(BC_CHECK_AT_START, 0);
291  EmitOrLink(on_at_start);
292}
293
294
295void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
296  Emit(BC_CHECK_NOT_AT_START, 0);
297  EmitOrLink(on_not_at_start);
298}
299
300
301void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c,
302                                                     Label* on_not_equal) {
303  if (c > MAX_FIRST_ARG) {
304    Emit(BC_CHECK_NOT_4_CHARS, 0);
305    Emit32(c);
306  } else {
307    Emit(BC_CHECK_NOT_CHAR, c);
308  }
309  EmitOrLink(on_not_equal);
310}
311
312
313void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd(
314    uint32_t c,
315    uint32_t mask,
316    Label* on_equal) {
317  if (c > MAX_FIRST_ARG) {
318    Emit(BC_AND_CHECK_4_CHARS, 0);
319    Emit32(c);
320  } else {
321    Emit(BC_AND_CHECK_CHAR, c);
322  }
323  Emit32(mask);
324  EmitOrLink(on_equal);
325}
326
327
328void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd(
329    uint32_t c,
330    uint32_t mask,
331    Label* on_not_equal) {
332  if (c > MAX_FIRST_ARG) {
333    Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
334    Emit32(c);
335  } else {
336    Emit(BC_AND_CHECK_NOT_CHAR, c);
337  }
338  Emit32(mask);
339  EmitOrLink(on_not_equal);
340}
341
342
343void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
344    uc16 c,
345    uc16 minus,
346    uc16 mask,
347    Label* on_not_equal) {
348  Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
349  Emit16(minus);
350  Emit16(mask);
351  EmitOrLink(on_not_equal);
352}
353
354
355void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
356                                                         Label* on_not_equal) {
357  ASSERT(start_reg >= 0);
358  ASSERT(start_reg <= kMaxRegister);
359  Emit(BC_CHECK_NOT_BACK_REF, start_reg);
360  EmitOrLink(on_not_equal);
361}
362
363
364void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
365    int start_reg,
366    Label* on_not_equal) {
367  ASSERT(start_reg >= 0);
368  ASSERT(start_reg <= kMaxRegister);
369  Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
370  EmitOrLink(on_not_equal);
371}
372
373
374void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
375                                                          int reg2,
376                                                          Label* on_not_equal) {
377  ASSERT(reg1 >= 0);
378  ASSERT(reg1 <= kMaxRegister);
379  Emit(BC_CHECK_NOT_REGS_EQUAL, reg1);
380  Emit32(reg2);
381  EmitOrLink(on_not_equal);
382}
383
384
385void RegExpMacroAssemblerIrregexp::CheckCharacters(
386  Vector<const uc16> str,
387  int cp_offset,
388  Label* on_failure,
389  bool check_end_of_string) {
390  ASSERT(cp_offset >= kMinCPOffset);
391  ASSERT(cp_offset + str.length() - 1 <= kMaxCPOffset);
392  // It is vital that this loop is backwards due to the unchecked character
393  // load below.
394  for (int i = str.length() - 1; i >= 0; i--) {
395    if (check_end_of_string && i == str.length() - 1) {
396      Emit(BC_LOAD_CURRENT_CHAR, cp_offset + i);
397      EmitOrLink(on_failure);
398    } else {
399      Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED, cp_offset + i);
400    }
401    Emit(BC_CHECK_NOT_CHAR, str[i]);
402    EmitOrLink(on_failure);
403  }
404}
405
406
407void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index,
408                                                int comparand,
409                                                Label* on_less_than) {
410  ASSERT(register_index >= 0);
411  ASSERT(register_index <= kMaxRegister);
412  Emit(BC_CHECK_REGISTER_LT, register_index);
413  Emit32(comparand);
414  EmitOrLink(on_less_than);
415}
416
417
418void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
419                                                int comparand,
420                                                Label* on_greater_or_equal) {
421  ASSERT(register_index >= 0);
422  ASSERT(register_index <= kMaxRegister);
423  Emit(BC_CHECK_REGISTER_GE, register_index);
424  Emit32(comparand);
425  EmitOrLink(on_greater_or_equal);
426}
427
428
429void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
430                                                   Label* on_eq) {
431  ASSERT(register_index >= 0);
432  ASSERT(register_index <= kMaxRegister);
433  Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
434  EmitOrLink(on_eq);
435}
436
437
438Handle<HeapObject> RegExpMacroAssemblerIrregexp::GetCode(
439    Handle<String> source) {
440  Bind(&backtrack_);
441  Emit(BC_POP_BT, 0);
442  Handle<ByteArray> array = FACTORY->NewByteArray(length());
443  Copy(array->GetDataStartAddress());
444  return array;
445}
446
447
448int RegExpMacroAssemblerIrregexp::length() {
449  return pc_;
450}
451
452
453void RegExpMacroAssemblerIrregexp::Copy(Address a) {
454  memcpy(a, buffer_.start(), length());
455}
456
457
458void RegExpMacroAssemblerIrregexp::Expand() {
459  bool old_buffer_was_our_own = own_buffer_;
460  Vector<byte> old_buffer = buffer_;
461  buffer_ = Vector<byte>::New(old_buffer.length() * 2);
462  own_buffer_ = true;
463  memcpy(buffer_.start(), old_buffer.start(), old_buffer.length());
464  if (old_buffer_was_our_own) {
465    old_buffer.Dispose();
466  }
467}
468
469#endif  // V8_INTERPRETED_REGEXP
470
471} }  // namespace v8::internal
472