1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#if V8_TARGET_ARCH_IA32
6
7#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
8
9#include "src/log.h"
10#include "src/macro-assembler.h"
11#include "src/regexp/regexp-macro-assembler.h"
12#include "src/regexp/regexp-stack.h"
13#include "src/unicode.h"
14
15namespace v8 {
16namespace internal {
17
18#ifndef V8_INTERPRETED_REGEXP
19/*
20 * This assembler uses the following register assignment convention
21 * - edx : Current character.  Must be loaded using LoadCurrentCharacter
22 *         before using any of the dispatch methods.  Temporarily stores the
23 *         index of capture start after a matching pass for a global regexp.
24 * - edi : Current position in input, as negative offset from end of string.
25 *         Please notice that this is the byte offset, not the character offset!
26 * - esi : end of input (points to byte after last character in input).
27 * - ebp : Frame pointer.  Used to access arguments, local variables and
28 *         RegExp registers.
29 * - esp : Points to tip of C stack.
30 * - ecx : Points to tip of backtrack stack
31 *
32 * The registers eax and ebx are free to use for computations.
33 *
34 * Each call to a public method should retain this convention.
35 * The stack will have the following structure:
36 *       - Isolate* isolate     (address of the current isolate)
37 *       - direct_call          (if 1, direct call from JavaScript code, if 0
38 *                               call through the runtime system)
39 *       - stack_area_base      (high end of the memory area to use as
40 *                               backtracking stack)
41 *       - capture array size   (may fit multiple sets of matches)
42 *       - int* capture_array   (int[num_saved_registers_], for output).
43 *       - end of input         (address of end of string)
44 *       - start of input       (address of first character in string)
45 *       - start index          (character index of start)
46 *       - String* input_string (location of a handle containing the string)
47 *       --- frame alignment (if applicable) ---
48 *       - return address
49 * ebp-> - old ebp
50 *       - backup of caller esi
51 *       - backup of caller edi
52 *       - backup of caller ebx
53 *       - success counter      (only for global regexps to count matches).
54 *       - Offset of location before start of input (effectively character
55 *         string start - 1). Used to initialize capture registers to a
56 *         non-position.
57 *       - register 0  ebp[-4]  (only positions must be stored in the first
58 *       - register 1  ebp[-8]   num_saved_registers_ registers)
59 *       - ...
60 *
61 * The first num_saved_registers_ registers are initialized to point to
62 * "character -1" in the string (i.e., char_size() bytes before the first
 * character of the string). The remaining registers start out as garbage.
64 *
65 * The data up to the return address must be placed there by the calling
66 * code, by calling the code entry as cast to a function with the signature:
 * int (*match)(String* input_string,
 *              int start_index,
 *              Address start,
 *              Address end,
 *              int* capture_output_array,
 *              int capture_array_size,
 *              byte* stack_area_base,
 *              bool direct_call,
 *              Isolate* isolate)
75 */
76
77#define __ ACCESS_MASM(masm_)
78
79RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(Isolate* isolate, Zone* zone,
80                                                   Mode mode,
81                                                   int registers_to_save)
82    : NativeRegExpMacroAssembler(isolate, zone),
83      masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize,
84                               CodeObjectRequired::kYes)),
85      mode_(mode),
86      num_registers_(registers_to_save),
87      num_saved_registers_(registers_to_save),
88      entry_label_(),
89      start_label_(),
90      success_label_(),
91      backtrack_label_(),
92      exit_label_() {
93  DCHECK_EQ(0, registers_to_save % 2);
94  __ jmp(&entry_label_);   // We'll write the entry code later.
95  __ bind(&start_label_);  // And then continue from here.
96}
97
98
99RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
100  delete masm_;
101  // Unuse labels in case we throw away the assembler without calling GetCode.
102  entry_label_.Unuse();
103  start_label_.Unuse();
104  success_label_.Unuse();
105  backtrack_label_.Unuse();
106  exit_label_.Unuse();
107  check_preempt_label_.Unuse();
108  stack_overflow_label_.Unuse();
109}
110
111
112int RegExpMacroAssemblerIA32::stack_limit_slack()  {
113  return RegExpStack::kStackLimitSlack;
114}
115
116
117void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
118  if (by != 0) {
119    __ add(edi, Immediate(by * char_size()));
120  }
121}
122
123
124void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) {
125  DCHECK(reg >= 0);
126  DCHECK(reg < num_registers_);
127  if (by != 0) {
128    __ add(register_location(reg), Immediate(by));
129  }
130}
131
132
133void RegExpMacroAssemblerIA32::Backtrack() {
134  CheckPreemption();
135  // Pop Code* offset from backtrack stack, add Code* and jump to location.
136  Pop(ebx);
137  __ add(ebx, Immediate(masm_->CodeObject()));
138  __ jmp(ebx);
139}
140
141
142void RegExpMacroAssemblerIA32::Bind(Label* label) {
143  __ bind(label);
144}
145
146
147void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
148  __ cmp(current_character(), c);
149  BranchOrBacktrack(equal, on_equal);
150}
151
152
153void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
154  __ cmp(current_character(), limit);
155  BranchOrBacktrack(greater, on_greater);
156}
157
158
159void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
160  __ lea(eax, Operand(edi, -char_size()));
161  __ cmp(eax, Operand(ebp, kStringStartMinusOne));
162  BranchOrBacktrack(equal, on_at_start);
163}
164
165
166void RegExpMacroAssemblerIA32::CheckNotAtStart(int cp_offset,
167                                               Label* on_not_at_start) {
168  __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
169  __ cmp(eax, Operand(ebp, kStringStartMinusOne));
170  BranchOrBacktrack(not_equal, on_not_at_start);
171}
172
173
174void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
175  __ cmp(current_character(), limit);
176  BranchOrBacktrack(less, on_less);
177}
178
179
180void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
181  Label fallthrough;
182  __ cmp(edi, Operand(backtrack_stackpointer(), 0));
183  __ j(not_equal, &fallthrough);
184  __ add(backtrack_stackpointer(), Immediate(kPointerSize));  // Pop.
185  BranchOrBacktrack(no_condition, on_equal);
186  __ bind(&fallthrough);
187}
188
189
190void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
191    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
192  Label fallthrough;
193  __ mov(edx, register_location(start_reg));  // Index of start of capture
194  __ mov(ebx, register_location(start_reg + 1));  // Index of end of capture
195  __ sub(ebx, edx);  // Length of capture.
196
197  // At this point, the capture registers are either both set or both cleared.
198  // If the capture length is zero, then the capture is either empty or cleared.
199  // Fall through in both cases.
200  __ j(equal, &fallthrough);
201
202  // Check that there are sufficient characters left in the input.
203  if (read_backward) {
204    __ mov(eax, Operand(ebp, kStringStartMinusOne));
205    __ add(eax, ebx);
206    __ cmp(edi, eax);
207    BranchOrBacktrack(less_equal, on_no_match);
208  } else {
209    __ mov(eax, edi);
210    __ add(eax, ebx);
211    BranchOrBacktrack(greater, on_no_match);
212  }
213
214  if (mode_ == LATIN1) {
215    Label success;
216    Label fail;
217    Label loop_increment;
218    // Save register contents to make the registers available below.
219    __ push(edi);
220    __ push(backtrack_stackpointer());
221    // After this, the eax, ecx, and edi registers are available.
222
223    __ add(edx, esi);  // Start of capture
224    __ add(edi, esi);  // Start of text to match against capture.
225    if (read_backward) {
226      __ sub(edi, ebx);  // Offset by length when matching backwards.
227    }
228    __ add(ebx, edi);  // End of text to match against capture.
229
230    Label loop;
231    __ bind(&loop);
232    __ movzx_b(eax, Operand(edi, 0));
233    __ cmpb_al(Operand(edx, 0));
234    __ j(equal, &loop_increment);
235
236    // Mismatch, try case-insensitive match (converting letters to lower-case).
237    __ or_(eax, 0x20);  // Convert match character to lower-case.
238    __ lea(ecx, Operand(eax, -'a'));
239    __ cmp(ecx, static_cast<int32_t>('z' - 'a'));  // Is eax a lowercase letter?
240    Label convert_capture;
241    __ j(below_equal, &convert_capture);  // In range 'a'-'z'.
242    // Latin-1: Check for values in range [224,254] but not 247.
243    __ sub(ecx, Immediate(224 - 'a'));
244    __ cmp(ecx, Immediate(254 - 224));
245    __ j(above, &fail);  // Weren't Latin-1 letters.
246    __ cmp(ecx, Immediate(247 - 224));  // Check for 247.
247    __ j(equal, &fail);
248    __ bind(&convert_capture);
249    // Also convert capture character.
250    __ movzx_b(ecx, Operand(edx, 0));
251    __ or_(ecx, 0x20);
252
253    __ cmp(eax, ecx);
254    __ j(not_equal, &fail);
255
256    __ bind(&loop_increment);
257    // Increment pointers into match and capture strings.
258    __ add(edx, Immediate(1));
259    __ add(edi, Immediate(1));
260    // Compare to end of match, and loop if not done.
261    __ cmp(edi, ebx);
262    __ j(below, &loop);
263    __ jmp(&success);
264
265    __ bind(&fail);
266    // Restore original values before failing.
267    __ pop(backtrack_stackpointer());
268    __ pop(edi);
269    BranchOrBacktrack(no_condition, on_no_match);
270
271    __ bind(&success);
272    // Restore original value before continuing.
273    __ pop(backtrack_stackpointer());
274    // Drop original value of character position.
275    __ add(esp, Immediate(kPointerSize));
276    // Compute new value of character position after the matched part.
277    __ sub(edi, esi);
278    if (read_backward) {
279      // Subtract match length if we matched backward.
280      __ add(edi, register_location(start_reg));
281      __ sub(edi, register_location(start_reg + 1));
282    }
283  } else {
284    DCHECK(mode_ == UC16);
285    // Save registers before calling C function.
286    __ push(esi);
287    __ push(edi);
288    __ push(backtrack_stackpointer());
289    __ push(ebx);
290
291    static const int argument_count = 4;
292    __ PrepareCallCFunction(argument_count, ecx);
293    // Put arguments into allocated stack area, last argument highest on stack.
294    // Parameters are
295    //   Address byte_offset1 - Address captured substring's start.
296    //   Address byte_offset2 - Address of current character position.
297    //   size_t byte_length - length of capture in bytes(!)
298//   Isolate* isolate or 0 if unicode flag.
299
300    // Set isolate.
301#ifdef V8_I18N_SUPPORT
302    if (unicode) {
303      __ mov(Operand(esp, 3 * kPointerSize), Immediate(0));
304    } else  // NOLINT
305#endif      // V8_I18N_SUPPORT
306    {
307      __ mov(Operand(esp, 3 * kPointerSize),
308             Immediate(ExternalReference::isolate_address(isolate())));
309    }
310    // Set byte_length.
311    __ mov(Operand(esp, 2 * kPointerSize), ebx);
312    // Set byte_offset2.
313    // Found by adding negative string-end offset of current position (edi)
314    // to end of string.
315    __ add(edi, esi);
316    if (read_backward) {
317      __ sub(edi, ebx);  // Offset by length when matching backwards.
318    }
319    __ mov(Operand(esp, 1 * kPointerSize), edi);
320    // Set byte_offset1.
321    // Start of capture, where edx already holds string-end negative offset.
322    __ add(edx, esi);
323    __ mov(Operand(esp, 0 * kPointerSize), edx);
324
325    {
326      AllowExternalCallThatCantCauseGC scope(masm_);
327      ExternalReference compare =
328          ExternalReference::re_case_insensitive_compare_uc16(isolate());
329      __ CallCFunction(compare, argument_count);
330    }
331    // Pop original values before reacting on result value.
332    __ pop(ebx);
333    __ pop(backtrack_stackpointer());
334    __ pop(edi);
335    __ pop(esi);
336
337    // Check if function returned non-zero for success or zero for failure.
338    __ or_(eax, eax);
339    BranchOrBacktrack(zero, on_no_match);
340    // On success, advance position by length of capture.
341    if (read_backward) {
342      __ sub(edi, ebx);
343    } else {
344      __ add(edi, ebx);
345    }
346  }
347  __ bind(&fallthrough);
348}
349
350
351void RegExpMacroAssemblerIA32::CheckNotBackReference(int start_reg,
352                                                     bool read_backward,
353                                                     Label* on_no_match) {
354  Label fallthrough;
355  Label success;
356  Label fail;
357
358  // Find length of back-referenced capture.
359  __ mov(edx, register_location(start_reg));
360  __ mov(eax, register_location(start_reg + 1));
361  __ sub(eax, edx);  // Length to check.
362
363  // At this point, the capture registers are either both set or both cleared.
364  // If the capture length is zero, then the capture is either empty or cleared.
365  // Fall through in both cases.
366  __ j(equal, &fallthrough);
367
368  // Check that there are sufficient characters left in the input.
369  if (read_backward) {
370    __ mov(ebx, Operand(ebp, kStringStartMinusOne));
371    __ add(ebx, eax);
372    __ cmp(edi, ebx);
373    BranchOrBacktrack(less_equal, on_no_match);
374  } else {
375    __ mov(ebx, edi);
376    __ add(ebx, eax);
377    BranchOrBacktrack(greater, on_no_match);
378  }
379
380  // Save register to make it available below.
381  __ push(backtrack_stackpointer());
382
383  // Compute pointers to match string and capture string
384  __ add(edx, esi);  // Start of capture.
385  __ lea(ebx, Operand(esi, edi, times_1, 0));  // Start of match.
386  if (read_backward) {
387    __ sub(ebx, eax);  // Offset by length when matching backwards.
388  }
389  __ lea(ecx, Operand(eax, ebx, times_1, 0));  // End of match
390
391  Label loop;
392  __ bind(&loop);
393  if (mode_ == LATIN1) {
394    __ movzx_b(eax, Operand(edx, 0));
395    __ cmpb_al(Operand(ebx, 0));
396  } else {
397    DCHECK(mode_ == UC16);
398    __ movzx_w(eax, Operand(edx, 0));
399    __ cmpw_ax(Operand(ebx, 0));
400  }
401  __ j(not_equal, &fail);
402  // Increment pointers into capture and match string.
403  __ add(edx, Immediate(char_size()));
404  __ add(ebx, Immediate(char_size()));
405  // Check if we have reached end of match area.
406  __ cmp(ebx, ecx);
407  __ j(below, &loop);
408  __ jmp(&success);
409
410  __ bind(&fail);
411  // Restore backtrack stackpointer.
412  __ pop(backtrack_stackpointer());
413  BranchOrBacktrack(no_condition, on_no_match);
414
415  __ bind(&success);
416  // Move current character position to position after match.
417  __ mov(edi, ecx);
418  __ sub(edi, esi);
419  if (read_backward) {
420    // Subtract match length if we matched backward.
421    __ add(edi, register_location(start_reg));
422    __ sub(edi, register_location(start_reg + 1));
423  }
424  // Restore backtrack stackpointer.
425  __ pop(backtrack_stackpointer());
426
427  __ bind(&fallthrough);
428}
429
430
431void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
432                                                 Label* on_not_equal) {
433  __ cmp(current_character(), c);
434  BranchOrBacktrack(not_equal, on_not_equal);
435}
436
437
438void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
439                                                      uint32_t mask,
440                                                      Label* on_equal) {
441  if (c == 0) {
442    __ test(current_character(), Immediate(mask));
443  } else {
444    __ mov(eax, mask);
445    __ and_(eax, current_character());
446    __ cmp(eax, c);
447  }
448  BranchOrBacktrack(equal, on_equal);
449}
450
451
452void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
453                                                         uint32_t mask,
454                                                         Label* on_not_equal) {
455  if (c == 0) {
456    __ test(current_character(), Immediate(mask));
457  } else {
458    __ mov(eax, mask);
459    __ and_(eax, current_character());
460    __ cmp(eax, c);
461  }
462  BranchOrBacktrack(not_equal, on_not_equal);
463}
464
465
466void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
467    uc16 c,
468    uc16 minus,
469    uc16 mask,
470    Label* on_not_equal) {
471  DCHECK(minus < String::kMaxUtf16CodeUnit);
472  __ lea(eax, Operand(current_character(), -minus));
473  if (c == 0) {
474    __ test(eax, Immediate(mask));
475  } else {
476    __ and_(eax, mask);
477    __ cmp(eax, c);
478  }
479  BranchOrBacktrack(not_equal, on_not_equal);
480}
481
482
483void RegExpMacroAssemblerIA32::CheckCharacterInRange(
484    uc16 from,
485    uc16 to,
486    Label* on_in_range) {
487  __ lea(eax, Operand(current_character(), -from));
488  __ cmp(eax, to - from);
489  BranchOrBacktrack(below_equal, on_in_range);
490}
491
492
493void RegExpMacroAssemblerIA32::CheckCharacterNotInRange(
494    uc16 from,
495    uc16 to,
496    Label* on_not_in_range) {
497  __ lea(eax, Operand(current_character(), -from));
498  __ cmp(eax, to - from);
499  BranchOrBacktrack(above, on_not_in_range);
500}
501
502
503void RegExpMacroAssemblerIA32::CheckBitInTable(
504    Handle<ByteArray> table,
505    Label* on_bit_set) {
506  __ mov(eax, Immediate(table));
507  Register index = current_character();
508  if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
509    __ mov(ebx, kTableSize - 1);
510    __ and_(ebx, current_character());
511    index = ebx;
512  }
513  __ cmpb(FieldOperand(eax, index, times_1, ByteArray::kHeaderSize),
514          Immediate(0));
515  BranchOrBacktrack(not_equal, on_bit_set);
516}
517
518
519bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
520                                                          Label* on_no_match) {
521  // Range checks (c in min..max) are generally implemented by an unsigned
522  // (c - min) <= (max - min) check
523  switch (type) {
524  case 's':
525    // Match space-characters
526    if (mode_ == LATIN1) {
527      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
528      Label success;
529      __ cmp(current_character(), ' ');
530      __ j(equal, &success, Label::kNear);
531      // Check range 0x09..0x0d
532      __ lea(eax, Operand(current_character(), -'\t'));
533      __ cmp(eax, '\r' - '\t');
534      __ j(below_equal, &success, Label::kNear);
535      // \u00a0 (NBSP).
536      __ cmp(eax, 0x00a0 - '\t');
537      BranchOrBacktrack(not_equal, on_no_match);
538      __ bind(&success);
539      return true;
540    }
541    return false;
542  case 'S':
543    // The emitted code for generic character classes is good enough.
544    return false;
545  case 'd':
546    // Match ASCII digits ('0'..'9')
547    __ lea(eax, Operand(current_character(), -'0'));
548    __ cmp(eax, '9' - '0');
549    BranchOrBacktrack(above, on_no_match);
550    return true;
551  case 'D':
552    // Match non ASCII-digits
553    __ lea(eax, Operand(current_character(), -'0'));
554    __ cmp(eax, '9' - '0');
555    BranchOrBacktrack(below_equal, on_no_match);
556    return true;
557  case '.': {
558    // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
559    __ mov(eax, current_character());
560    __ xor_(eax, Immediate(0x01));
561    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
562    __ sub(eax, Immediate(0x0b));
563    __ cmp(eax, 0x0c - 0x0b);
564    BranchOrBacktrack(below_equal, on_no_match);
565    if (mode_ == UC16) {
566      // Compare original value to 0x2028 and 0x2029, using the already
567      // computed (current_char ^ 0x01 - 0x0b). I.e., check for
568      // 0x201d (0x2028 - 0x0b) or 0x201e.
569      __ sub(eax, Immediate(0x2028 - 0x0b));
570      __ cmp(eax, 0x2029 - 0x2028);
571      BranchOrBacktrack(below_equal, on_no_match);
572    }
573    return true;
574  }
575  case 'w': {
576    if (mode_ != LATIN1) {
577      // Table is 256 entries, so all Latin1 characters can be tested.
578      __ cmp(current_character(), Immediate('z'));
579      BranchOrBacktrack(above, on_no_match);
580    }
581    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
582    ExternalReference word_map = ExternalReference::re_word_character_map();
583    __ test_b(current_character(),
584              Operand::StaticArray(current_character(), times_1, word_map));
585    BranchOrBacktrack(zero, on_no_match);
586    return true;
587  }
588  case 'W': {
589    Label done;
590    if (mode_ != LATIN1) {
591      // Table is 256 entries, so all Latin1 characters can be tested.
592      __ cmp(current_character(), Immediate('z'));
593      __ j(above, &done);
594    }
595    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
596    ExternalReference word_map = ExternalReference::re_word_character_map();
597    __ test_b(current_character(),
598              Operand::StaticArray(current_character(), times_1, word_map));
599    BranchOrBacktrack(not_zero, on_no_match);
600    if (mode_ != LATIN1) {
601      __ bind(&done);
602    }
603    return true;
604  }
605  // Non-standard classes (with no syntactic shorthand) used internally.
606  case '*':
607    // Match any character.
608    return true;
609  case 'n': {
610    // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
611    // The opposite of '.'.
612    __ mov(eax, current_character());
613    __ xor_(eax, Immediate(0x01));
614    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
615    __ sub(eax, Immediate(0x0b));
616    __ cmp(eax, 0x0c - 0x0b);
617    if (mode_ == LATIN1) {
618      BranchOrBacktrack(above, on_no_match);
619    } else {
620      Label done;
621      BranchOrBacktrack(below_equal, &done);
622      DCHECK_EQ(UC16, mode_);
623      // Compare original value to 0x2028 and 0x2029, using the already
624      // computed (current_char ^ 0x01 - 0x0b). I.e., check for
625      // 0x201d (0x2028 - 0x0b) or 0x201e.
626      __ sub(eax, Immediate(0x2028 - 0x0b));
627      __ cmp(eax, 1);
628      BranchOrBacktrack(above, on_no_match);
629      __ bind(&done);
630    }
631    return true;
632  }
633  // No custom implementation (yet): s(UC16), S(UC16).
634  default:
635    return false;
636  }
637}
638
639
640void RegExpMacroAssemblerIA32::Fail() {
641  STATIC_ASSERT(FAILURE == 0);  // Return value for failure is zero.
642  if (!global()) {
643    __ Move(eax, Immediate(FAILURE));
644  }
645  __ jmp(&exit_label_);
646}
647
648
649Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
650  Label return_eax;
651  // Finalize code - write the entry point code now we know how many
652  // registers we need.
653
654  // Entry code:
655  __ bind(&entry_label_);
656
657  // Tell the system that we have a stack frame.  Because the type is MANUAL, no
658  // code is generated.
659  FrameScope scope(masm_, StackFrame::MANUAL);
660
661  // Actually emit code to start a new stack frame.
662  __ push(ebp);
663  __ mov(ebp, esp);
664  // Save callee-save registers. Order here should correspond to order of
665  // kBackup_ebx etc.
666  __ push(esi);
667  __ push(edi);
668  __ push(ebx);  // Callee-save on MacOS.
669  __ push(Immediate(0));  // Number of successful matches in a global regexp.
670  __ push(Immediate(0));  // Make room for "string start - 1" constant.
671
672  // Check if we have space on the stack for registers.
673  Label stack_limit_hit;
674  Label stack_ok;
675
676  ExternalReference stack_limit =
677      ExternalReference::address_of_stack_limit(isolate());
678  __ mov(ecx, esp);
679  __ sub(ecx, Operand::StaticVariable(stack_limit));
680  // Handle it if the stack pointer is already below the stack limit.
681  __ j(below_equal, &stack_limit_hit);
682  // Check if there is room for the variable number of registers above
683  // the stack limit.
684  __ cmp(ecx, num_registers_ * kPointerSize);
685  __ j(above_equal, &stack_ok);
686  // Exit with OutOfMemory exception. There is not enough space on the stack
687  // for our working registers.
688  __ mov(eax, EXCEPTION);
689  __ jmp(&return_eax);
690
691  __ bind(&stack_limit_hit);
692  CallCheckStackGuardState(ebx);
693  __ or_(eax, eax);
694  // If returned value is non-zero, we exit with the returned value as result.
695  __ j(not_zero, &return_eax);
696
697  __ bind(&stack_ok);
698  // Load start index for later use.
699  __ mov(ebx, Operand(ebp, kStartIndex));
700
701  // Allocate space on stack for registers.
702  __ sub(esp, Immediate(num_registers_ * kPointerSize));
703  // Load string length.
704  __ mov(esi, Operand(ebp, kInputEnd));
705  // Load input position.
706  __ mov(edi, Operand(ebp, kInputStart));
707  // Set up edi to be negative offset from string end.
708  __ sub(edi, esi);
709
710  // Set eax to address of char before start of the string.
711  // (effectively string position -1).
712  __ neg(ebx);
713  if (mode_ == UC16) {
714    __ lea(eax, Operand(edi, ebx, times_2, -char_size()));
715  } else {
716    __ lea(eax, Operand(edi, ebx, times_1, -char_size()));
717  }
718  // Store this value in a local variable, for use when clearing
719  // position registers.
720  __ mov(Operand(ebp, kStringStartMinusOne), eax);
721
722#if V8_OS_WIN
723  // Ensure that we write to each stack page, in order. Skipping a page
724  // on Windows can cause segmentation faults. Assuming page size is 4k.
725  const int kPageSize = 4096;
726  const int kRegistersPerPage = kPageSize / kPointerSize;
727  for (int i = num_saved_registers_ + kRegistersPerPage - 1;
728      i < num_registers_;
729      i += kRegistersPerPage) {
730    __ mov(register_location(i), eax);  // One write every page.
731  }
732#endif  // V8_OS_WIN
733
734  Label load_char_start_regexp, start_regexp;
735  // Load newline if index is at start, previous character otherwise.
736  __ cmp(Operand(ebp, kStartIndex), Immediate(0));
737  __ j(not_equal, &load_char_start_regexp, Label::kNear);
738  __ mov(current_character(), '\n');
739  __ jmp(&start_regexp, Label::kNear);
740
741  // Global regexp restarts matching here.
742  __ bind(&load_char_start_regexp);
743  // Load previous char as initial value of current character register.
744  LoadCurrentCharacterUnchecked(-1, 1);
745  __ bind(&start_regexp);
746
747  // Initialize on-stack registers.
748  if (num_saved_registers_ > 0) {  // Always is, if generated from a regexp.
749    // Fill saved registers with initial value = start offset - 1
750    // Fill in stack push order, to avoid accessing across an unwritten
751    // page (a problem on Windows).
752    if (num_saved_registers_ > 8) {
753      __ mov(ecx, kRegisterZero);
754      Label init_loop;
755      __ bind(&init_loop);
756      __ mov(Operand(ebp, ecx, times_1, 0), eax);
757      __ sub(ecx, Immediate(kPointerSize));
758      __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
759      __ j(greater, &init_loop);
760    } else {  // Unroll the loop.
761      for (int i = 0; i < num_saved_registers_; i++) {
762        __ mov(register_location(i), eax);
763      }
764    }
765  }
766
767  // Initialize backtrack stack pointer.
768  __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
769
770  __ jmp(&start_label_);
771
772  // Exit code:
773  if (success_label_.is_linked()) {
774    // Save captures when successful.
775    __ bind(&success_label_);
776    if (num_saved_registers_ > 0) {
777      // copy captures to output
778      __ mov(ebx, Operand(ebp, kRegisterOutput));
779      __ mov(ecx, Operand(ebp, kInputEnd));
780      __ mov(edx, Operand(ebp, kStartIndex));
781      __ sub(ecx, Operand(ebp, kInputStart));
782      if (mode_ == UC16) {
783        __ lea(ecx, Operand(ecx, edx, times_2, 0));
784      } else {
785        __ add(ecx, edx);
786      }
787      for (int i = 0; i < num_saved_registers_; i++) {
788        __ mov(eax, register_location(i));
789        if (i == 0 && global_with_zero_length_check()) {
790          // Keep capture start in edx for the zero-length check later.
791          __ mov(edx, eax);
792        }
793        // Convert to index from start of string, not end.
794        __ add(eax, ecx);
795        if (mode_ == UC16) {
796          __ sar(eax, 1);  // Convert byte index to character index.
797        }
798        __ mov(Operand(ebx, i * kPointerSize), eax);
799      }
800    }
801
802    if (global()) {
803      // Restart matching if the regular expression is flagged as global.
804      // Increment success counter.
805      __ inc(Operand(ebp, kSuccessfulCaptures));
806      // Capture results have been stored, so the number of remaining global
807      // output registers is reduced by the number of stored captures.
808      __ mov(ecx, Operand(ebp, kNumOutputRegisters));
809      __ sub(ecx, Immediate(num_saved_registers_));
810      // Check whether we have enough room for another set of capture results.
811      __ cmp(ecx, Immediate(num_saved_registers_));
812      __ j(less, &exit_label_);
813
814      __ mov(Operand(ebp, kNumOutputRegisters), ecx);
815      // Advance the location for output.
816      __ add(Operand(ebp, kRegisterOutput),
817             Immediate(num_saved_registers_ * kPointerSize));
818
819      // Prepare eax to initialize registers with its value in the next run.
820      __ mov(eax, Operand(ebp, kStringStartMinusOne));
821
822      if (global_with_zero_length_check()) {
823        // Special case for zero-length matches.
824        // edx: capture start index
825        __ cmp(edi, edx);
826        // Not a zero-length match, restart.
827        __ j(not_equal, &load_char_start_regexp);
828        // edi (offset from the end) is zero if we already reached the end.
829        __ test(edi, edi);
830        __ j(zero, &exit_label_, Label::kNear);
831        // Advance current position after a zero-length match.
832        Label advance;
833        __ bind(&advance);
834        if (mode_ == UC16) {
835          __ add(edi, Immediate(2));
836        } else {
837          __ inc(edi);
838        }
839        if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
840      }
841      __ jmp(&load_char_start_regexp);
842    } else {
843      __ mov(eax, Immediate(SUCCESS));
844    }
845  }
846
847  __ bind(&exit_label_);
848  if (global()) {
849    // Return the number of successful captures.
850    __ mov(eax, Operand(ebp, kSuccessfulCaptures));
851  }
852
853  __ bind(&return_eax);
854  // Skip esp past regexp registers.
855  __ lea(esp, Operand(ebp, kBackup_ebx));
856  // Restore callee-save registers.
857  __ pop(ebx);
858  __ pop(edi);
859  __ pop(esi);
860  // Exit function frame, restore previous one.
861  __ pop(ebp);
862  __ ret(0);
863
864  // Backtrack code (branch target for conditional backtracks).
865  if (backtrack_label_.is_linked()) {
866    __ bind(&backtrack_label_);
867    Backtrack();
868  }
869
870  Label exit_with_exception;
871
872  // Preempt-code
873  if (check_preempt_label_.is_linked()) {
874    SafeCallTarget(&check_preempt_label_);
875
876    __ push(backtrack_stackpointer());
877    __ push(edi);
878
879    CallCheckStackGuardState(ebx);
880    __ or_(eax, eax);
881    // If returning non-zero, we should end execution with the given
882    // result as return value.
883    __ j(not_zero, &return_eax);
884
885    __ pop(edi);
886    __ pop(backtrack_stackpointer());
887    // String might have moved: Reload esi from frame.
888    __ mov(esi, Operand(ebp, kInputEnd));
889    SafeReturn();
890  }
891
892  // Backtrack stack overflow code.
893  if (stack_overflow_label_.is_linked()) {
894    SafeCallTarget(&stack_overflow_label_);
895    // Reached if the backtrack-stack limit has been hit.
896
897    Label grow_failed;
898    // Save registers before calling C function
899    __ push(esi);
900    __ push(edi);
901
902    // Call GrowStack(backtrack_stackpointer())
903    static const int num_arguments = 3;
904    __ PrepareCallCFunction(num_arguments, ebx);
905    __ mov(Operand(esp, 2 * kPointerSize),
906           Immediate(ExternalReference::isolate_address(isolate())));
907    __ lea(eax, Operand(ebp, kStackHighEnd));
908    __ mov(Operand(esp, 1 * kPointerSize), eax);
909    __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer());
910    ExternalReference grow_stack =
911        ExternalReference::re_grow_stack(isolate());
912    __ CallCFunction(grow_stack, num_arguments);
913    // If return NULL, we have failed to grow the stack, and
914    // must exit with a stack-overflow exception.
915    __ or_(eax, eax);
916    __ j(equal, &exit_with_exception);
917    // Otherwise use return value as new stack pointer.
918    __ mov(backtrack_stackpointer(), eax);
919    // Restore saved registers and continue.
920    __ pop(edi);
921    __ pop(esi);
922    SafeReturn();
923  }
924
925  if (exit_with_exception.is_linked()) {
926    // If any of the code above needed to exit with an exception.
927    __ bind(&exit_with_exception);
928    // Exit with Result EXCEPTION(-1) to signal thrown exception.
929    __ mov(eax, EXCEPTION);
930    __ jmp(&return_eax);
931  }
932
933  CodeDesc code_desc;
934  masm_->GetCode(&code_desc);
935  Handle<Code> code =
936      isolate()->factory()->NewCode(code_desc,
937                                    Code::ComputeFlags(Code::REGEXP),
938                                    masm_->CodeObject());
939  PROFILE(masm_->isolate(),
940          RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
941  return Handle<HeapObject>::cast(code);
942}
943
944
945void RegExpMacroAssemblerIA32::GoTo(Label* to) {
946  BranchOrBacktrack(no_condition, to);
947}
948
949
950void RegExpMacroAssemblerIA32::IfRegisterGE(int reg,
951                                            int comparand,
952                                            Label* if_ge) {
953  __ cmp(register_location(reg), Immediate(comparand));
954  BranchOrBacktrack(greater_equal, if_ge);
955}
956
957
958void RegExpMacroAssemblerIA32::IfRegisterLT(int reg,
959                                            int comparand,
960                                            Label* if_lt) {
961  __ cmp(register_location(reg), Immediate(comparand));
962  BranchOrBacktrack(less, if_lt);
963}
964
965
966void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg,
967                                               Label* if_eq) {
968  __ cmp(edi, register_location(reg));
969  BranchOrBacktrack(equal, if_eq);
970}
971
972
973RegExpMacroAssembler::IrregexpImplementation
974    RegExpMacroAssemblerIA32::Implementation() {
975  return kIA32Implementation;
976}
977
978
979void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
980                                                    Label* on_end_of_input,
981                                                    bool check_bounds,
982                                                    int characters) {
983  DCHECK(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
984  if (check_bounds) {
985    if (cp_offset >= 0) {
986      CheckPosition(cp_offset + characters - 1, on_end_of_input);
987    } else {
988      CheckPosition(cp_offset, on_end_of_input);
989    }
990  }
991  LoadCurrentCharacterUnchecked(cp_offset, characters);
992}
993
994
995void RegExpMacroAssemblerIA32::PopCurrentPosition() {
996  Pop(edi);
997}
998
999
1000void RegExpMacroAssemblerIA32::PopRegister(int register_index) {
1001  Pop(eax);
1002  __ mov(register_location(register_index), eax);
1003}
1004
1005
1006void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) {
1007  Push(Immediate::CodeRelativeOffset(label));
1008  CheckStackLimit();
1009}
1010
1011
1012void RegExpMacroAssemblerIA32::PushCurrentPosition() {
1013  Push(edi);
1014}
1015
1016
1017void RegExpMacroAssemblerIA32::PushRegister(int register_index,
1018                                            StackCheckFlag check_stack_limit) {
1019  __ mov(eax, register_location(register_index));
1020  Push(eax);
1021  if (check_stack_limit) CheckStackLimit();
1022}
1023
1024
1025void RegExpMacroAssemblerIA32::ReadCurrentPositionFromRegister(int reg) {
1026  __ mov(edi, register_location(reg));
1027}
1028
1029
1030void RegExpMacroAssemblerIA32::ReadStackPointerFromRegister(int reg) {
1031  __ mov(backtrack_stackpointer(), register_location(reg));
1032  __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
1033}
1034
1035void RegExpMacroAssemblerIA32::SetCurrentPositionFromEnd(int by)  {
1036  Label after_position;
1037  __ cmp(edi, -by * char_size());
1038  __ j(greater_equal, &after_position, Label::kNear);
1039  __ mov(edi, -by * char_size());
1040  // On RegExp code entry (where this operation is used), the character before
1041  // the current position is expected to be already loaded.
1042  // We have advanced the position, so it's safe to read backwards.
1043  LoadCurrentCharacterUnchecked(-1, 1);
1044  __ bind(&after_position);
1045}
1046
1047
1048void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) {
1049  DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1050  __ mov(register_location(register_index), Immediate(to));
1051}
1052
1053
1054bool RegExpMacroAssemblerIA32::Succeed() {
1055  __ jmp(&success_label_);
1056  return global();
1057}
1058
1059
1060void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
1061                                                              int cp_offset) {
1062  if (cp_offset == 0) {
1063    __ mov(register_location(reg), edi);
1064  } else {
1065    __ lea(eax, Operand(edi, cp_offset * char_size()));
1066    __ mov(register_location(reg), eax);
1067  }
1068}
1069
1070
1071void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
1072  DCHECK(reg_from <= reg_to);
1073  __ mov(eax, Operand(ebp, kStringStartMinusOne));
1074  for (int reg = reg_from; reg <= reg_to; reg++) {
1075    __ mov(register_location(reg), eax);
1076  }
1077}
1078
1079
1080void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
1081  __ mov(eax, backtrack_stackpointer());
1082  __ sub(eax, Operand(ebp, kStackHighEnd));
1083  __ mov(register_location(reg), eax);
1084}
1085
1086
1087// Private methods:
1088
1089void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) {
1090  static const int num_arguments = 3;
1091  __ PrepareCallCFunction(num_arguments, scratch);
1092  // RegExp code frame pointer.
1093  __ mov(Operand(esp, 2 * kPointerSize), ebp);
1094  // Code* of self.
1095  __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject()));
1096  // Next address on the stack (will be address of return address).
1097  __ lea(eax, Operand(esp, -kPointerSize));
1098  __ mov(Operand(esp, 0 * kPointerSize), eax);
1099  ExternalReference check_stack_guard =
1100      ExternalReference::re_check_stack_guard_state(isolate());
1101  __ CallCFunction(check_stack_guard, num_arguments);
1102}
1103
1104
1105// Helper function for reading a value out of a stack frame.
1106template <typename T>
1107static T& frame_entry(Address re_frame, int frame_offset) {
1108  return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
1109}
1110
1111
1112template <typename T>
1113static T* frame_entry_address(Address re_frame, int frame_offset) {
1114  return reinterpret_cast<T*>(re_frame + frame_offset);
1115}
1116
1117
1118int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
1119                                                   Code* re_code,
1120                                                   Address re_frame) {
1121  return NativeRegExpMacroAssembler::CheckStackGuardState(
1122      frame_entry<Isolate*>(re_frame, kIsolate),
1123      frame_entry<int>(re_frame, kStartIndex),
1124      frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1125      frame_entry_address<String*>(re_frame, kInputString),
1126      frame_entry_address<const byte*>(re_frame, kInputStart),
1127      frame_entry_address<const byte*>(re_frame, kInputEnd));
1128}
1129
1130
1131Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
1132  DCHECK(register_index < (1<<30));
1133  if (num_registers_ <= register_index) {
1134    num_registers_ = register_index + 1;
1135  }
1136  return Operand(ebp, kRegisterZero - register_index * kPointerSize);
1137}
1138
1139
1140void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset,
1141                                             Label* on_outside_input) {
1142  if (cp_offset >= 0) {
1143    __ cmp(edi, -cp_offset * char_size());
1144    BranchOrBacktrack(greater_equal, on_outside_input);
1145  } else {
1146    __ lea(eax, Operand(edi, cp_offset * char_size()));
1147    __ cmp(eax, Operand(ebp, kStringStartMinusOne));
1148    BranchOrBacktrack(less_equal, on_outside_input);
1149  }
1150}
1151
1152
1153void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
1154                                                 Label* to) {
1155  if (condition < 0) {  // No condition
1156    if (to == NULL) {
1157      Backtrack();
1158      return;
1159    }
1160    __ jmp(to);
1161    return;
1162  }
1163  if (to == NULL) {
1164    __ j(condition, &backtrack_label_);
1165    return;
1166  }
1167  __ j(condition, to);
1168}
1169
1170
1171void RegExpMacroAssemblerIA32::SafeCall(Label* to) {
1172  Label return_to;
1173  __ push(Immediate::CodeRelativeOffset(&return_to));
1174  __ jmp(to);
1175  __ bind(&return_to);
1176}
1177
1178
1179void RegExpMacroAssemblerIA32::SafeReturn() {
1180  __ pop(ebx);
1181  __ add(ebx, Immediate(masm_->CodeObject()));
1182  __ jmp(ebx);
1183}
1184
1185
1186void RegExpMacroAssemblerIA32::SafeCallTarget(Label* name) {
1187  __ bind(name);
1188}
1189
1190
1191void RegExpMacroAssemblerIA32::Push(Register source) {
1192  DCHECK(!source.is(backtrack_stackpointer()));
1193  // Notice: This updates flags, unlike normal Push.
1194  __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
1195  __ mov(Operand(backtrack_stackpointer(), 0), source);
1196}
1197
1198
1199void RegExpMacroAssemblerIA32::Push(Immediate value) {
1200  // Notice: This updates flags, unlike normal Push.
1201  __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
1202  __ mov(Operand(backtrack_stackpointer(), 0), value);
1203}
1204
1205
1206void RegExpMacroAssemblerIA32::Pop(Register target) {
1207  DCHECK(!target.is(backtrack_stackpointer()));
1208  __ mov(target, Operand(backtrack_stackpointer(), 0));
1209  // Notice: This updates flags, unlike normal Pop.
1210  __ add(backtrack_stackpointer(), Immediate(kPointerSize));
1211}
1212
1213
1214void RegExpMacroAssemblerIA32::CheckPreemption() {
1215  // Check for preemption.
1216  Label no_preempt;
1217  ExternalReference stack_limit =
1218      ExternalReference::address_of_stack_limit(isolate());
1219  __ cmp(esp, Operand::StaticVariable(stack_limit));
1220  __ j(above, &no_preempt);
1221
1222  SafeCall(&check_preempt_label_);
1223
1224  __ bind(&no_preempt);
1225}
1226
1227
1228void RegExpMacroAssemblerIA32::CheckStackLimit() {
1229  Label no_stack_overflow;
1230  ExternalReference stack_limit =
1231      ExternalReference::address_of_regexp_stack_limit(isolate());
1232  __ cmp(backtrack_stackpointer(), Operand::StaticVariable(stack_limit));
1233  __ j(above, &no_stack_overflow);
1234
1235  SafeCall(&stack_overflow_label_);
1236
1237  __ bind(&no_stack_overflow);
1238}
1239
1240
1241void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
1242                                                             int characters) {
1243  if (mode_ == LATIN1) {
1244    if (characters == 4) {
1245      __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
1246    } else if (characters == 2) {
1247      __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
1248    } else {
1249      DCHECK(characters == 1);
1250      __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
1251    }
1252  } else {
1253    DCHECK(mode_ == UC16);
1254    if (characters == 2) {
1255      __ mov(current_character(),
1256             Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1257    } else {
1258      DCHECK(characters == 1);
1259      __ movzx_w(current_character(),
1260                 Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1261    }
1262  }
1263}
1264
1265
1266#undef __
1267
1268#endif  // V8_INTERPRETED_REGEXP
1269
1270}  // namespace internal
1271}  // namespace v8
1272
1273#endif  // V8_TARGET_ARCH_IA32
1274