1// Copyright 2012 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#if V8_TARGET_ARCH_ARM 6 7#include "src/regexp/arm/regexp-macro-assembler-arm.h" 8 9#include "src/code-stubs.h" 10#include "src/log.h" 11#include "src/macro-assembler.h" 12#include "src/regexp/regexp-macro-assembler.h" 13#include "src/regexp/regexp-stack.h" 14#include "src/unicode.h" 15 16namespace v8 { 17namespace internal { 18 19#ifndef V8_INTERPRETED_REGEXP 20/* 21 * This assembler uses the following register assignment convention 22 * - r4 : Temporarily stores the index of capture start after a matching pass 23 * for a global regexp. 24 * - r5 : Pointer to current code object (Code*) including heap object tag. 25 * - r6 : Current position in input, as negative offset from end of string. 26 * Please notice that this is the byte offset, not the character offset! 27 * - r7 : Currently loaded character. Must be loaded using 28 * LoadCurrentCharacter before using any of the dispatch methods. 29 * - r8 : Points to tip of backtrack stack 30 * - r9 : Unused, might be used by C code and expected unchanged. 31 * - r10 : End of input (points to byte after last character in input). 32 * - r11 : Frame pointer. Used to access arguments, local variables and 33 * RegExp registers. 34 * - r12 : IP register, used by assembler. Very volatile. 35 * - r13/sp : Points to tip of C stack. 36 * 37 * The remaining registers are free for computations. 38 * Each call to a public method should retain this convention. 39 * 40 * The stack will have the following structure: 41 * - fp[56] Isolate* isolate (address of the current isolate) 42 * - fp[52] direct_call (if 1, direct call from JavaScript code, 43 * if 0, call through the runtime system). 44 * - fp[48] stack_area_base (high end of the memory area to use as 45 * backtracking stack). 
46 * - fp[44] capture array size (may fit multiple sets of matches) 47 * - fp[40] int* capture_array (int[num_saved_registers_], for output). 48 * - fp[36] secondary link/return address used by native call. 49 * --- sp when called --- 50 * - fp[32] return address (lr). 51 * - fp[28] old frame pointer (r11). 52 * - fp[0..24] backup of registers r4..r10. 53 * --- frame pointer ---- 54 * - fp[-4] end of input (address of end of string). 55 * - fp[-8] start of input (address of first character in string). 56 * - fp[-12] start index (character index of start). 57 * - fp[-16] void* input_string (location of a handle containing the string). 58 * - fp[-20] success counter (only for global regexps to count matches). 59 * - fp[-24] Offset of location before start of input (effectively character 60 * string start - 1). Used to initialize capture registers to a 61 * non-position. 62 * - fp[-28] At start (if 1, we are starting at the start of the 63 * string, otherwise 0) 64 * - fp[-32] register 0 (Only positions must be stored in the first 65 * - register 1 num_saved_registers_ registers) 66 * - ... 67 * - register num_registers-1 68 * --- sp --- 69 * 70 * The first num_saved_registers_ registers are initialized to point to 71 * "character -1" in the string (i.e., char_size() bytes before the first 72 * character of the string). The remaining registers start out as garbage. 73 * 74 * The data up to the return address must be placed there by the calling 75 * code and the remaining arguments are passed in registers, e.g. by calling the 76 * code entry as cast to a function with the signature: 77 * int (*match)(String* input_string, 78 * int start_index, 79 * Address start, 80 * Address end, 81 * Address secondary_return_address, // Only used by native call. 
82 * int* capture_output_array, 83 * byte* stack_area_base, 84 * bool direct_call = false) 85 * The call is performed by NativeRegExpMacroAssembler::Execute() 86 * (in regexp-macro-assembler.cc) via the CALL_GENERATED_REGEXP_CODE macro 87 * in arm/simulator-arm.h. 88 * When calling as a non-direct call (i.e., from C++ code), the return address 89 * area is overwritten with the LR register by the RegExp code. When doing a 90 * direct call from generated code, the return address is placed there by 91 * the calling code, as in a normal exit frame. 92 */ 93 94#define __ ACCESS_MASM(masm_) 95 96RegExpMacroAssemblerARM::RegExpMacroAssemblerARM(Isolate* isolate, Zone* zone, 97 Mode mode, 98 int registers_to_save) 99 : NativeRegExpMacroAssembler(isolate, zone), 100 masm_(new MacroAssembler(isolate, NULL, kRegExpCodeSize, 101 CodeObjectRequired::kYes)), 102 mode_(mode), 103 num_registers_(registers_to_save), 104 num_saved_registers_(registers_to_save), 105 entry_label_(), 106 start_label_(), 107 success_label_(), 108 backtrack_label_(), 109 exit_label_() { 110 DCHECK_EQ(0, registers_to_save % 2); 111 __ jmp(&entry_label_); // We'll write the entry code later. 112 __ bind(&start_label_); // And then continue from here. 113} 114 115 116RegExpMacroAssemblerARM::~RegExpMacroAssemblerARM() { 117 delete masm_; 118 // Unuse labels in case we throw away the assembler without calling GetCode. 
119 entry_label_.Unuse(); 120 start_label_.Unuse(); 121 success_label_.Unuse(); 122 backtrack_label_.Unuse(); 123 exit_label_.Unuse(); 124 check_preempt_label_.Unuse(); 125 stack_overflow_label_.Unuse(); 126} 127 128 129int RegExpMacroAssemblerARM::stack_limit_slack() { 130 return RegExpStack::kStackLimitSlack; 131} 132 133 134void RegExpMacroAssemblerARM::AdvanceCurrentPosition(int by) { 135 if (by != 0) { 136 __ add(current_input_offset(), 137 current_input_offset(), Operand(by * char_size())); 138 } 139} 140 141 142void RegExpMacroAssemblerARM::AdvanceRegister(int reg, int by) { 143 DCHECK(reg >= 0); 144 DCHECK(reg < num_registers_); 145 if (by != 0) { 146 __ ldr(r0, register_location(reg)); 147 __ add(r0, r0, Operand(by)); 148 __ str(r0, register_location(reg)); 149 } 150} 151 152 153void RegExpMacroAssemblerARM::Backtrack() { 154 CheckPreemption(); 155 // Pop Code* offset from backtrack stack, add Code* and jump to location. 156 Pop(r0); 157 __ add(pc, r0, Operand(code_pointer())); 158} 159 160 161void RegExpMacroAssemblerARM::Bind(Label* label) { 162 __ bind(label); 163} 164 165 166void RegExpMacroAssemblerARM::CheckCharacter(uint32_t c, Label* on_equal) { 167 __ cmp(current_character(), Operand(c)); 168 BranchOrBacktrack(eq, on_equal); 169} 170 171 172void RegExpMacroAssemblerARM::CheckCharacterGT(uc16 limit, Label* on_greater) { 173 __ cmp(current_character(), Operand(limit)); 174 BranchOrBacktrack(gt, on_greater); 175} 176 177 178void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) { 179 __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); 180 __ add(r0, current_input_offset(), Operand(-char_size())); 181 __ cmp(r0, r1); 182 BranchOrBacktrack(eq, on_at_start); 183} 184 185 186void RegExpMacroAssemblerARM::CheckNotAtStart(int cp_offset, 187 Label* on_not_at_start) { 188 __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne)); 189 __ add(r0, current_input_offset(), 190 Operand(-char_size() + cp_offset * char_size())); 191 __ cmp(r0, 
r1); 192 BranchOrBacktrack(ne, on_not_at_start); 193} 194 195 196void RegExpMacroAssemblerARM::CheckCharacterLT(uc16 limit, Label* on_less) { 197 __ cmp(current_character(), Operand(limit)); 198 BranchOrBacktrack(lt, on_less); 199} 200 201 202void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) { 203 __ ldr(r0, MemOperand(backtrack_stackpointer(), 0)); 204 __ cmp(current_input_offset(), r0); 205 __ add(backtrack_stackpointer(), 206 backtrack_stackpointer(), Operand(kPointerSize), LeaveCC, eq); 207 BranchOrBacktrack(eq, on_equal); 208} 209 210 211void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase( 212 int start_reg, bool read_backward, bool unicode, Label* on_no_match) { 213 Label fallthrough; 214 __ ldr(r0, register_location(start_reg)); // Index of start of capture 215 __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture 216 __ sub(r1, r1, r0, SetCC); // Length of capture. 217 218 // At this point, the capture registers are either both set or both cleared. 219 // If the capture length is zero, then the capture is either empty or cleared. 220 // Fall through in both cases. 221 __ b(eq, &fallthrough); 222 223 // Check that there are enough characters left in the input. 224 if (read_backward) { 225 __ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); 226 __ add(r3, r3, r1); 227 __ cmp(current_input_offset(), r3); 228 BranchOrBacktrack(le, on_no_match); 229 } else { 230 __ cmn(r1, Operand(current_input_offset())); 231 BranchOrBacktrack(gt, on_no_match); 232 } 233 234 if (mode_ == LATIN1) { 235 Label success; 236 Label fail; 237 Label loop_check; 238 239 // r0 - offset of start of capture 240 // r1 - length of capture 241 __ add(r0, r0, end_of_input_address()); 242 __ add(r2, end_of_input_address(), current_input_offset()); 243 if (read_backward) { 244 __ sub(r2, r2, r1); // Offset by length when matching backwards. 245 } 246 __ add(r1, r0, r1); 247 248 // r0 - Address of start of capture. 
249 // r1 - Address of end of capture 250 // r2 - Address of current input position. 251 252 Label loop; 253 __ bind(&loop); 254 __ ldrb(r3, MemOperand(r0, char_size(), PostIndex)); 255 __ ldrb(r4, MemOperand(r2, char_size(), PostIndex)); 256 __ cmp(r4, r3); 257 __ b(eq, &loop_check); 258 259 // Mismatch, try case-insensitive match (converting letters to lower-case). 260 __ orr(r3, r3, Operand(0x20)); // Convert capture character to lower-case. 261 __ orr(r4, r4, Operand(0x20)); // Also convert input character. 262 __ cmp(r4, r3); 263 __ b(ne, &fail); 264 __ sub(r3, r3, Operand('a')); 265 __ cmp(r3, Operand('z' - 'a')); // Is r3 a lowercase letter? 266 __ b(ls, &loop_check); // In range 'a'-'z'. 267 // Latin-1: Check for values in range [224,254] but not 247. 268 __ sub(r3, r3, Operand(224 - 'a')); 269 __ cmp(r3, Operand(254 - 224)); 270 __ b(hi, &fail); // Weren't Latin-1 letters. 271 __ cmp(r3, Operand(247 - 224)); // Check for 247. 272 __ b(eq, &fail); 273 274 __ bind(&loop_check); 275 __ cmp(r0, r1); 276 __ b(lt, &loop); 277 __ jmp(&success); 278 279 __ bind(&fail); 280 BranchOrBacktrack(al, on_no_match); 281 282 __ bind(&success); 283 // Compute new value of character position after the matched part. 284 __ sub(current_input_offset(), r2, end_of_input_address()); 285 if (read_backward) { 286 __ ldr(r0, register_location(start_reg)); // Index of start of capture 287 __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture 288 __ add(current_input_offset(), current_input_offset(), r0); 289 __ sub(current_input_offset(), current_input_offset(), r1); 290 } 291 } else { 292 DCHECK(mode_ == UC16); 293 int argument_count = 4; 294 __ PrepareCallCFunction(argument_count, r2); 295 296 // r0 - offset of start of capture 297 // r1 - length of capture 298 299 // Put arguments into arguments registers. 300 // Parameters are 301 // r0: Address byte_offset1 - Address captured substring's start. 
302 // r1: Address byte_offset2 - Address of current character position. 303 // r2: size_t byte_length - length of capture in bytes(!) 304 // r3: Isolate* isolate or 0 if unicode flag. 305 306 // Address of start of capture. 307 __ add(r0, r0, Operand(end_of_input_address())); 308 // Length of capture. 309 __ mov(r2, Operand(r1)); 310 // Save length in callee-save register for use on return. 311 __ mov(r4, Operand(r1)); 312 // Address of current input position. 313 __ add(r1, current_input_offset(), end_of_input_address()); 314 if (read_backward) { 315 __ sub(r1, r1, r4); 316 } 317 // Isolate. 318#ifdef V8_I18N_SUPPORT 319 if (unicode) { 320 __ mov(r3, Operand(0)); 321 } else // NOLINT 322#endif // V8_I18N_SUPPORT 323 { 324 __ mov(r3, Operand(ExternalReference::isolate_address(isolate()))); 325 } 326 327 { 328 AllowExternalCallThatCantCauseGC scope(masm_); 329 ExternalReference function = 330 ExternalReference::re_case_insensitive_compare_uc16(isolate()); 331 __ CallCFunction(function, argument_count); 332 } 333 334 // Check if function returned non-zero for success or zero for failure. 335 __ cmp(r0, Operand::Zero()); 336 BranchOrBacktrack(eq, on_no_match); 337 338 // On success, advance position by length of capture. 339 if (read_backward) { 340 __ sub(current_input_offset(), current_input_offset(), r4); 341 } else { 342 __ add(current_input_offset(), current_input_offset(), r4); 343 } 344 } 345 346 __ bind(&fallthrough); 347} 348 349 350void RegExpMacroAssemblerARM::CheckNotBackReference(int start_reg, 351 bool read_backward, 352 Label* on_no_match) { 353 Label fallthrough; 354 Label success; 355 356 // Find length of back-referenced capture. 357 __ ldr(r0, register_location(start_reg)); 358 __ ldr(r1, register_location(start_reg + 1)); 359 __ sub(r1, r1, r0, SetCC); // Length to check. 360 361 // At this point, the capture registers are either both set or both cleared. 362 // If the capture length is zero, then the capture is either empty or cleared. 
363 // Fall through in both cases. 364 __ b(eq, &fallthrough); 365 366 // Check that there are enough characters left in the input. 367 if (read_backward) { 368 __ ldr(r3, MemOperand(frame_pointer(), kStringStartMinusOne)); 369 __ add(r3, r3, r1); 370 __ cmp(current_input_offset(), r3); 371 BranchOrBacktrack(lt, on_no_match); 372 } else { 373 __ cmn(r1, Operand(current_input_offset())); 374 BranchOrBacktrack(gt, on_no_match); 375 } 376 377 // r0 - offset of start of capture 378 // r1 - length of capture 379 __ add(r0, r0, end_of_input_address()); 380 __ add(r2, end_of_input_address(), current_input_offset()); 381 if (read_backward) { 382 __ sub(r2, r2, r1); // Offset by length when matching backwards. 383 } 384 __ add(r1, r0, r1); 385 386 Label loop; 387 __ bind(&loop); 388 if (mode_ == LATIN1) { 389 __ ldrb(r3, MemOperand(r0, char_size(), PostIndex)); 390 __ ldrb(r4, MemOperand(r2, char_size(), PostIndex)); 391 } else { 392 DCHECK(mode_ == UC16); 393 __ ldrh(r3, MemOperand(r0, char_size(), PostIndex)); 394 __ ldrh(r4, MemOperand(r2, char_size(), PostIndex)); 395 } 396 __ cmp(r3, r4); 397 BranchOrBacktrack(ne, on_no_match); 398 __ cmp(r0, r1); 399 __ b(lt, &loop); 400 401 // Move current character position to position after match. 
402 __ sub(current_input_offset(), r2, end_of_input_address()); 403 if (read_backward) { 404 __ ldr(r0, register_location(start_reg)); // Index of start of capture 405 __ ldr(r1, register_location(start_reg + 1)); // Index of end of capture 406 __ add(current_input_offset(), current_input_offset(), r0); 407 __ sub(current_input_offset(), current_input_offset(), r1); 408 } 409 410 __ bind(&fallthrough); 411} 412 413 414void RegExpMacroAssemblerARM::CheckNotCharacter(unsigned c, 415 Label* on_not_equal) { 416 __ cmp(current_character(), Operand(c)); 417 BranchOrBacktrack(ne, on_not_equal); 418} 419 420 421void RegExpMacroAssemblerARM::CheckCharacterAfterAnd(uint32_t c, 422 uint32_t mask, 423 Label* on_equal) { 424 if (c == 0) { 425 __ tst(current_character(), Operand(mask)); 426 } else { 427 __ and_(r0, current_character(), Operand(mask)); 428 __ cmp(r0, Operand(c)); 429 } 430 BranchOrBacktrack(eq, on_equal); 431} 432 433 434void RegExpMacroAssemblerARM::CheckNotCharacterAfterAnd(unsigned c, 435 unsigned mask, 436 Label* on_not_equal) { 437 if (c == 0) { 438 __ tst(current_character(), Operand(mask)); 439 } else { 440 __ and_(r0, current_character(), Operand(mask)); 441 __ cmp(r0, Operand(c)); 442 } 443 BranchOrBacktrack(ne, on_not_equal); 444} 445 446 447void RegExpMacroAssemblerARM::CheckNotCharacterAfterMinusAnd( 448 uc16 c, 449 uc16 minus, 450 uc16 mask, 451 Label* on_not_equal) { 452 DCHECK(minus < String::kMaxUtf16CodeUnit); 453 __ sub(r0, current_character(), Operand(minus)); 454 __ and_(r0, r0, Operand(mask)); 455 __ cmp(r0, Operand(c)); 456 BranchOrBacktrack(ne, on_not_equal); 457} 458 459 460void RegExpMacroAssemblerARM::CheckCharacterInRange( 461 uc16 from, 462 uc16 to, 463 Label* on_in_range) { 464 __ sub(r0, current_character(), Operand(from)); 465 __ cmp(r0, Operand(to - from)); 466 BranchOrBacktrack(ls, on_in_range); // Unsigned lower-or-same condition. 
467} 468 469 470void RegExpMacroAssemblerARM::CheckCharacterNotInRange( 471 uc16 from, 472 uc16 to, 473 Label* on_not_in_range) { 474 __ sub(r0, current_character(), Operand(from)); 475 __ cmp(r0, Operand(to - from)); 476 BranchOrBacktrack(hi, on_not_in_range); // Unsigned higher condition. 477} 478 479 480void RegExpMacroAssemblerARM::CheckBitInTable( 481 Handle<ByteArray> table, 482 Label* on_bit_set) { 483 __ mov(r0, Operand(table)); 484 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) { 485 __ and_(r1, current_character(), Operand(kTableSize - 1)); 486 __ add(r1, r1, Operand(ByteArray::kHeaderSize - kHeapObjectTag)); 487 } else { 488 __ add(r1, 489 current_character(), 490 Operand(ByteArray::kHeaderSize - kHeapObjectTag)); 491 } 492 __ ldrb(r0, MemOperand(r0, r1)); 493 __ cmp(r0, Operand::Zero()); 494 BranchOrBacktrack(ne, on_bit_set); 495} 496 497 498bool RegExpMacroAssemblerARM::CheckSpecialCharacterClass(uc16 type, 499 Label* on_no_match) { 500 // Range checks (c in min..max) are generally implemented by an unsigned 501 // (c - min) <= (max - min) check 502 switch (type) { 503 case 's': 504 // Match space-characters 505 if (mode_ == LATIN1) { 506 // One byte space characters are '\t'..'\r', ' ' and \u00a0. 507 Label success; 508 __ cmp(current_character(), Operand(' ')); 509 __ b(eq, &success); 510 // Check range 0x09..0x0d 511 __ sub(r0, current_character(), Operand('\t')); 512 __ cmp(r0, Operand('\r' - '\t')); 513 __ b(ls, &success); 514 // \u00a0 (NBSP). 515 __ cmp(r0, Operand(0x00a0 - '\t')); 516 BranchOrBacktrack(ne, on_no_match); 517 __ bind(&success); 518 return true; 519 } 520 return false; 521 case 'S': 522 // The emitted code for generic character classes is good enough. 
523 return false; 524 case 'd': 525 // Match ASCII digits ('0'..'9') 526 __ sub(r0, current_character(), Operand('0')); 527 __ cmp(r0, Operand('9' - '0')); 528 BranchOrBacktrack(hi, on_no_match); 529 return true; 530 case 'D': 531 // Match non ASCII-digits 532 __ sub(r0, current_character(), Operand('0')); 533 __ cmp(r0, Operand('9' - '0')); 534 BranchOrBacktrack(ls, on_no_match); 535 return true; 536 case '.': { 537 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 538 __ eor(r0, current_character(), Operand(0x01)); 539 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 540 __ sub(r0, r0, Operand(0x0b)); 541 __ cmp(r0, Operand(0x0c - 0x0b)); 542 BranchOrBacktrack(ls, on_no_match); 543 if (mode_ == UC16) { 544 // Compare original value to 0x2028 and 0x2029, using the already 545 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 546 // 0x201d (0x2028 - 0x0b) or 0x201e. 547 __ sub(r0, r0, Operand(0x2028 - 0x0b)); 548 __ cmp(r0, Operand(1)); 549 BranchOrBacktrack(ls, on_no_match); 550 } 551 return true; 552 } 553 case 'n': { 554 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) 555 __ eor(r0, current_character(), Operand(0x01)); 556 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c 557 __ sub(r0, r0, Operand(0x0b)); 558 __ cmp(r0, Operand(0x0c - 0x0b)); 559 if (mode_ == LATIN1) { 560 BranchOrBacktrack(hi, on_no_match); 561 } else { 562 Label done; 563 __ b(ls, &done); 564 // Compare original value to 0x2028 and 0x2029, using the already 565 // computed (current_char ^ 0x01 - 0x0b). I.e., check for 566 // 0x201d (0x2028 - 0x0b) or 0x201e. 567 __ sub(r0, r0, Operand(0x2028 - 0x0b)); 568 __ cmp(r0, Operand(1)); 569 BranchOrBacktrack(hi, on_no_match); 570 __ bind(&done); 571 } 572 return true; 573 } 574 case 'w': { 575 if (mode_ != LATIN1) { 576 // Table is 256 entries, so all Latin1 characters can be tested. 
577 __ cmp(current_character(), Operand('z')); 578 BranchOrBacktrack(hi, on_no_match); 579 } 580 ExternalReference map = ExternalReference::re_word_character_map(); 581 __ mov(r0, Operand(map)); 582 __ ldrb(r0, MemOperand(r0, current_character())); 583 __ cmp(r0, Operand::Zero()); 584 BranchOrBacktrack(eq, on_no_match); 585 return true; 586 } 587 case 'W': { 588 Label done; 589 if (mode_ != LATIN1) { 590 // Table is 256 entries, so all Latin1 characters can be tested. 591 __ cmp(current_character(), Operand('z')); 592 __ b(hi, &done); 593 } 594 ExternalReference map = ExternalReference::re_word_character_map(); 595 __ mov(r0, Operand(map)); 596 __ ldrb(r0, MemOperand(r0, current_character())); 597 __ cmp(r0, Operand::Zero()); 598 BranchOrBacktrack(ne, on_no_match); 599 if (mode_ != LATIN1) { 600 __ bind(&done); 601 } 602 return true; 603 } 604 case '*': 605 // Match any character. 606 return true; 607 // No custom implementation (yet): s(UC16), S(UC16). 608 default: 609 return false; 610 } 611} 612 613 614void RegExpMacroAssemblerARM::Fail() { 615 __ mov(r0, Operand(FAILURE)); 616 __ jmp(&exit_label_); 617} 618 619 620Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) { 621 Label return_r0; 622 // Finalize code - write the entry point code now we know how many 623 // registers we need. 624 625 // Entry code: 626 __ bind(&entry_label_); 627 628 // Tell the system that we have a stack frame. Because the type is MANUAL, no 629 // is generated. 630 FrameScope scope(masm_, StackFrame::MANUAL); 631 632 // Actually emit code to start a new stack frame. 633 // Push arguments 634 // Save callee-save registers. 635 // Start new stack frame. 636 // Store link register in existing stack-cell. 637 // Order here should correspond to order of offset constants in header file. 
638 RegList registers_to_retain = r4.bit() | r5.bit() | r6.bit() | 639 r7.bit() | r8.bit() | r9.bit() | r10.bit() | fp.bit(); 640 RegList argument_registers = r0.bit() | r1.bit() | r2.bit() | r3.bit(); 641 __ stm(db_w, sp, argument_registers | registers_to_retain | lr.bit()); 642 // Set frame pointer in space for it if this is not a direct call 643 // from generated code. 644 __ add(frame_pointer(), sp, Operand(4 * kPointerSize)); 645 __ mov(r0, Operand::Zero()); 646 __ push(r0); // Make room for success counter and initialize it to 0. 647 __ push(r0); // Make room for "string start - 1" constant. 648 // Check if we have space on the stack for registers. 649 Label stack_limit_hit; 650 Label stack_ok; 651 652 ExternalReference stack_limit = 653 ExternalReference::address_of_stack_limit(isolate()); 654 __ mov(r0, Operand(stack_limit)); 655 __ ldr(r0, MemOperand(r0)); 656 __ sub(r0, sp, r0, SetCC); 657 // Handle it if the stack pointer is already below the stack limit. 658 __ b(ls, &stack_limit_hit); 659 // Check if there is room for the variable number of registers above 660 // the stack limit. 661 __ cmp(r0, Operand(num_registers_ * kPointerSize)); 662 __ b(hs, &stack_ok); 663 // Exit with OutOfMemory exception. There is not enough space on the stack 664 // for our working registers. 665 __ mov(r0, Operand(EXCEPTION)); 666 __ jmp(&return_r0); 667 668 __ bind(&stack_limit_hit); 669 CallCheckStackGuardState(r0); 670 __ cmp(r0, Operand::Zero()); 671 // If returned value is non-zero, we exit with the returned value as result. 672 __ b(ne, &return_r0); 673 674 __ bind(&stack_ok); 675 676 // Allocate space on stack for registers. 677 __ sub(sp, sp, Operand(num_registers_ * kPointerSize)); 678 // Load string end. 679 __ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); 680 // Load input start. 681 __ ldr(r0, MemOperand(frame_pointer(), kInputStart)); 682 // Find negative length (offset of start relative to end). 
683 __ sub(current_input_offset(), r0, end_of_input_address()); 684 // Set r0 to address of char before start of the input string 685 // (effectively string position -1). 686 __ ldr(r1, MemOperand(frame_pointer(), kStartIndex)); 687 __ sub(r0, current_input_offset(), Operand(char_size())); 688 __ sub(r0, r0, Operand(r1, LSL, (mode_ == UC16) ? 1 : 0)); 689 // Store this value in a local variable, for use when clearing 690 // position registers. 691 __ str(r0, MemOperand(frame_pointer(), kStringStartMinusOne)); 692 693 // Initialize code pointer register 694 __ mov(code_pointer(), Operand(masm_->CodeObject())); 695 696 Label load_char_start_regexp, start_regexp; 697 // Load newline if index is at start, previous character otherwise. 698 __ cmp(r1, Operand::Zero()); 699 __ b(ne, &load_char_start_regexp); 700 __ mov(current_character(), Operand('\n'), LeaveCC, eq); 701 __ jmp(&start_regexp); 702 703 // Global regexp restarts matching here. 704 __ bind(&load_char_start_regexp); 705 // Load previous char as initial value of current character register. 706 LoadCurrentCharacterUnchecked(-1, 1); 707 __ bind(&start_regexp); 708 709 // Initialize on-stack registers. 710 if (num_saved_registers_ > 0) { // Always is, if generated from a regexp. 711 // Fill saved registers with initial value = start offset - 1 712 if (num_saved_registers_ > 8) { 713 // Address of register 0. 714 __ add(r1, frame_pointer(), Operand(kRegisterZero)); 715 __ mov(r2, Operand(num_saved_registers_)); 716 Label init_loop; 717 __ bind(&init_loop); 718 __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex)); 719 __ sub(r2, r2, Operand(1), SetCC); 720 __ b(ne, &init_loop); 721 } else { 722 for (int i = 0; i < num_saved_registers_; i++) { 723 __ str(r0, register_location(i)); 724 } 725 } 726 } 727 728 // Initialize backtrack stack pointer. 
729 __ ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd)); 730 731 __ jmp(&start_label_); 732 733 // Exit code: 734 if (success_label_.is_linked()) { 735 // Save captures when successful. 736 __ bind(&success_label_); 737 if (num_saved_registers_ > 0) { 738 // copy captures to output 739 __ ldr(r1, MemOperand(frame_pointer(), kInputStart)); 740 __ ldr(r0, MemOperand(frame_pointer(), kRegisterOutput)); 741 __ ldr(r2, MemOperand(frame_pointer(), kStartIndex)); 742 __ sub(r1, end_of_input_address(), r1); 743 // r1 is length of input in bytes. 744 if (mode_ == UC16) { 745 __ mov(r1, Operand(r1, LSR, 1)); 746 } 747 // r1 is length of input in characters. 748 __ add(r1, r1, Operand(r2)); 749 // r1 is length of string in characters. 750 751 DCHECK_EQ(0, num_saved_registers_ % 2); 752 // Always an even number of capture registers. This allows us to 753 // unroll the loop once to add an operation between a load of a register 754 // and the following use of that register. 755 for (int i = 0; i < num_saved_registers_; i += 2) { 756 __ ldr(r2, register_location(i)); 757 __ ldr(r3, register_location(i + 1)); 758 if (i == 0 && global_with_zero_length_check()) { 759 // Keep capture start in r4 for the zero-length check later. 760 __ mov(r4, r2); 761 } 762 if (mode_ == UC16) { 763 __ add(r2, r1, Operand(r2, ASR, 1)); 764 __ add(r3, r1, Operand(r3, ASR, 1)); 765 } else { 766 __ add(r2, r1, Operand(r2)); 767 __ add(r3, r1, Operand(r3)); 768 } 769 __ str(r2, MemOperand(r0, kPointerSize, PostIndex)); 770 __ str(r3, MemOperand(r0, kPointerSize, PostIndex)); 771 } 772 } 773 774 if (global()) { 775 // Restart matching if the regular expression is flagged as global. 776 __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures)); 777 __ ldr(r1, MemOperand(frame_pointer(), kNumOutputRegisters)); 778 __ ldr(r2, MemOperand(frame_pointer(), kRegisterOutput)); 779 // Increment success counter. 
780 __ add(r0, r0, Operand(1)); 781 __ str(r0, MemOperand(frame_pointer(), kSuccessfulCaptures)); 782 // Capture results have been stored, so the number of remaining global 783 // output registers is reduced by the number of stored captures. 784 __ sub(r1, r1, Operand(num_saved_registers_)); 785 // Check whether we have enough room for another set of capture results. 786 __ cmp(r1, Operand(num_saved_registers_)); 787 __ b(lt, &return_r0); 788 789 __ str(r1, MemOperand(frame_pointer(), kNumOutputRegisters)); 790 // Advance the location for output. 791 __ add(r2, r2, Operand(num_saved_registers_ * kPointerSize)); 792 __ str(r2, MemOperand(frame_pointer(), kRegisterOutput)); 793 794 // Prepare r0 to initialize registers with its value in the next run. 795 __ ldr(r0, MemOperand(frame_pointer(), kStringStartMinusOne)); 796 797 if (global_with_zero_length_check()) { 798 // Special case for zero-length matches. 799 // r4: capture start index 800 __ cmp(current_input_offset(), r4); 801 // Not a zero-length match, restart. 802 __ b(ne, &load_char_start_regexp); 803 // Offset from the end is zero if we already reached the end. 804 __ cmp(current_input_offset(), Operand::Zero()); 805 __ b(eq, &exit_label_); 806 // Advance current position after a zero-length match. 807 Label advance; 808 __ bind(&advance); 809 __ add(current_input_offset(), 810 current_input_offset(), 811 Operand((mode_ == UC16) ? 2 : 1)); 812 if (global_unicode()) CheckNotInSurrogatePair(0, &advance); 813 } 814 815 __ b(&load_char_start_regexp); 816 } else { 817 __ mov(r0, Operand(SUCCESS)); 818 } 819 } 820 821 // Exit and return r0 822 __ bind(&exit_label_); 823 if (global()) { 824 __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures)); 825 } 826 827 __ bind(&return_r0); 828 // Skip sp past regexp registers and local variables.. 829 __ mov(sp, frame_pointer()); 830 // Restore registers r4..r11 and return (restoring lr to pc). 
831 __ ldm(ia_w, sp, registers_to_retain | pc.bit()); 832 833 // Backtrack code (branch target for conditional backtracks). 834 if (backtrack_label_.is_linked()) { 835 __ bind(&backtrack_label_); 836 Backtrack(); 837 } 838 839 Label exit_with_exception; 840 841 // Preempt-code 842 if (check_preempt_label_.is_linked()) { 843 SafeCallTarget(&check_preempt_label_); 844 845 CallCheckStackGuardState(r0); 846 __ cmp(r0, Operand::Zero()); 847 // If returning non-zero, we should end execution with the given 848 // result as return value. 849 __ b(ne, &return_r0); 850 851 // String might have moved: Reload end of string from frame. 852 __ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd)); 853 SafeReturn(); 854 } 855 856 // Backtrack stack overflow code. 857 if (stack_overflow_label_.is_linked()) { 858 SafeCallTarget(&stack_overflow_label_); 859 // Reached if the backtrack-stack limit has been hit. 860 Label grow_failed; 861 862 // Call GrowStack(backtrack_stackpointer(), &stack_base) 863 static const int num_arguments = 3; 864 __ PrepareCallCFunction(num_arguments, r0); 865 __ mov(r0, backtrack_stackpointer()); 866 __ add(r1, frame_pointer(), Operand(kStackHighEnd)); 867 __ mov(r2, Operand(ExternalReference::isolate_address(isolate()))); 868 ExternalReference grow_stack = 869 ExternalReference::re_grow_stack(isolate()); 870 __ CallCFunction(grow_stack, num_arguments); 871 // If return NULL, we have failed to grow the stack, and 872 // must exit with a stack-overflow exception. 873 __ cmp(r0, Operand::Zero()); 874 __ b(eq, &exit_with_exception); 875 // Otherwise use return value as new stack pointer. 876 __ mov(backtrack_stackpointer(), r0); 877 // Restore saved registers and continue. 878 SafeReturn(); 879 } 880 881 if (exit_with_exception.is_linked()) { 882 // If any of the code above needed to exit with an exception. 883 __ bind(&exit_with_exception); 884 // Exit with Result EXCEPTION(-1) to signal thrown exception. 
__ mov(r0, Operand(EXCEPTION));
    __ jmp(&return_r0);
  }

  CodeDesc code_desc;
  masm_->GetCode(&code_desc);
  Handle<Code> code = isolate()->factory()->NewCode(
      code_desc, Code::ComputeFlags(Code::REGEXP), masm_->CodeObject());
  PROFILE(masm_->isolate(),
          RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
  return Handle<HeapObject>::cast(code);
}


// Emits an unconditional transfer of control to |to|. Delegates to
// BranchOrBacktrack, so a NULL label emits a backtrack instead of a jump.
void RegExpMacroAssemblerARM::GoTo(Label* to) {
  BranchOrBacktrack(al, to);
}


// Emits code that loads regexp register |reg| into r0, compares it with the
// immediate |comparand| and branches to |if_ge| (or backtracks if |if_ge| is
// NULL) when condition `ge` holds. r0 is clobbered.
void RegExpMacroAssemblerARM::IfRegisterGE(int reg,
                                           int comparand,
                                           Label* if_ge) {
  __ ldr(r0, register_location(reg));
  __ cmp(r0, Operand(comparand));
  BranchOrBacktrack(ge, if_ge);
}


// Emits code that loads regexp register |reg| into r0, compares it with the
// immediate |comparand| and branches to |if_lt| (or backtracks if |if_lt| is
// NULL) when condition `lt` holds. r0 is clobbered.
void RegExpMacroAssemblerARM::IfRegisterLT(int reg,
                                           int comparand,
                                           Label* if_lt) {
  __ ldr(r0, register_location(reg));
  __ cmp(r0, Operand(comparand));
  BranchOrBacktrack(lt, if_lt);
}


// Emits code that compares regexp register |reg| with the current input
// offset register and branches to |if_eq| (or backtracks if |if_eq| is NULL)
// when the two are equal. r0 is clobbered.
void RegExpMacroAssemblerARM::IfRegisterEqPos(int reg,
                                              Label* if_eq) {
  __ ldr(r0, register_location(reg));
  __ cmp(r0, Operand(current_input_offset()));
  BranchOrBacktrack(eq, if_eq);
}


// Identifies this macro assembler as the ARM implementation.
RegExpMacroAssembler::IrregexpImplementation
    RegExpMacroAssemblerARM::Implementation() {
  return kARMImplementation;
}


// Emits code that loads |characters| characters starting |cp_offset|
// characters from the current position into the current-character register.
// When |check_bounds| is true a position check is emitted first, branching to
// |on_end_of_input| (or backtracking if it is NULL) when the access would be
// outside the input.
void RegExpMacroAssemblerARM::LoadCurrentCharacter(int cp_offset,
                                                   Label* on_end_of_input,
                                                   bool check_bounds,
                                                   int characters) {
  DCHECK(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
  if (check_bounds) {
    if (cp_offset >= 0) {
      // Reading forwards: the last character of the group must be in bounds.
      CheckPosition(cp_offset + characters - 1, on_end_of_input);
    } else {
      // Reading backwards: the position at cp_offset itself is the one checked.
      CheckPosition(cp_offset, on_end_of_input);
    }
  }
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}


// Emits code that pops the top of the backtrack stack into the current input
// offset register.
void RegExpMacroAssemblerARM::PopCurrentPosition() {
  Pop(current_input_offset());
}


// Emits code that pops the top of the backtrack stack (via r0) into regexp
// register |register_index|. r0 is clobbered.
void RegExpMacroAssemblerARM::PopRegister(int register_index) {
  Pop(r0);
  __ str(r0, register_location(register_index));
}


// Emits code that pushes the offset of |label|, as materialized by
// mov_label_offset, onto the backtrack stack and then checks the backtrack
// stack limit. r0 is clobbered.
void RegExpMacroAssemblerARM::PushBacktrack(Label* label) {
  __ mov_label_offset(r0, label);
  Push(r0);
  CheckStackLimit();
}


// Emits code that pushes the current input offset onto the backtrack stack.
// No stack limit check is emitted here.
void RegExpMacroAssemblerARM::PushCurrentPosition() {
  Push(current_input_offset());
}


// Emits code that pushes regexp register |register_index| (via r0) onto the
// backtrack stack. The backtrack stack limit check is emitted only when
// |check_stack_limit| is non-zero. r0 is clobbered.
void RegExpMacroAssemblerARM::PushRegister(int register_index,
                                           StackCheckFlag check_stack_limit) {
  __ ldr(r0, register_location(register_index));
  Push(r0);
  if (check_stack_limit) CheckStackLimit();
}


// Emits code that loads the current input offset from regexp register |reg|.
void RegExpMacroAssemblerARM::ReadCurrentPositionFromRegister(int reg) {
  __ ldr(current_input_offset(), register_location(reg));
}


// Emits code that restores the backtrack stack pointer from regexp register
// |reg|. The register holds the pointer relative to the value stored in the
// kStackHighEnd frame slot (see WriteStackPointerToRegister), so that value
// is added back here. r0 is clobbered.
void RegExpMacroAssemblerARM::ReadStackPointerFromRegister(int reg) {
  __ ldr(backtrack_stackpointer(), register_location(reg));
  __ ldr(r0, MemOperand(frame_pointer(), kStackHighEnd));
  __ add(backtrack_stackpointer(), backtrack_stackpointer(), Operand(r0));
}


// Emits code that moves the current position forward to |by| characters
// before the end of input, unless the current offset is already greater than
// or equal to that offset (the input offset is a negative byte offset from
// the end of the string).
void RegExpMacroAssemblerARM::SetCurrentPositionFromEnd(int by) {
  Label after_position;
  __ cmp(current_input_offset(), Operand(-by * char_size()));
  __ b(ge, &after_position);
  __ mov(current_input_offset(), Operand(-by * char_size()));
  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);
  __ bind(&after_position);
}


// Emits code that stores the immediate |to| in regexp register
// |register_index|. Only registers at or above num_saved_registers_ may be
// written this way; the lower ones hold positions. r0 is clobbered.
void RegExpMacroAssemblerARM::SetRegister(int register_index, int to) {
  DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
  __ mov(r0, Operand(to));
  __ str(r0, register_location(register_index));
}


// Emits a jump to the success label. Returns the value of global().
bool RegExpMacroAssemblerARM::Succeed() {
  __ jmp(&success_label_);
  return global();
}


// Emits code that stores the current input offset, adjusted by |cp_offset|
// characters, in regexp register |reg|. r0 is clobbered when |cp_offset| is
// non-zero.
void RegExpMacroAssemblerARM::WriteCurrentPositionToRegister(int reg,
                                                             int cp_offset) {
  if (cp_offset == 0) {
    __ str(current_input_offset(), register_location(reg));
  } else {
    __ add(r0, current_input_offset(), Operand(cp_offset * char_size()));
    __ str(r0, register_location(reg));
  }
}


// Emits code that resets regexp registers |reg_from| through |reg_to|
// (inclusive) to the value held in the kStringStartMinusOne frame slot.
// r0 is clobbered.
void RegExpMacroAssemblerARM::ClearRegisters(int reg_from, int reg_to) {
  DCHECK(reg_from <= reg_to);
  __ ldr(r0, MemOperand(frame_pointer(), kStringStartMinusOne));
  for (int reg = reg_from; reg <= reg_to; reg++) {
    __ str(r0, register_location(reg));
  }
}


// Emits code that saves the backtrack stack pointer in regexp register |reg|,
// expressed relative to the value in the kStackHighEnd frame slot.
// r0 and r1 are clobbered.
// NOTE(review): storing a relative value presumably keeps the saved pointer
// usable if the backtrack stack is reallocated - confirm.
void RegExpMacroAssemblerARM::WriteStackPointerToRegister(int reg) {
  __ ldr(r1, MemOperand(frame_pointer(), kStackHighEnd));
  __ sub(r0, backtrack_stackpointer(), r1);
  __ str(r0, register_location(reg));
}


// Private methods:

// Emits a call, through DirectCEntryStub, to the C function referenced by
// ExternalReference::re_check_stack_guard_state. Arguments are set up as
//   r0: address of the stack slot reserved for the return address,
//   r1: the code object of the generated code,
//   r2: the regexp frame pointer.
// |scratch| is handed to PrepareCallCFunction. After the call the stack
// pointer is restored and the code pointer register is reloaded.
void RegExpMacroAssemblerARM::CallCheckStackGuardState(Register scratch) {
  __ PrepareCallCFunction(3, scratch);

  // RegExp code frame pointer.
  __ mov(r2, frame_pointer());
  // Code* of self.
  __ mov(r1, Operand(masm_->CodeObject()));

  // We need to make room for the return address on the stack.
  int stack_alignment = base::OS::ActivationFrameAlignment();
  DCHECK(IsAligned(stack_alignment, kPointerSize));
  __ sub(sp, sp, Operand(stack_alignment));

  // r0 will point to the return address, placed by DirectCEntry.
  __ mov(r0, sp);

  ExternalReference stack_guard_check =
      ExternalReference::re_check_stack_guard_state(isolate());
  __ mov(ip, Operand(stack_guard_check));
  DirectCEntryStub stub(isolate());
  stub.GenerateCall(masm_, ip);

  // Drop the return address from the stack.
  __ add(sp, sp, Operand(stack_alignment));

  DCHECK(stack_alignment != 0);
  // NOTE(review): presumably reloads the original sp that
  // PrepareCallCFunction saved when aligning the stack - confirm.
  __ ldr(sp, MemOperand(sp, 0));

  __ mov(code_pointer(), Operand(masm_->CodeObject()));
}


// Helper function for reading a value out of a stack frame.
// Returns a reference to the int32 slot at |re_frame| + |frame_offset|,
// reinterpreted as T.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
  return reinterpret_cast<T&>(Memory::int32_at(re_frame + frame_offset));
}


// Returns the address |re_frame| + |frame_offset| as a T*, i.e. a pointer to
// the frame slot itself rather than its contents.
template <typename T>
static T* frame_entry_address(Address re_frame, int frame_offset) {
  return reinterpret_cast<T*>(re_frame + frame_offset);
}


// Reads the isolate, start index and direct-call flag out of the regexp frame
// at |re_frame|, and forwards them together with |return_address|, |re_code|
// and the addresses of the input string, input start and input end frame
// slots to NativeRegExpMacroAssembler::CheckStackGuardState. The result of
// that call is returned unchanged.
int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
                                                  Code* re_code,
                                                  Address re_frame) {
  return NativeRegExpMacroAssembler::CheckStackGuardState(
      frame_entry<Isolate*>(re_frame, kIsolate),
      frame_entry<int>(re_frame, kStartIndex),
      frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
      frame_entry_address<String*>(re_frame, kInputString),
      frame_entry_address<const byte*>(re_frame, kInputStart),
      frame_entry_address<const byte*>(re_frame, kInputEnd));
}


// Returns the frame-pointer-relative memory operand of regexp register
// |register_index|. Registers are laid out downwards from kRegisterZero, one
// pointer-sized slot each. As a side effect, num_registers_ is raised so that
// it covers |register_index|.
MemOperand RegExpMacroAssemblerARM::register_location(int register_index) {
  DCHECK(register_index < (1<<30));
  if (num_registers_ <= register_index) {
    num_registers_ = register_index + 1;
  }
  return MemOperand(frame_pointer(),
                    kRegisterZero - register_index * kPointerSize);
}


// Emits code that branches to |on_outside_input| (or backtracks if it is
// NULL) when the position |cp_offset| characters from the current position
// lies outside the input. r0 and r1 are clobbered when |cp_offset| is
// negative.
void RegExpMacroAssemblerARM::CheckPosition(int cp_offset,
                                            Label* on_outside_input) {
  if (cp_offset >= 0) {
    // The input offset is a negative byte offset from the end of input, so
    // the position is outside once the offset reaches -cp_offset characters.
    __ cmp(current_input_offset(), Operand(-cp_offset * char_size()));
    BranchOrBacktrack(ge, on_outside_input);
  } else {
    // Looking backwards: compare the adjusted offset against the value in the
    // kStringStartMinusOne frame slot.
    __ ldr(r1, MemOperand(frame_pointer(), kStringStartMinusOne));
    __ add(r0, current_input_offset(), Operand(cp_offset * char_size()));
    __ cmp(r0, r1);
    BranchOrBacktrack(le, on_outside_input);
  }
}


// Emits a branch on |condition| to |to|. A NULL |to| means "backtrack": for
// the unconditional case the backtrack sequence is emitted inline, otherwise
// a conditional branch to the shared backtrack label is emitted.
void RegExpMacroAssemblerARM::BranchOrBacktrack(Condition condition,
                                                Label* to) {
  if (condition == al) {  // Unconditional.
    if (to == NULL) {
      Backtrack();
      return;
    }
    __ jmp(to);
    return;
  }
  if (to == NULL) {
    __ b(condition, &backtrack_label_);
    return;
  }
  __ b(condition, to);
}


// Emits a branch-with-link to |to|, executed under condition |cond|. The
// target is expected to start with SafeCallTarget and end with SafeReturn.
void RegExpMacroAssemblerARM::SafeCall(Label* to, Condition cond) {
  __ bl(to, cond);
}


// Emits the return sequence matching SafeCallTarget: pops the saved return
// offset into lr and adds the code object address back to form the new pc.
void RegExpMacroAssemblerARM::SafeReturn() {
  __ pop(lr);
  __ add(pc, lr, Operand(masm_->CodeObject()));
}


// Binds |name| as the entry of a SafeCall target and emits the prologue that
// turns lr into an offset relative to the code object before pushing it.
// NOTE(review): the relative form presumably keeps the return address valid
// if the code object moves during the call - confirm.
void RegExpMacroAssemblerARM::SafeCallTarget(Label* name) {
  __ bind(name);
  __ sub(lr, lr, Operand(masm_->CodeObject()));
  __ push(lr);
}


// Emits a push of |source| onto the backtrack stack: the backtrack stack
// pointer is decremented by kPointerSize before the store (NegPreIndex).
// |source| must not be the backtrack stack pointer itself.
void RegExpMacroAssemblerARM::Push(Register source) {
  DCHECK(!source.is(backtrack_stackpointer()));
  __ str(source,
         MemOperand(backtrack_stackpointer(), kPointerSize, NegPreIndex));
}


// Emits a pop from the backtrack stack into |target|: the value is loaded and
// the backtrack stack pointer is then incremented by kPointerSize
// (PostIndex). |target| must not be the backtrack stack pointer itself.
void RegExpMacroAssemblerARM::Pop(Register target) {
  DCHECK(!target.is(backtrack_stackpointer()));
  __ ldr(target,
         MemOperand(backtrack_stackpointer(), kPointerSize, PostIndex));
}


// Emits code that compares sp against the stack limit read through
// ExternalReference::address_of_stack_limit and, under condition `ls`, calls
// the preemption handler at check_preempt_label_. r0 is clobbered.
void RegExpMacroAssemblerARM::CheckPreemption() {
  // Check for preemption.
  ExternalReference stack_limit =
      ExternalReference::address_of_stack_limit(isolate());
  __ mov(r0, Operand(stack_limit));
  __ ldr(r0, MemOperand(r0));
  __ cmp(sp, r0);
  SafeCall(&check_preempt_label_, ls);
}


// Emits code that compares the backtrack stack pointer against the limit read
// through ExternalReference::address_of_regexp_stack_limit and, under
// condition `ls`, calls the overflow handler at stack_overflow_label_.
// r0 is clobbered.
void RegExpMacroAssemblerARM::CheckStackLimit() {
  ExternalReference stack_limit =
      ExternalReference::address_of_regexp_stack_limit(isolate());
  __ mov(r0, Operand(stack_limit));
  __ ldr(r0, MemOperand(r0));
  __ cmp(backtrack_stackpointer(), Operand(r0));
  SafeCall(&stack_overflow_label_, ls);
}


// Emits a single load of |characters| characters, located |cp_offset|
// characters from the current position, into the current-character register.
// No bounds check is emitted. The load width follows from the string mode:
//   LATIN1: 4 -> ldr, 2 -> ldrh, 1 -> ldrb
//   UC16:   2 -> ldr, 1 -> ldrh
// r4 is used as a scratch register when |cp_offset| is non-zero.
void RegExpMacroAssemblerARM::LoadCurrentCharacterUnchecked(int cp_offset,
                                                            int characters) {
  Register offset = current_input_offset();
  if (cp_offset != 0) {
    // r4 is not being used to store the capture start index at this point.
    __ add(r4, current_input_offset(), Operand(cp_offset * char_size()));
    offset = r4;
  }
  // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
  // and the operating system running on the target allow it.
  // If unaligned load/stores are not supported then this function must only
  // be used to load a single character at a time.
  if (!CanReadUnaligned()) {
    DCHECK(characters == 1);
  }

  if (mode_ == LATIN1) {
    if (characters == 4) {
      __ ldr(current_character(), MemOperand(end_of_input_address(), offset));
    } else if (characters == 2) {
      __ ldrh(current_character(), MemOperand(end_of_input_address(), offset));
    } else {
      DCHECK(characters == 1);
      __ ldrb(current_character(), MemOperand(end_of_input_address(), offset));
    }
  } else {
    DCHECK(mode_ == UC16);
    if (characters == 2) {
      __ ldr(current_character(), MemOperand(end_of_input_address(), offset));
    } else {
      DCHECK(characters == 1);
      __ ldrh(current_character(), MemOperand(end_of_input_address(), offset));
    }
  }
}


#undef __

#endif  // V8_INTERPRETED_REGEXP

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_ARM