deoptimizer-arm.cc revision b0fe1620dcb4135ac3ab2d66ff93072373911299
// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "v8.h"

#include "codegen.h"
#include "deoptimizer.h"
#include "full-codegen.h"
#include "safepoint-table.h"

namespace v8 {
namespace internal {

// Size in bytes of one entry in the deoptimization entry table emitted by
// TableEntryGenerator::GeneratePrologue below.  Each entry is four ARM
// instructions (4 * Assembler::kInstrSize); the ASSERT in GeneratePrologue
// checks this value against the actually emitted code.
int Deoptimizer::table_entry_size_ = 16;

// Forces lazy deoptimization of |function|'s optimized code: every return
// site recorded in the code's safepoint table is overwritten in place with
// an absolute call to the matching lazy deoptimization table entry, the
// patched code is queued on deoptimizing_code_list_, and the function is
// switched back to its unoptimized (shared) code.  Must not allocate, since
// it rewrites code objects directly (hence AssertNoAllocation).
void Deoptimizer::DeoptimizeFunction(JSFunction* function) {
  AssertNoAllocation no_allocation;

  if (!function->IsOptimized()) return;

  // Get the optimized code.
  Code* code = function->code();

  // Invalidate the relocation information, as it will become invalid by the
  // code patching below, and is not needed any more.
  code->InvalidateRelocation();

  // For each return after a safepoint insert an absolute call to the
  // corresponding deoptimization entry.
  unsigned last_pc_offset = 0;
  SafepointTable table(function->code());
  for (unsigned i = 0; i < table.length(); i++) {
    unsigned pc_offset = table.GetPcOffset(i);
    int deoptimization_index = table.GetDeoptimizationIndex(i);
    int gap_code_size = table.GetGapCodeSize(i);
    // Check that we did not shoot past next safepoint.
    // TODO(srdjan): How do we guarantee that safepoint code does not
    // overlap other safepoint patching code?
    CHECK(pc_offset >= last_pc_offset);
#ifdef DEBUG
    // Destroy the code which is not supposed to be run again.
    // Fill the gap between the previous patch point and this safepoint with
    // breakpoint instructions so any stray execution traps immediately.
    int instructions = (pc_offset - last_pc_offset) / Assembler::kInstrSize;
    CodePatcher destroyer(code->instruction_start() + last_pc_offset,
                          instructions);
    for (int x = 0; x < instructions; x++) {
      destroyer.masm()->bkpt(0);
    }
#endif
    last_pc_offset = pc_offset;
    if (deoptimization_index != Safepoint::kNoDeoptimizationIndex) {
      // The patched-in call sequence occupies three instruction words;
      // it is written just past the safepoint's gap code.
      const int kCallInstructionSizeInWords = 3;
      CodePatcher patcher(code->instruction_start() + pc_offset + gap_code_size,
                          kCallInstructionSizeInWords);
      Address deoptimization_entry = Deoptimizer::GetDeoptimizationEntry(
          deoptimization_index, Deoptimizer::LAZY);
      patcher.masm()->Call(deoptimization_entry, RelocInfo::NONE);
      last_pc_offset +=
          gap_code_size + kCallInstructionSizeInWords * Assembler::kInstrSize;
    }
  }


#ifdef DEBUG
  // Destroy the code which is not supposed to be run again.
  // Same bkpt fill as above, for the tail between the last patch point and
  // the start of the safepoint table.
  int instructions =
      (code->safepoint_table_start() - last_pc_offset) / Assembler::kInstrSize;
  CodePatcher destroyer(code->instruction_start() + last_pc_offset,
                        instructions);
  for (int x = 0; x < instructions; x++) {
    destroyer.masm()->bkpt(0);
  }
#endif

  // Add the deoptimizing code to the list.
  DeoptimizingCodeListNode* node = new DeoptimizingCodeListNode(code);
  node->set_next(deoptimizing_code_list_);
  deoptimizing_code_list_ = node;

  // Set the code for the function to non-optimized version.
  function->ReplaceCode(function->shared()->code());

  if (FLAG_trace_deopt) {
    PrintF("[forced deoptimization: ");
    function->PrintName();
    PrintF(" / %x]\n", reinterpret_cast<uint32_t>(function));
  }
}


// Stack-check patching is not yet implemented on ARM (used for on-stack
// replacement on other platforms).
void Deoptimizer::PatchStackCheckCode(RelocInfo* rinfo,
                                      Code* replacement_code) {
  UNIMPLEMENTED();
}


// Reverse of PatchStackCheckCode; not yet implemented on ARM.
void Deoptimizer::RevertStackCheckCode(RelocInfo* rinfo, Code* check_code) {
  UNIMPLEMENTED();
}


// OSR (on-stack replacement) output-frame construction; not yet
// implemented on ARM.
void Deoptimizer::DoComputeOsrOutputFrame() {
  UNIMPLEMENTED();
}


// Builds one output FrameDescription (for frame_index) from the translation
// stream: consumes a FRAME record (ast node id, function literal, frame
// height), lays out the incoming parameters, the synthesized caller pc/fp,
// context, and function slots, then the expression-stack contents, and
// finally computes the re-entry pc in the unoptimized code from the
// deoptimization output data.  For the topmost frame it also seeds the
// fp/cp/pc register values and the continuation builtin.
//
// This code is very similar to ia32 code, but relies on register names (fp, sp)
// and how the frame is laid out.
void Deoptimizer::DoComputeFrame(TranslationIterator* iterator,
                                 int frame_index) {
  // Read the ast node id, function, and frame height for this output frame.
  Translation::Opcode opcode =
      static_cast<Translation::Opcode>(iterator->Next());
  USE(opcode);
  ASSERT(Translation::FRAME == opcode);
  int node_id = iterator->Next();
  JSFunction* function = JSFunction::cast(ComputeLiteral(iterator->Next()));
  unsigned height = iterator->Next();
  unsigned height_in_bytes = height * kPointerSize;
  if (FLAG_trace_deopt) {
    PrintF(" translating ");
    function->PrintName();
    PrintF(" => node=%d, height=%d\n", node_id, height_in_bytes);
  }

  // The 'fixed' part of the frame consists of the incoming parameters and
  // the part described by JavaScriptFrameConstants.
  unsigned fixed_frame_size = ComputeFixedSize(function);
  unsigned input_frame_size = input_->GetFrameSize();
  unsigned output_frame_size = height_in_bytes + fixed_frame_size;

  // Allocate and store the output frame description.
  FrameDescription* output_frame =
      new(output_frame_size) FrameDescription(output_frame_size, function);

  bool is_bottommost = (0 == frame_index);
  bool is_topmost = (output_count_ - 1 == frame_index);
  ASSERT(frame_index >= 0 && frame_index < output_count_);
  ASSERT(output_[frame_index] == NULL);
  output_[frame_index] = output_frame;

  // The top address for the bottommost output frame can be computed from
  // the input frame pointer and the output frame's height.  For all
  // subsequent output frames, it can be computed from the previous one's
  // top address and the current frame's size.
  uint32_t top_address;
  if (is_bottommost) {
    // 2 = context and function in the frame.
    top_address =
        input_->GetRegister(fp.code()) - (2 * kPointerSize) - height_in_bytes;
  } else {
    top_address = output_[frame_index - 1]->GetTop() - output_frame_size;
  }
  output_frame->SetTop(top_address);

  // Compute the incoming parameter translation.
  // +1 accounts for the implicit receiver in addition to the declared
  // formal parameters.
  int parameter_count = function->shared()->formal_parameter_count() + 1;
  unsigned output_offset = output_frame_size;
  unsigned input_offset = input_frame_size;
  for (int i = 0; i < parameter_count; ++i) {
    output_offset -= kPointerSize;
    DoTranslateCommand(iterator, frame_index, output_offset);
  }
  input_offset -= (parameter_count * kPointerSize);

  // There are no translation commands for the caller's pc and fp, the
  // context, and the function.  Synthesize their values and set them up
  // explicitly.
  //
  // The caller's pc for the bottommost output frame is the same as in the
  // input frame.  For all subsequent output frames, it can be read from the
  // previous one.  This frame's pc can be computed from the non-optimized
  // function code and AST id of the bailout.
  output_offset -= kPointerSize;
  input_offset -= kPointerSize;
  intptr_t value;
  if (is_bottommost) {
    value = input_->GetFrameSlot(input_offset);
  } else {
    value = output_[frame_index - 1]->GetPc();
  }
  output_frame->SetFrameSlot(output_offset, value);
  if (FLAG_trace_deopt) {
    PrintF(" 0x%08x: [top + %d] <- 0x%08x ; caller's pc\n",
           top_address + output_offset, output_offset, value);
  }

  // The caller's frame pointer for the bottommost output frame is the same
  // as in the input frame.  For all subsequent output frames, it can be
  // read from the previous one.  Also compute and set this frame's frame
  // pointer.
  output_offset -= kPointerSize;
  input_offset -= kPointerSize;
  if (is_bottommost) {
    value = input_->GetFrameSlot(input_offset);
  } else {
    value = output_[frame_index - 1]->GetFp();
  }
  output_frame->SetFrameSlot(output_offset, value);
  intptr_t fp_value = top_address + output_offset;
  ASSERT(!is_bottommost || input_->GetRegister(fp.code()) == fp_value);
  output_frame->SetFp(fp_value);
  if (is_topmost) {
    output_frame->SetRegister(fp.code(), fp_value);
  }
  if (FLAG_trace_deopt) {
    PrintF(" 0x%08x: [top + %d] <- 0x%08x ; caller's fp\n",
           fp_value, output_offset, value);
  }

  // The context can be gotten from the function so long as we don't
  // optimize functions that need local contexts.
  output_offset -= kPointerSize;
  input_offset -= kPointerSize;
  value = reinterpret_cast<intptr_t>(function->context());
  // The context for the bottommost output frame should also agree with the
  // input frame.
  ASSERT(!is_bottommost || input_->GetFrameSlot(input_offset) == value);
  output_frame->SetFrameSlot(output_offset, value);
  if (is_topmost) {
    output_frame->SetRegister(cp.code(), value);
  }
  if (FLAG_trace_deopt) {
    PrintF(" 0x%08x: [top + %d] <- 0x%08x ; context\n",
           top_address + output_offset, output_offset, value);
  }

  // The function was mentioned explicitly in the BEGIN_FRAME.
  output_offset -= kPointerSize;
  input_offset -= kPointerSize;
  value = reinterpret_cast<uint32_t>(function);
  // The function for the bottommost output frame should also agree with the
  // input frame.
  ASSERT(!is_bottommost || input_->GetFrameSlot(input_offset) == value);
  output_frame->SetFrameSlot(output_offset, value);
  if (FLAG_trace_deopt) {
    PrintF(" 0x%08x: [top + %d] <- 0x%08x ; function\n",
           top_address + output_offset, output_offset, value);
  }

  // Translate the rest of the frame.
  for (unsigned i = 0; i < height; ++i) {
    output_offset -= kPointerSize;
    DoTranslateCommand(iterator, frame_index, output_offset);
  }
  // Every slot of the output frame must have been written exactly once.
  ASSERT(0 == output_offset);

  // Compute this frame's PC, state, and continuation.
  Code* non_optimized_code = function->shared()->code();
  FixedArray* raw_data = non_optimized_code->deoptimization_data();
  DeoptimizationOutputData* data = DeoptimizationOutputData::cast(raw_data);
  Address start = non_optimized_code->instruction_start();
  unsigned pc_and_state = GetOutputInfo(data, node_id, function->shared());
  unsigned pc_offset = FullCodeGenerator::PcField::decode(pc_and_state);
  uint32_t pc_value = reinterpret_cast<uint32_t>(start + pc_offset);
  output_frame->SetPc(pc_value);
  if (is_topmost) {
    output_frame->SetRegister(pc.code(), pc_value);
  }

  FullCodeGenerator::State state =
      FullCodeGenerator::StateField::decode(pc_and_state);
  output_frame->SetState(Smi::FromInt(state));

  // Set the continuation for the topmost frame.
  if (is_topmost) {
    Code* continuation = (bailout_type_ == EAGER)
        ? Builtins::builtin(Builtins::NotifyDeoptimized)
        : Builtins::builtin(Builtins::NotifyLazyDeoptimized);
    output_frame->SetContinuation(
        reinterpret_cast<uint32_t>(continuation->entry()));
  }

  if (output_count_ - 1 == frame_index) iterator->Done();
}


#define __ masm()->


// Emits the common deoptimization entry stub: saves the full register state
// into a FrameDescription, calls Deoptimizer::New and
// Deoptimizer::ComputeOutputFrames in C++, materializes the computed output
// frames on the stack, restores registers from the last output frame, and
// jumps to its continuation.
//
// This code tries to be close to ia32 code so that any changes can be
// easily ported.
void Deoptimizer::EntryGenerator::Generate() {
  GeneratePrologue();
  // TOS: bailout-id; TOS+1: return address if not EAGER.
  CpuFeatures::Scope scope(VFP3);
  // Save all general purpose registers before messing with them.
  const int kNumberOfRegisters = Register::kNumRegisters;

  // Everything but pc, lr and ip which will be saved but not restored.
  RegList restored_regs = kJSCallerSaved | kCalleeSaved | ip.bit();

  const int kDoubleRegsSize =
      kDoubleSize * DwVfpRegister::kNumAllocatableRegisters;

  // Save all general purpose registers before messing with them.
  // First spill the allocatable VFP double registers below sp.
  __ sub(sp, sp, Operand(kDoubleRegsSize));
  for (int i = 0; i < DwVfpRegister::kNumAllocatableRegisters; ++i) {
    DwVfpRegister vfp_reg = DwVfpRegister::FromAllocationIndex(i);
    int offset = i * kDoubleSize;
    __ vstr(vfp_reg, sp, offset);
  }

  // Push all 16 registers (needed to populate FrameDescription::registers_).
  __ stm(db_w, sp, restored_regs | sp.bit() | lr.bit() | pc.bit());

  const int kSavedRegistersAreaSize =
      (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize;

  // Get the bailout id from the stack.
  __ ldr(r2, MemOperand(sp, kSavedRegistersAreaSize));

  // Get the address of the location in the code object if possible (r3) (return
  // address for lazy deoptimization) and compute the fp-to-sp delta in
  // register r4.
  if (type() == EAGER) {
    __ mov(r3, Operand(0));
    // Correct one word for bailout id.
    __ add(r4, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
  } else {
    __ mov(r3, lr);
    // Correct two words for bailout id and return address.
    __ add(r4, sp, Operand(kSavedRegistersAreaSize + (2 * kPointerSize)));
  }
  __ sub(r4, fp, r4);

  // Allocate a new deoptimizer object.
  // Pass four arguments in r0 to r3 and fifth argument on stack.
  __ PrepareCallCFunction(5, r5);
  __ ldr(r0, MemOperand(fp, JavaScriptFrameConstants::kFunctionOffset));
  __ mov(r1, Operand(type()));  // bailout type,
  // r2: bailout id already loaded.
  // r3: code address or 0 already loaded.
  __ str(r4, MemOperand(sp, 0 * kPointerSize));  // Fp-to-sp delta.
  // Call Deoptimizer::New().
  __ CallCFunction(ExternalReference::new_deoptimizer_function(), 5);

  // Preserve "deoptimizer" object in register r0 and get the input
  // frame descriptor pointer to r1 (deoptimizer->input_);
  __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset()));


  // Copy core registers into FrameDescription::registers_[kNumRegisters].
  ASSERT(Register::kNumRegisters == kNumberOfRegisters);
  for (int i = 0; i < kNumberOfRegisters; i++) {
    int offset = (i * kIntSize) + FrameDescription::registers_offset();
    __ ldr(r2, MemOperand(sp, i * kPointerSize));
    __ str(r2, MemOperand(r1, offset));
  }

  // Copy VFP registers to
  // double_registers_[DoubleRegister::kNumAllocatableRegisters]
  int double_regs_offset = FrameDescription::double_registers_offset();
  for (int i = 0; i < DwVfpRegister::kNumAllocatableRegisters; ++i) {
    int dst_offset = i * kDoubleSize + double_regs_offset;
    int src_offset = i * kDoubleSize + kNumberOfRegisters * kPointerSize;
    __ vldr(d0, sp, src_offset);
    __ vstr(d0, r1, dst_offset);
  }

  // Remove the bailout id, eventually return address, and the saved registers
  // from the stack.
  if (type() == EAGER) {
    __ add(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
  } else {
    __ add(sp, sp, Operand(kSavedRegistersAreaSize + (2 * kPointerSize)));
  }

  // Compute a pointer to the unwinding limit in register r2; that is
  // the first stack slot not part of the input frame.
  __ ldr(r2, MemOperand(r1, FrameDescription::frame_size_offset()));
  __ add(r2, r2, sp);

  // Unwind the stack down to - but not including - the unwinding
  // limit and copy the contents of the activation frame to the input
  // frame description.
  __ add(r3, r1, Operand(FrameDescription::frame_content_offset()));
  Label pop_loop;
  __ bind(&pop_loop);
  __ pop(r4);
  __ str(r4, MemOperand(r3, 0));
  __ add(r3, r3, Operand(sizeof(uint32_t)));
  __ cmp(r2, sp);
  __ b(ne, &pop_loop);

  // Compute the output frame in the deoptimizer.
  __ push(r0);  // Preserve deoptimizer object across call.
  // r0: deoptimizer object; r1: scratch.
  __ PrepareCallCFunction(1, r1);
  // Call Deoptimizer::ComputeOutputFrames().
  __ CallCFunction(ExternalReference::compute_output_frames_function(), 1);
  __ pop(r0);  // Restore deoptimizer object (class Deoptimizer).

  // Replace the current (input) frame with the output frames.
  Label outer_push_loop, inner_push_loop;
  // Outer loop state: r0 = current "FrameDescription** output_",
  // r1 = one past the last FrameDescription**.
  __ ldr(r1, MemOperand(r0, Deoptimizer::output_count_offset()));
  __ ldr(r0, MemOperand(r0, Deoptimizer::output_offset()));  // r0 is output_.
  __ add(r1, r0, Operand(r1, LSL, 2));
  __ bind(&outer_push_loop);
  // Inner loop state: r2 = current FrameDescription*, r3 = loop index.
  // r3 counts down in bytes from frame_size to 0; frame contents are pushed
  // from high addresses to low.
  __ ldr(r2, MemOperand(r0, 0));  // output_[ix]
  __ ldr(r3, MemOperand(r2, FrameDescription::frame_size_offset()));
  __ bind(&inner_push_loop);
  __ sub(r3, r3, Operand(sizeof(uint32_t)));
  // __ add(r6, r2, Operand(r3, LSL, 1));
  __ add(r6, r2, Operand(r3));
  __ ldr(r7, MemOperand(r6, FrameDescription::frame_content_offset()));
  __ push(r7);
  __ cmp(r3, Operand(0));
  __ b(ne, &inner_push_loop);  // test for gt?
  __ add(r0, r0, Operand(kPointerSize));
  __ cmp(r0, r1);
  __ b(lt, &outer_push_loop);

  // In case of OSR, we have to restore the XMM registers.
  if (type() == OSR) {
    UNIMPLEMENTED();
  }

  // Push state, pc, and continuation from the last output frame.
  // r2 still holds the last FrameDescription* from the loop above.
  if (type() != OSR) {
    __ ldr(r6, MemOperand(r2, FrameDescription::state_offset()));
    __ push(r6);
  }

  __ ldr(r6, MemOperand(r2, FrameDescription::pc_offset()));
  __ push(r6);
  __ ldr(r6, MemOperand(r2, FrameDescription::continuation_offset()));
  __ push(r6);

  // Push the registers from the last output frame.
  for (int i = kNumberOfRegisters - 1; i >= 0; i--) {
    int offset = (i * kIntSize) + FrameDescription::registers_offset();
    __ ldr(r6, MemOperand(r2, offset));
    __ push(r6);
  }

  // Restore the registers from the stack.
  __ ldm(ia_w, sp, restored_regs);  // all but pc registers.
  __ pop(ip);  // remove sp
  __ pop(ip);  // remove lr

  // Set up the roots register.
  ExternalReference roots_address = ExternalReference::roots_address();
  __ mov(r10, Operand(roots_address));

  __ pop(ip);  // remove pc
  __ pop(r7);  // get continuation, leave pc on stack
  __ pop(lr);
  __ Jump(r7);
  __ stop("Unreachable.");
}


// Emits the deoptimization entry table: count() entries, each
// table_entry_size_ bytes, that push their own index and branch to the
// common code generated by EntryGenerator::Generate.  EAGER entries pad
// with a nop where LAZY entries push lr, keeping all entries the same size.
void Deoptimizer::TableEntryGenerator::GeneratePrologue() {
  // Create a sequence of deoptimization entries.  Note that any
  // registers may be still live.
  Label done;
  for (int i = 0; i < count(); i++) {
    int start = masm()->pc_offset();
    USE(start);
    if (type() == EAGER) {
      __ nop();
    } else {
      // Emulate ia32 like call by pushing return address to stack.
      __ push(lr);
    }
    __ mov(ip, Operand(i));
    __ push(ip);
    __ b(&done);
    // Each entry must be exactly table_entry_size_ bytes so callers can
    // index the table by bailout id.
    ASSERT(masm()->pc_offset() - start == table_entry_size_);
  }
  __ bind(&done);
}

#undef __

} }  // namespace v8::internal