call_arm.cc revision d6ed642458c8820e1beca72f3d7b5f0be4a4b64b
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"

namespace art {

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each entry, we'll load the key/displacement pair using ldmia.
 * This means that the register number of the temp we use for the key
 * must be lower than the reg for the displacement.
 *
 * The test loop will look something like:
 *
 *   adr   r_base, <table>
 *   ldr   r_val, [rARM_SP, v_reg_off]
 *   mov   r_idx, #table_size
 * lp:
 *   ldmia r_base!, {r_key, r_disp}
 *   sub   r_idx, #1
 *   cmp   r_val, r_key
 *   ifeq
 *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
 *   cbnz  r_idx, lp
 */
void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
                                 RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpSparseSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
                                                      kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTemp();
  /* Allocate key and disp temps */
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Make sure r_key's register number is less than r_disp's number for ldmia
  if (r_key.GetReg() > r_disp.GetReg()) {
    RegStorage tmp = r_disp;
    r_disp = r_key;
    r_key = tmp;
  }
  // Materialize a pointer to the switch table
  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);
  // Establish loop branch target
  LIR* target = NewLIR0(kPseudoTargetLabel);
  // Load next key/disp
  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetReg()) | (1 << r_disp.GetReg()));
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  // Go if match. NOTE: No instruction set switch here - must stay Thumb2
  LIR* it = OpIT(kCondEq, "");
  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
  OpEndIT(it);
  tab_rec->anchor = switch_branch;
  // Needs to use setflags encoding here
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
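  // The DCHECK that follows verifies the subtract actually chose a flag-setting (SUBS) encoding,
  // since the loop-back conditional branch below consumes those condition codes.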
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  OpCondBranch(kCondNe, target);
}


void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
                                 RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpPackedSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets =
      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
  RegStorage keyReg;
  // Remove the bias, if necessary
  if (low_key == 0) {
    keyReg = rl_src.reg;
  } else {
    keyReg = AllocTemp();
    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
  }
  // Bounds check - if < 0 or >= size continue following switch
  OpRegImm(kOpCmp, keyReg, size-1);
  LIR* branch_over = OpCondBranch(kCondHi, NULL);

  // Load the displacement from the switch table
  RegStorage disp_reg = AllocTemp();
  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);

  // ..and go! NOTE: No instruction set switch here - must stay Thumb2
  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
  tab_rec->anchor = switch_branch;

  /* branch_over target here */
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Array data table format:
 *  ushort ident = 0x0300    magic value
 *  ushort width             width of each element in the table
 *  uint   size              number of elements in the table
 *  ubyte  data[size*width]  table of data values (may contain a single-byte
 *                           padding at the end)
 *
 * Total size is 4+(width * size + 1)/2 16-bit code units.
 */
void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  // Add the table to the list - we'll process it later
  FillArrayData *tab_rec =
      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint16_t width = tab_rec->table[1];
  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
  tab_rec->size = (size * width) + 8;

  fill_array_data_.Insert(tab_rec);

  // Making a call - use explicit registers
  FlushAllRegs();   /* Everything to home location */
  LoadValueDirectFixed(rl_src, rs_r0);
  LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
               rs_rARM_LR);
  // Materialize a pointer to the fill data image
  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
  MarkSafepointPC(call_inst);
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
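 * The inline fast path loads the thread's thin lock id, uses ldrex/strex to install it into a
 * lock word that currently reads as unlocked (zero), and falls through to the pLockObject
 * entrypoint when the word is already locked or the store-exclusive fails.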
 */
void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
  if (kArchVariantHasGoodBranchPredictor) {
    LIR* null_check_branch = nullptr;
    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
      null_check_branch = nullptr;  // No null check.
    } else {
      // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
      if (Runtime::Current()->ExplicitNullChecks()) {
        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
      }
    }
    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    MarkPossibleNullPointerException(opt_flags);
    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);


    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
    not_unlocked_branch->target = slow_path_target;
    if (null_check_branch != nullptr) {
      null_check_branch->target = slow_path_target;
    }
    // TODO: move to a slow path.
    // Go expensive route - artLockObjectFromCode(obj);
    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
    MarkSafepointPC(call_inst);

    LIR* success_target = NewLIR0(kPseudoTargetLabel);
    lock_success_branch->target = success_target;
    GenMemBarrier(kLoadLoad);
  } else {
    // Explicit null-check as slow-path is entered using an IT.
    GenNullCheck(rs_r0, opt_flags);
    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    MarkPossibleNullPointerException(opt_flags);
    OpRegImm(kOpCmp, rs_r1, 0);
    LIR* it = OpIT(kCondEq, "");
    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    OpEndIT(it);
    OpRegImm(kOpCmp, rs_r1, 0);
    it = OpIT(kCondNe, "T");
    // Go expensive route - artLockObjectFromCode(self, obj);
    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
    OpEndIT(it);
    MarkSafepointPC(call_inst);
    GenMemBarrier(kLoadLoad);
  }
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock
 * and can only give away ownership if it's suspended.
 */
void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
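  // As in GenMonitorEnter: with a good branch predictor we emit explicit compare-and-branch
  // sequences; otherwise the inline store and the slow-path call are predicated inside an IT
  // block so the common case avoids branches.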
  if (kArchVariantHasGoodBranchPredictor) {
    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
      null_check_branch = nullptr;  // No null check.
    } else {
      // If the null-check fails it's handled by the slow-path to reduce exception-related meta-data.
      if (Runtime::Current()->ExplicitNullChecks()) {
        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
      }
    }
    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
    MarkPossibleNullPointerException(opt_flags);
    LoadConstantNoClobber(rs_r3, 0);
    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
    StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);

    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
    slow_unlock_branch->target = slow_path_target;
    if (null_check_branch != nullptr) {
      null_check_branch->target = slow_path_target;
    }
    // TODO: move to a slow path.
    // Go expensive route - artUnlockObjectFromCode(obj);
    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
    MarkSafepointPC(call_inst);

    LIR* success_target = NewLIR0(kPseudoTargetLabel);
    unlock_success_branch->target = success_target;
    GenMemBarrier(kStoreLoad);
  } else {
    // Explicit null-check as slow-path is entered using an IT.
    GenNullCheck(rs_r0, opt_flags);
    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
    MarkPossibleNullPointerException(opt_flags);
    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
    LoadConstantNoClobber(rs_r3, 0);
    // Is lock unheld on lock or held by us (==thread_id) on unlock?
    OpRegReg(kOpCmp, rs_r1, rs_r2);
    LIR* it = OpIT(kCondEq, "EE");
    StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
    // Go expensive route - UnlockObjectFromCode(obj);
    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
                       rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
    OpEndIT(it);
    MarkSafepointPC(call_inst);
    GenMemBarrier(kStoreLoad);
  }
}

void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage reset_reg = AllocTemp();
  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
  LoadConstant(reset_reg, 0);
  StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
  FreeTemp(reset_reg);
  StoreValue(rl_dest, rl_result);
}

/*
 * Mark garbage collection card. Skip if the value we're storing is null.
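 * The card index is the target address shifted right by CardTable::kCardShift; the byte stored
 * into the card is the low byte of the card table base register, which the runtime arranges to
 * equal the dirty-card value.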
 */
void ArmMir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTemp();
  RegStorage reg_card_no = AllocTemp();
  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  int spill_count = num_core_spills_ + num_fp_spills_;
  /*
   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.  This leaves the utility
   * code with a single temp: r12.  This should be enough.
   */
  LockTemp(r0);
  LockTemp(r1);
  LockTemp(r2);
  LockTemp(r3);

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
                              (static_cast<size_t>(frame_size_) <
                               Thread::kStackOverflowReservedBytes));
  NewLIR0(kPseudoMethodEntry);
  if (!skip_overflow_check) {
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
      /* Load stack limit */
      LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
    }
  }
  /* Spill core callee saves */
  NewLIR1(kThumb2Push, core_spill_mask_);
  /* Need to spill any FP regs? */
  if (num_fp_spills_) {
    /*
     * NOTE: fp spills are a little different from core spills in that
     * they are pushed as a contiguous block.  When promoting from
     * the fp set, we must allocate all singles from s16..highest-promoted
     */
    NewLIR1(kThumb2VPushCS, num_fp_spills_);
  }

  const int spill_size = spill_count * 4;
  const int frame_size_without_spills = frame_size_ - spill_size;
  if (!skip_overflow_check) {
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
      class StackOverflowSlowPath : public LIRSlowPath {
       public:
        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
              sp_displace_(sp_displace) {
        }
        void Compile() OVERRIDE {
          m2l_->ResetRegPool();
          m2l_->ResetDefTracking();
          GenerateTargetLabel();
          if (restore_lr_) {
            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
          }
          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
          m2l_->ClobberCallerSave();
          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
          // codegen and target are in thumb2 mode.
          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
        }

       private:
        const bool restore_lr_;
        const size_t sp_displace_;
      };
      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
        // Need to restore LR since we used it as a temp.
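        // The slow path reloads LR from the top of the core spill area (sp_displace - 4) before
        // unwinding the spills and tail-calling the throw entrypoint.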
        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
        OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
      } else {
        // If the frame is small enough we are guaranteed to have enough remaining space to
        // handle signals on the user stack.
        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
      }
    } else {
      // Implicit stack overflow check.
      // Generate a load from [sp, #-overflowsize]. If this is in the stack
      // redzone we will get a segmentation fault.
      //
      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
      // we need to make sure that it's loadable in an immediate field of
      // a sub instruction.  Otherwise we will get a temp allocation and the
      // code size will increase.
      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
      LoadWordDisp(rs_r12, 0, rs_r12);
      MarkPossibleStackOverflowException();
      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
    }
  } else {
    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(r0);
  FreeTemp(r1);
  FreeTemp(r2);
  FreeTemp(r3);
}

void ArmMir2Lir::GenExitSequence() {
  int spill_count = num_core_spills_ + num_fp_spills_;
  /*
   * In the exit path, r0/r1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(r0);
  LockTemp(r1);

  NewLIR0(kPseudoMethodExit);
  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
  /* Need to restore any FP callee saves? */
  if (num_fp_spills_) {
    NewLIR1(kThumb2VPopCS, num_fp_spills_);
  }
  if (core_spill_mask_ & (1 << rARM_LR)) {
    /* Unspill rARM_LR to rARM_PC */
    core_spill_mask_ &= ~(1 << rARM_LR);
    core_spill_mask_ |= (1 << rARM_PC);
  }
  NewLIR1(kThumb2Pop, core_spill_mask_);
  if (!(core_spill_mask_ & (1 << rARM_PC))) {
    /* We didn't pop to rARM_PC, so must do a bx rARM_LR */
    NewLIR1(kThumbBx, rARM_LR);
  }
}

void ArmMir2Lir::GenSpecialExitSequence() {
  NewLIR1(kThumbBx, rARM_LR);
}

}  // namespace art