call_arm.cc revision dd7624d2b9e599d57762d12031b10b89defc9807
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"

namespace art {

/*
 * The sparse table in the literal pool is an array of <key,displacement>
 * pairs.  For each set, we'll load them as a pair using ldmia.
 * This means that the register number of the temp we use for the key
 * must be lower than the reg for the displacement.
 *
 * The test loop will look something like:
 *
 *   adr   r_base, <table>
 *   ldr   r_val, [rARM_SP, v_reg_off]
 *   mov   r_idx, #table_size
 * lp:
 *   ldmia r_base!, {r_key, r_disp}
 *   sub   r_idx, #1
 *   cmp   r_val, r_key
 *   ifeq
 *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
 *   cbnz  r_idx, lp
 */
void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
                                 RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpSparseSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
                                                      kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage r_base = AllocTemp();
  /* Allocate key and disp temps */
  RegStorage r_key = AllocTemp();
  RegStorage r_disp = AllocTemp();
  // Make sure r_key's register number is less than r_disp's number for ldmia
  if (r_key.GetReg() > r_disp.GetReg()) {
    RegStorage tmp = r_disp;
    r_disp = r_key;
    r_key = tmp;
  }
  // Materialize a pointer to the switch table
  NewLIR3(kThumb2Adr, r_base.GetReg(), 0, WrapPointer(tab_rec));
  // Set up r_idx
  RegStorage r_idx = AllocTemp();
  LoadConstant(r_idx, size);
  // Establish loop branch target
  LIR* target = NewLIR0(kPseudoTargetLabel);
  // Load next key/disp
  NewLIR2(kThumb2LdmiaWB, r_base.GetReg(), (1 << r_key.GetReg()) | (1 << r_disp.GetReg()));
  OpRegReg(kOpCmp, r_key, rl_src.reg);
  // Go if match.  NOTE: No instruction set switch here - must stay Thumb2
  OpIT(kCondEq, "");
  LIR* switch_branch = NewLIR1(kThumb2AddPCR, r_disp.GetReg());
  tab_rec->anchor = switch_branch;
  // Needs to use setflags encoding here
  OpRegRegImm(kOpSub, r_idx, r_idx, 1);  // For value == 1, this should set flags.
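  // The subtraction above must use a flag-setting (SUBS) encoding: the
  // conditional loop branch below consumes the condition codes it produces.
  // The DCHECK confirms the last LIR emitted really defines the ccodes.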
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  OpCondBranch(kCondNe, target);
}

void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
                                 RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  if (cu_->verbose) {
    DumpPackedSwitchTable(table);
  }
  // Add the table to the list - we'll process it later
  SwitchTable *tab_rec =
      static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint32_t size = table[1];
  tab_rec->targets =
      static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
  switch_tables_.Insert(tab_rec);

  // Get the switch value
  rl_src = LoadValue(rl_src, kCoreReg);
  RegStorage table_base = AllocTemp();
  // Materialize a pointer to the switch table
  NewLIR3(kThumb2Adr, table_base.GetReg(), 0, WrapPointer(tab_rec));
  int low_key = s4FromSwitchData(&table[2]);
  RegStorage keyReg;
  // Remove the bias, if necessary
  if (low_key == 0) {
    keyReg = rl_src.reg;
  } else {
    keyReg = AllocTemp();
    OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
  }
  // Bounds check - if < 0 or >= size, continue past the switch
  OpRegImm(kOpCmp, keyReg, size - 1);
  LIR* branch_over = OpCondBranch(kCondHi, NULL);

  // Load the displacement from the switch table
  RegStorage disp_reg = AllocTemp();
  LoadBaseIndexed(table_base, keyReg, disp_reg, 2, kWord);

  // ...and go!  NOTE: No instruction set switch here - must stay Thumb2
  LIR* switch_branch = NewLIR1(kThumb2AddPCR, disp_reg.GetReg());
  tab_rec->anchor = switch_branch;

  /* branch_over target here */
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
}

/*
 * Array data table format:
 *  ushort ident = 0x0300   magic value
 *  ushort width            width of each element in the table
 *  uint   size             number of elements in the table
 *  ubyte  data[size*width] table of data values (may contain a single-byte
 *                          padding at the end)
 *
 * Total size is 4+(width * size + 1)/2 16-bit code units.
 */
void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) {
  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
  // Add the table to the list - we'll process it later
  FillArrayData *tab_rec =
      static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), kArenaAllocData));
  tab_rec->table = table;
  tab_rec->vaddr = current_dalvik_offset_;
  uint16_t width = tab_rec->table[1];
  uint32_t size = tab_rec->table[2] | ((static_cast<uint32_t>(tab_rec->table[3])) << 16);
  tab_rec->size = (size * width) + 8;

  fill_array_data_.Insert(tab_rec);

  // Making a call - use explicit registers
  FlushAllRegs();   /* Everything to home location */
  LoadValueDirectFixed(rl_src, rs_r0);
  LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData).Int32Value(),
               rs_rARM_LR);
  // Materialize a pointer to the fill data image
  NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec));
  ClobberCallerSave();
  LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
  MarkSafepointPC(call_inst);
}

/*
 * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.
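 *
 * A rough sketch of the inline fast path emitted below (exact encodings and
 * register choices are in the code):
 *
 *   ldr    r2, [rSELF, #thin_lock_id]    ; this thread's thin lock id
 *   ldrex  r1, [r0, #monitor_offset]     ; exclusive-load the lock word
 *   ; r1 != 0 -> already locked or inflated, take the slow path
 *   strex  r1, r2, [r0, #monitor_offset] ; try to install our id
 *   ; r1 != 0 -> exclusive store failed, take the slow path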
 */
void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) {
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
  if (kArchVariantHasGoodBranchPredictor) {
    LIR* null_check_branch = nullptr;
    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
      null_check_branch = nullptr;  // No null check.
    } else {
      // If the null-check fails it's handled by the slow-path to reduce exception related meta-data.
      if (Runtime::Current()->ExplicitNullChecks()) {
        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
      }
    }
    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    MarkPossibleNullPointerException(opt_flags);
    LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, rs_r1, 0, NULL);
    NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    LIR* lock_success_branch = OpCmpImmBranch(kCondEq, rs_r1, 0, NULL);

    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
    not_unlocked_branch->target = slow_path_target;
    if (null_check_branch != nullptr) {
      null_check_branch->target = slow_path_target;
    }
    // TODO: move to a slow path.
    // Go expensive route - artLockObjectFromCode(obj);
    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
    MarkSafepointPC(call_inst);

    LIR* success_target = NewLIR0(kPseudoTargetLabel);
    lock_success_branch->target = success_target;
    GenMemBarrier(kLoadLoad);
  } else {
    // Explicit null-check as slow-path is entered using an IT.
    GenNullCheck(rs_r0, opt_flags);
    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
    NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    MarkPossibleNullPointerException(opt_flags);
    OpRegImm(kOpCmp, rs_r1, 0);
    OpIT(kCondEq, "");
    NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2);
    OpRegImm(kOpCmp, rs_r1, 0);
    OpIT(kCondNe, "T");
    // Go expensive route - artLockObjectFromCode(self, obj);
    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pLockObject).Int32Value(), rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
    MarkSafepointPC(call_inst);
    GenMemBarrier(kLoadLoad);
  }
}

/*
 * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more
 * details see monitor.cc.  Note the code below doesn't use ldrex/strex as the code holds the lock
 * and can only give away ownership if it's suspended.
 */
void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) {
  FlushAllRegs();
  LoadValueDirectFixed(rl_src, rs_r0);  // Get obj
  LockCallTemps();  // Prepare for explicit register usage
  LIR* null_check_branch = nullptr;
  LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
  constexpr bool kArchVariantHasGoodBranchPredictor = false;  // TODO: true if cortex-A15.
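  // As in GenMonitorEnter: with a good branch predictor we branch out to a
  // shared slow path; otherwise we keep the code straight-line and predicate
  // the slow-path call with an IT block.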
  if (kArchVariantHasGoodBranchPredictor) {
    if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) {
      null_check_branch = nullptr;  // No null check.
    } else {
      // If the null-check fails it's handled by the slow-path to reduce exception related meta-data.
      if (Runtime::Current()->ExplicitNullChecks()) {
        null_check_branch = OpCmpImmBranch(kCondEq, rs_r0, 0, NULL);
      }
    }
    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);
    MarkPossibleNullPointerException(opt_flags);
    LoadConstantNoClobber(rs_r3, 0);
    LIR* slow_unlock_branch = OpCmpBranch(kCondNe, rs_r1, rs_r2, NULL);
    StoreWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
    LIR* unlock_success_branch = OpUnconditionalBranch(NULL);

    LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
    slow_unlock_branch->target = slow_path_target;
    if (null_check_branch != nullptr) {
      null_check_branch->target = slow_path_target;
    }
    // TODO: move to a slow path.
    // Go expensive route - artUnlockObjectFromCode(obj);
    LoadWordDisp(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(), rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx, rs_rARM_LR);
    MarkSafepointPC(call_inst);

    LIR* success_target = NewLIR0(kPseudoTargetLabel);
    unlock_success_branch->target = success_target;
    GenMemBarrier(kStoreLoad);
  } else {
    // Explicit null-check as slow-path is entered using an IT.
    GenNullCheck(rs_r0, opt_flags);
    LoadWordDisp(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r1);  // Get lock
    MarkPossibleNullPointerException(opt_flags);
    LoadWordDisp(rs_rARM_SELF, Thread::ThinLockIdOffset<4>().Int32Value(), rs_r2);
    LoadConstantNoClobber(rs_r3, 0);
    // Is lock unheld on lock or held by us (==thread_id) on unlock?
    OpRegReg(kOpCmp, rs_r1, rs_r2);
    OpIT(kCondEq, "EE");
    StoreWordDisp/*eq*/(rs_r0, mirror::Object::MonitorOffset().Int32Value(), rs_r3);
    // Go expensive route - UnlockObjectFromCode(obj);
    LoadWordDisp/*ne*/(rs_rARM_SELF, QUICK_ENTRYPOINT_OFFSET(4, pUnlockObject).Int32Value(),
                       rs_rARM_LR);
    ClobberCallerSave();
    LIR* call_inst = OpReg(kOpBlx/*ne*/, rs_rARM_LR);
    MarkSafepointPC(call_inst);
    GenMemBarrier(kStoreLoad);
  }
}

void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
  int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage reset_reg = AllocTemp();
  LoadWordDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
  LoadConstant(reset_reg, 0);
  StoreWordDisp(rs_rARM_SELF, ex_offset, reset_reg);
  FreeTemp(reset_reg);
  StoreValue(rl_dest, rl_result);
}

/*
 * Mark garbage collection card. Skip if the value we're storing is null.
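 *
 * The card to dirty is at card_table_base + (tgt_addr >> kCardShift).  Note
 * the value stored is the low byte of the card table base register itself,
 * which the runtime arranges to equal the dirty-card marker.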
 */
void ArmMir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
  RegStorage reg_card_base = AllocTemp();
  RegStorage reg_card_no = AllocTemp();
  LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
  LoadWordDisp(rs_rARM_SELF, Thread::CardTableOffset<4>().Int32Value(), reg_card_base);
  OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
  LIR* target = NewLIR0(kPseudoTargetLabel);
  branch_over->target = target;
  FreeTemp(reg_card_base);
  FreeTemp(reg_card_no);
}

void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
  int spill_count = num_core_spills_ + num_fp_spills_;
  /*
   * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
   * mechanism know so it doesn't try to use any of them when
   * expanding the frame or flushing.  This leaves the utility
   * code with a single temp: r12.  This should be enough.
   */
  LockTemp(r0);
  LockTemp(r1);
  LockTemp(r2);
  LockTemp(r3);

  /*
   * We can safely skip the stack overflow check if we're
   * a leaf *and* our frame size < fudge factor.
   */
  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
                              (static_cast<size_t>(frame_size_) <
                               Thread::kStackOverflowReservedBytes));
  NewLIR0(kPseudoMethodEntry);
  if (!skip_overflow_check) {
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
      /* Load stack limit */
      LoadWordDisp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
    }
  }
  /* Spill core callee saves */
  NewLIR1(kThumb2Push, core_spill_mask_);
  /* Need to spill any FP regs? */
  if (num_fp_spills_) {
    /*
     * NOTE: fp spills are a little different from core spills in that
     * they are pushed as a contiguous block.  When promoting from
     * the fp set, we must allocate all singles from s16..highest-promoted
     */
    NewLIR1(kThumb2VPushCS, num_fp_spills_);
  }

  // TODO: 64 bit will be different code.
  const int frame_size_without_spills = frame_size_ - spill_count * 4;
  if (!skip_overflow_check) {
    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
      class StackOverflowSlowPath : public LIRSlowPath {
       public:
        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
            : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr), restore_lr_(restore_lr),
              sp_displace_(sp_displace) {
        }
        void Compile() OVERRIDE {
          m2l_->ResetRegPool();
          m2l_->ResetDefTracking();
          GenerateTargetLabel();
          if (restore_lr_) {
            m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
          }
          m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
          m2l_->ClobberCallerSave();
          ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
          // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
          // codegen and target are in thumb2 mode.
          m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
        }

       private:
        const bool restore_lr_;
        const size_t sp_displace_;
      };
      if (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes) {
        OpRegRegImm(kOpSub, rs_rARM_LR, rs_rARM_SP, frame_size_without_spills);
        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_LR, rs_r12, nullptr);
        // Need to restore LR since we used it as a temp.
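        // (The slow path is constructed with restore_lr == true, so it reloads
        // LR from the frame before raising the stack-overflow exception.)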
        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true,
                                                     frame_size_without_spills));
        OpRegCopy(rs_rARM_SP, rs_rARM_LR);  // Establish stack
      } else {
        // If the frame is small enough we are guaranteed to have enough space
        // remaining to handle signals on the user stack.
        OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
        LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
      }
    } else {
      // Implicit stack overflow check.
      // Generate a load from [sp, #-overflowsize].  If this is in the stack
      // redzone we will get a segmentation fault.
      //
      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
      // we need to make sure that it's loadable in an immediate field of
      // a sub instruction.  Otherwise we will get a temp allocation and the
      // code size will increase.
      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
      LoadWordDisp(rs_r12, 0, rs_r12);
      MarkPossibleStackOverflowException();
      OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
    }
  } else {
    OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
  }

  FlushIns(ArgLocs, rl_method);

  FreeTemp(r0);
  FreeTemp(r1);
  FreeTemp(r2);
  FreeTemp(r3);
}

void ArmMir2Lir::GenExitSequence() {
  int spill_count = num_core_spills_ + num_fp_spills_;
  /*
   * In the exit path, r0/r1 are live - make sure they aren't
   * allocated by the register utilities as temps.
   */
  LockTemp(r0);
  LockTemp(r1);

  NewLIR0(kPseudoMethodExit);
  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
  /* Need to restore any FP callee saves? */
  if (num_fp_spills_) {
    NewLIR1(kThumb2VPopCS, num_fp_spills_);
  }
  if (core_spill_mask_ & (1 << rARM_LR)) {
    /* Unspill rARM_LR to rARM_PC */
    core_spill_mask_ &= ~(1 << rARM_LR);
    core_spill_mask_ |= (1 << rARM_PC);
  }
  NewLIR1(kThumb2Pop, core_spill_mask_);
  if (!(core_spill_mask_ & (1 << rARM_PC))) {
    /* We didn't pop to rARM_PC, so must do a bx rARM_LR */
    NewLIR1(kThumbBx, rARM_LR);
  }
}

void ArmMir2Lir::GenSpecialExitSequence() {
  NewLIR1(kThumbBx, rARM_LR);
}

}  // namespace art