// int_arm.cc revision 99ad7230ccaace93bf323dea9790f35fe991a4a2
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* This file contains codegen for the Thumb2 ISA. */ 18 19#include "arm_lir.h" 20#include "codegen_arm.h" 21#include "dex/quick/mir_to_lir-inl.h" 22#include "entrypoints/quick/quick_entrypoints.h" 23#include "mirror/array.h" 24 25namespace art { 26 27LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) { 28 OpRegReg(kOpCmp, src1, src2); 29 return OpCondBranch(cond, target); 30} 31 32/* 33 * Generate a Thumb2 IT instruction, which can nullify up to 34 * four subsequent instructions based on a condition and its 35 * inverse. The condition applies to the first instruction, which 36 * is executed if the condition is met. The string "guide" consists 37 * of 0 to 3 chars, and applies to the 2nd through 4th instruction. 38 * A "T" means the instruction is executed if the condition is 39 * met, and an "E" means the instruction is executed if the condition 40 * is not met. 41 */ 42LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) { 43 int mask; 44 int mask3 = 0; 45 int mask2 = 0; 46 int mask1 = 0; 47 ArmConditionCode code = ArmConditionEncoding(ccode); 48 int cond_bit = code & 1; 49 int alt_bit = cond_bit ^ 1; 50 51 // Note: case fallthroughs intentional 52 switch (strlen(guide)) { 53 case 3: 54 mask1 = (guide[2] == 'T') ? cond_bit : alt_bit; 55 case 2: 56 mask2 = (guide[1] == 'T') ? 
cond_bit : alt_bit; 57 case 1: 58 mask3 = (guide[0] == 'T') ? cond_bit : alt_bit; 59 break; 60 case 0: 61 break; 62 default: 63 LOG(FATAL) << "OAT: bad case in OpIT"; 64 } 65 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) | 66 (1 << (3 - strlen(guide))); 67 return NewLIR2(kThumb2It, code, mask); 68} 69 70/* 71 * 64-bit 3way compare function. 72 * mov rX, #-1 73 * cmp op1hi, op2hi 74 * blt done 75 * bgt flip 76 * sub rX, op1lo, op2lo (treat as unsigned) 77 * beq done 78 * ite hi 79 * mov(hi) rX, #-1 80 * mov(!hi) rX, #1 81 * flip: 82 * neg rX 83 * done: 84 */ 85void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, 86 RegLocation rl_src2) { 87 LIR* target1; 88 LIR* target2; 89 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 90 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 91 int t_reg = AllocTemp(); 92 LoadConstant(t_reg, -1); 93 OpRegReg(kOpCmp, rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg()); 94 LIR* branch1 = OpCondBranch(kCondLt, NULL); 95 LIR* branch2 = OpCondBranch(kCondGt, NULL); 96 OpRegRegReg(kOpSub, t_reg, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 97 LIR* branch3 = OpCondBranch(kCondEq, NULL); 98 99 OpIT(kCondHi, "E"); 100 NewLIR2(kThumb2MovI8M, t_reg, ModifiedImmediate(-1)); 101 LoadConstant(t_reg, 1); 102 GenBarrier(); 103 104 target2 = NewLIR0(kPseudoTargetLabel); 105 OpRegReg(kOpNeg, t_reg, t_reg); 106 107 target1 = NewLIR0(kPseudoTargetLabel); 108 109 RegLocation rl_temp = LocCReturn(); // Just using as template, will change 110 rl_temp.reg.SetReg(t_reg); 111 StoreValue(rl_dest, rl_temp); 112 FreeTemp(t_reg); 113 114 branch1->target = target1; 115 branch2->target = target2; 116 branch3->target = branch1->target; 117} 118 119void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, 120 int64_t val, ConditionCode ccode) { 121 int32_t val_lo = Low32Bits(val); 122 int32_t val_hi = High32Bits(val); 123 DCHECK_GE(ModifiedImmediate(val_lo), 0); 124 DCHECK_GE(ModifiedImmediate(val_hi), 0); 125 LIR* taken = 
&block_label_list_[bb->taken]; 126 LIR* not_taken = &block_label_list_[bb->fall_through]; 127 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 128 int32_t low_reg = rl_src1.reg.GetReg(); 129 int32_t high_reg = rl_src1.reg.GetHighReg(); 130 131 if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { 132 int t_reg = AllocTemp(); 133 NewLIR4(kThumb2OrrRRRs, t_reg, low_reg, high_reg, 0); 134 FreeTemp(t_reg); 135 OpCondBranch(ccode, taken); 136 return; 137 } 138 139 switch (ccode) { 140 case kCondEq: 141 case kCondNe: 142 OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken); 143 break; 144 case kCondLt: 145 OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); 146 OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); 147 ccode = kCondUlt; 148 break; 149 case kCondLe: 150 OpCmpImmBranch(kCondLt, high_reg, val_hi, taken); 151 OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken); 152 ccode = kCondLs; 153 break; 154 case kCondGt: 155 OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); 156 OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); 157 ccode = kCondHi; 158 break; 159 case kCondGe: 160 OpCmpImmBranch(kCondGt, high_reg, val_hi, taken); 161 OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken); 162 ccode = kCondUge; 163 break; 164 default: 165 LOG(FATAL) << "Unexpected ccode: " << ccode; 166 } 167 OpCmpImmBranch(ccode, low_reg, val_lo, taken); 168} 169 170void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { 171 RegLocation rl_result; 172 RegLocation rl_src = mir_graph_->GetSrc(mir, 0); 173 RegLocation rl_dest = mir_graph_->GetDest(mir); 174 rl_src = LoadValue(rl_src, kCoreReg); 175 ConditionCode ccode = mir->meta.ccode; 176 if (mir->ssa_rep->num_uses == 1) { 177 // CONST case 178 int true_val = mir->dalvikInsn.vB; 179 int false_val = mir->dalvikInsn.vC; 180 rl_result = EvalLoc(rl_dest, kCoreReg, true); 181 // Change kCondNe to kCondEq for the special cases below. 
182 if (ccode == kCondNe) { 183 ccode = kCondEq; 184 std::swap(true_val, false_val); 185 } 186 bool cheap_false_val = InexpensiveConstantInt(false_val); 187 if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { 188 OpRegRegImm(kOpSub, rl_result.reg.GetReg(), rl_src.reg.GetReg(), -true_val); 189 DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); 190 OpIT(true_val == 0 ? kCondNe : kCondUge, ""); 191 LoadConstant(rl_result.reg.GetReg(), false_val); 192 GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact 193 } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { 194 OpRegRegImm(kOpRsub, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 1); 195 DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); 196 OpIT(kCondLs, ""); 197 LoadConstant(rl_result.reg.GetReg(), false_val); 198 GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact 199 } else if (cheap_false_val && InexpensiveConstantInt(true_val)) { 200 OpRegImm(kOpCmp, rl_src.reg.GetReg(), 0); 201 OpIT(ccode, "E"); 202 LoadConstant(rl_result.reg.GetReg(), true_val); 203 LoadConstant(rl_result.reg.GetReg(), false_val); 204 GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact 205 } else { 206 // Unlikely case - could be tuned. 
207 int t_reg1 = AllocTemp(); 208 int t_reg2 = AllocTemp(); 209 LoadConstant(t_reg1, true_val); 210 LoadConstant(t_reg2, false_val); 211 OpRegImm(kOpCmp, rl_src.reg.GetReg(), 0); 212 OpIT(ccode, "E"); 213 OpRegCopy(rl_result.reg.GetReg(), t_reg1); 214 OpRegCopy(rl_result.reg.GetReg(), t_reg2); 215 GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact 216 } 217 } else { 218 // MOVE case 219 RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; 220 RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; 221 rl_true = LoadValue(rl_true, kCoreReg); 222 rl_false = LoadValue(rl_false, kCoreReg); 223 rl_result = EvalLoc(rl_dest, kCoreReg, true); 224 OpRegImm(kOpCmp, rl_src.reg.GetReg(), 0); 225 if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? 226 OpIT(NegateComparison(ccode), ""); 227 OpRegCopy(rl_result.reg.GetReg(), rl_false.reg.GetReg()); 228 } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place? 229 OpIT(ccode, ""); 230 OpRegCopy(rl_result.reg.GetReg(), rl_true.reg.GetReg()); 231 } else { // Normal - select between the two. 232 OpIT(ccode, "E"); 233 OpRegCopy(rl_result.reg.GetReg(), rl_true.reg.GetReg()); 234 OpRegCopy(rl_result.reg.GetReg(), rl_false.reg.GetReg()); 235 } 236 GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact 237 } 238 StoreValue(rl_dest, rl_result); 239} 240 241void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { 242 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); 243 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); 244 // Normalize such that if either operand is constant, src2 will be constant. 
245 ConditionCode ccode = mir->meta.ccode; 246 if (rl_src1.is_const) { 247 std::swap(rl_src1, rl_src2); 248 ccode = FlipComparisonOrder(ccode); 249 } 250 if (rl_src2.is_const) { 251 RegLocation rl_temp = UpdateLocWide(rl_src2); 252 // Do special compare/branch against simple const operand if not already in registers. 253 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 254 if ((rl_temp.location != kLocPhysReg) && 255 ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) { 256 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); 257 return; 258 } 259 } 260 LIR* taken = &block_label_list_[bb->taken]; 261 LIR* not_taken = &block_label_list_[bb->fall_through]; 262 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 263 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 264 OpRegReg(kOpCmp, rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg()); 265 switch (ccode) { 266 case kCondEq: 267 OpCondBranch(kCondNe, not_taken); 268 break; 269 case kCondNe: 270 OpCondBranch(kCondNe, taken); 271 break; 272 case kCondLt: 273 OpCondBranch(kCondLt, taken); 274 OpCondBranch(kCondGt, not_taken); 275 ccode = kCondUlt; 276 break; 277 case kCondLe: 278 OpCondBranch(kCondLt, taken); 279 OpCondBranch(kCondGt, not_taken); 280 ccode = kCondLs; 281 break; 282 case kCondGt: 283 OpCondBranch(kCondGt, taken); 284 OpCondBranch(kCondLt, not_taken); 285 ccode = kCondHi; 286 break; 287 case kCondGe: 288 OpCondBranch(kCondGt, taken); 289 OpCondBranch(kCondLt, not_taken); 290 ccode = kCondUge; 291 break; 292 default: 293 LOG(FATAL) << "Unexpected ccode: " << ccode; 294 } 295 OpRegReg(kOpCmp, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 296 OpCondBranch(ccode, taken); 297} 298 299/* 300 * Generate a register comparison to an immediate and branch. Caller 301 * is responsible for setting branch target field. 
302 */ 303LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value, 304 LIR* target) { 305 LIR* branch; 306 ArmConditionCode arm_cond = ArmConditionEncoding(cond); 307 /* 308 * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit 309 * compare-and-branch if zero is ideal if it will reach. However, because null checks 310 * branch forward to a launch pad, they will frequently not reach - and thus have to 311 * be converted to a long form during assembly (which will trigger another assembly 312 * pass). Here we estimate the branch distance for checks, and if large directly 313 * generate the long form in an attempt to avoid an extra assembly pass. 314 * TODO: consider interspersing launchpads in code following unconditional branches. 315 */ 316 bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget)); 317 skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64); 318 if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) && 319 ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) { 320 branch = NewLIR2((arm_cond == kArmCondEq) ? 
kThumb2Cbz : kThumb2Cbnz, 321 reg, 0); 322 } else { 323 OpRegImm(kOpCmp, reg, check_value); 324 branch = NewLIR2(kThumbBCond, 0, arm_cond); 325 } 326 branch->target = target; 327 return branch; 328} 329 330LIR* ArmMir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) { 331 LIR* res; 332 int opcode; 333 if (ARM_FPREG(r_dest) || ARM_FPREG(r_src)) 334 return OpFpRegCopy(r_dest, r_src); 335 if (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src)) 336 opcode = kThumbMovRR; 337 else if (!ARM_LOWREG(r_dest) && !ARM_LOWREG(r_src)) 338 opcode = kThumbMovRR_H2H; 339 else if (ARM_LOWREG(r_dest)) 340 opcode = kThumbMovRR_H2L; 341 else 342 opcode = kThumbMovRR_L2H; 343 res = RawLIR(current_dalvik_offset_, opcode, r_dest, r_src); 344 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { 345 res->flags.is_nop = true; 346 } 347 return res; 348} 349 350LIR* ArmMir2Lir::OpRegCopy(int r_dest, int r_src) { 351 LIR* res = OpRegCopyNoInsert(r_dest, r_src); 352 AppendLIR(res); 353 return res; 354} 355 356void ArmMir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, 357 int src_hi) { 358 bool dest_fp = ARM_FPREG(dest_lo) && ARM_FPREG(dest_hi); 359 bool src_fp = ARM_FPREG(src_lo) && ARM_FPREG(src_hi); 360 DCHECK_EQ(ARM_FPREG(src_lo), ARM_FPREG(src_hi)); 361 DCHECK_EQ(ARM_FPREG(dest_lo), ARM_FPREG(dest_hi)); 362 if (dest_fp) { 363 if (src_fp) { 364 OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); 365 } else { 366 NewLIR3(kThumb2Fmdrr, S2d(dest_lo, dest_hi), src_lo, src_hi); 367 } 368 } else { 369 if (src_fp) { 370 NewLIR3(kThumb2Fmrrd, dest_lo, dest_hi, S2d(src_lo, src_hi)); 371 } else { 372 // Handle overlap 373 if (src_hi == dest_lo) { 374 DCHECK_NE(src_lo, dest_hi); 375 OpRegCopy(dest_hi, src_hi); 376 OpRegCopy(dest_lo, src_lo); 377 } else { 378 OpRegCopy(dest_lo, src_lo); 379 OpRegCopy(dest_hi, src_hi); 380 } 381 } 382 } 383} 384 385// Table of magic divisors 386struct MagicTable { 387 uint32_t magic; 388 uint32_t shift; 389 DividePattern pattern; 390}; 391 392static 
const MagicTable magic_table[] = { 393 {0, 0, DivideNone}, // 0 394 {0, 0, DivideNone}, // 1 395 {0, 0, DivideNone}, // 2 396 {0x55555556, 0, Divide3}, // 3 397 {0, 0, DivideNone}, // 4 398 {0x66666667, 1, Divide5}, // 5 399 {0x2AAAAAAB, 0, Divide3}, // 6 400 {0x92492493, 2, Divide7}, // 7 401 {0, 0, DivideNone}, // 8 402 {0x38E38E39, 1, Divide5}, // 9 403 {0x66666667, 2, Divide5}, // 10 404 {0x2E8BA2E9, 1, Divide5}, // 11 405 {0x2AAAAAAB, 1, Divide5}, // 12 406 {0x4EC4EC4F, 2, Divide5}, // 13 407 {0x92492493, 3, Divide7}, // 14 408 {0x88888889, 3, Divide7}, // 15 409}; 410 411// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4) 412bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, 413 RegLocation rl_src, RegLocation rl_dest, int lit) { 414 if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) { 415 return false; 416 } 417 DividePattern pattern = magic_table[lit].pattern; 418 if (pattern == DivideNone) { 419 return false; 420 } 421 // Tuning: add rem patterns 422 if (!is_div) { 423 return false; 424 } 425 426 int r_magic = AllocTemp(); 427 LoadConstant(r_magic, magic_table[lit].magic); 428 rl_src = LoadValue(rl_src, kCoreReg); 429 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 430 int r_hi = AllocTemp(); 431 int r_lo = AllocTemp(); 432 NewLIR4(kThumb2Smull, r_lo, r_hi, r_magic, rl_src.reg.GetReg()); 433 switch (pattern) { 434 case Divide3: 435 OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi, 436 rl_src.reg.GetReg(), EncodeShift(kArmAsr, 31)); 437 break; 438 case Divide5: 439 OpRegRegImm(kOpAsr, r_lo, rl_src.reg.GetReg(), 31); 440 OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo, r_hi, 441 EncodeShift(kArmAsr, magic_table[lit].shift)); 442 break; 443 case Divide7: 444 OpRegReg(kOpAdd, r_hi, rl_src.reg.GetReg()); 445 OpRegRegImm(kOpAsr, r_lo, rl_src.reg.GetReg(), 31); 446 OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo, r_hi, 447 
EncodeShift(kArmAsr, magic_table[lit].shift)); 448 break; 449 default: 450 LOG(FATAL) << "Unexpected pattern: " << pattern; 451 } 452 StoreValue(rl_dest, rl_result); 453 return true; 454} 455 456LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code, 457 int reg1, int base, int offset, ThrowKind kind) { 458 LOG(FATAL) << "Unexpected use of GenRegMemCheck for Arm"; 459 return NULL; 460} 461 462RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, 463 RegLocation rl_src2, bool is_div, bool check_zero) { 464 LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; 465 return rl_dest; 466} 467 468RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) { 469 LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; 470 return rl_dest; 471} 472 473RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit, 474 bool is_div) { 475 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 476 477 // Put the literal in a temp. 478 int lit_temp = AllocTemp(); 479 LoadConstant(lit_temp, lit); 480 // Use the generic case for div/rem with arg2 in a register. 481 // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure. 482 rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div); 483 FreeTemp(lit_temp); 484 485 return rl_result; 486} 487 488RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2, 489 bool is_div) { 490 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 491 if (is_div) { 492 // Simple case, use sdiv instruction. 
493 OpRegRegReg(kOpDiv, rl_result.reg.GetReg(), reg1, reg2); 494 } else { 495 // Remainder case, use the following code: 496 // temp = reg1 / reg2 - integer division 497 // temp = temp * reg2 498 // dest = reg1 - temp 499 500 int temp = AllocTemp(); 501 OpRegRegReg(kOpDiv, temp, reg1, reg2); 502 OpRegReg(kOpMul, temp, reg2); 503 OpRegRegReg(kOpSub, rl_result.reg.GetReg(), reg1, temp); 504 FreeTemp(temp); 505 } 506 507 return rl_result; 508} 509 510bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { 511 DCHECK_EQ(cu_->instruction_set, kThumb2); 512 RegLocation rl_src1 = info->args[0]; 513 RegLocation rl_src2 = info->args[1]; 514 rl_src1 = LoadValue(rl_src1, kCoreReg); 515 rl_src2 = LoadValue(rl_src2, kCoreReg); 516 RegLocation rl_dest = InlineTarget(info); 517 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 518 OpRegReg(kOpCmp, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 519 OpIT((is_min) ? kCondGt : kCondLt, "E"); 520 OpRegReg(kOpMov, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); 521 OpRegReg(kOpMov, rl_result.reg.GetReg(), rl_src1.reg.GetReg()); 522 GenBarrier(); 523 StoreValue(rl_dest, rl_result); 524 return true; 525} 526 527bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { 528 RegLocation rl_src_address = info->args[0]; // long address 529 rl_src_address.wide = 0; // ignore high half in info->args[1] 530 RegLocation rl_dest = InlineTarget(info); 531 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 532 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 533 if (size == kLong) { 534 // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. 
535 if (rl_address.reg.GetReg() != rl_result.reg.GetReg()) { 536 LoadBaseDisp(rl_address.reg.GetReg(), 0, rl_result.reg.GetReg(), kWord, INVALID_SREG); 537 LoadBaseDisp(rl_address.reg.GetReg(), 4, rl_result.reg.GetHighReg(), kWord, INVALID_SREG); 538 } else { 539 LoadBaseDisp(rl_address.reg.GetReg(), 4, rl_result.reg.GetHighReg(), kWord, INVALID_SREG); 540 LoadBaseDisp(rl_address.reg.GetReg(), 0, rl_result.reg.GetReg(), kWord, INVALID_SREG); 541 } 542 StoreValueWide(rl_dest, rl_result); 543 } else { 544 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 545 // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. 546 LoadBaseDisp(rl_address.reg.GetReg(), 0, rl_result.reg.GetReg(), size, INVALID_SREG); 547 StoreValue(rl_dest, rl_result); 548 } 549 return true; 550} 551 552bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { 553 RegLocation rl_src_address = info->args[0]; // long address 554 rl_src_address.wide = 0; // ignore high half in info->args[1] 555 RegLocation rl_src_value = info->args[2]; // [size] value 556 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 557 if (size == kLong) { 558 // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. 559 RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); 560 StoreBaseDisp(rl_address.reg.GetReg(), 0, rl_value.reg.GetReg(), kWord); 561 StoreBaseDisp(rl_address.reg.GetReg(), 4, rl_value.reg.GetHighReg(), kWord); 562 } else { 563 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 564 // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. 
565 RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); 566 StoreBaseDisp(rl_address.reg.GetReg(), 0, rl_value.reg.GetReg(), size); 567 } 568 return true; 569} 570 571void ArmMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) { 572 LOG(FATAL) << "Unexpected use of OpLea for Arm"; 573} 574 575void ArmMir2Lir::OpTlsCmp(ThreadOffset offset, int val) { 576 LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm"; 577} 578 579bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { 580 DCHECK_EQ(cu_->instruction_set, kThumb2); 581 // Unused - RegLocation rl_src_unsafe = info->args[0]; 582 RegLocation rl_src_obj = info->args[1]; // Object - known non-null 583 RegLocation rl_src_offset = info->args[2]; // long low 584 rl_src_offset.wide = 0; // ignore high half in info->args[3] 585 RegLocation rl_src_expected = info->args[4]; // int, long or Object 586 // If is_long, high half is in info->args[5] 587 RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object 588 // If is_long, high half is in info->args[7] 589 RegLocation rl_dest = InlineTarget(info); // boolean place for result 590 591 // We have only 5 temporary registers available and actually only 4 if the InlineTarget 592 // above locked one of the temps. For a straightforward CAS64 we need 7 registers: 593 // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor 594 // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop 595 // into the same temps, reducing the number of required temps down to 5. We shall work 596 // around the potentially locked temp by using LR for r_ptr, unconditionally. 597 // TODO: Pass information about the need for more temps to the stack frame generation 598 // code so that we can rely on being able to allocate enough temps. 
599 DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp); 600 MarkTemp(rARM_LR); 601 FreeTemp(rARM_LR); 602 LockTemp(rARM_LR); 603 bool load_early = true; 604 if (is_long) { 605 bool expected_is_core_reg = 606 rl_src_expected.location == kLocPhysReg && !IsFpReg(rl_src_expected.reg.GetReg()); 607 bool new_value_is_core_reg = 608 rl_src_new_value.location == kLocPhysReg && !IsFpReg(rl_src_new_value.reg.GetReg()); 609 bool expected_is_good_reg = expected_is_core_reg && !IsTemp(rl_src_expected.reg.GetReg()); 610 bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(rl_src_new_value.reg.GetReg()); 611 612 if (!expected_is_good_reg && !new_value_is_good_reg) { 613 // None of expected/new_value is non-temp reg, need to load both late 614 load_early = false; 615 // Make sure they are not in the temp regs and the load will not be skipped. 616 if (expected_is_core_reg) { 617 FlushRegWide(rl_src_expected.reg.GetReg(), rl_src_expected.reg.GetHighReg()); 618 ClobberSReg(rl_src_expected.s_reg_low); 619 ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low)); 620 rl_src_expected.location = kLocDalvikFrame; 621 } 622 if (new_value_is_core_reg) { 623 FlushRegWide(rl_src_new_value.reg.GetReg(), rl_src_new_value.reg.GetHighReg()); 624 ClobberSReg(rl_src_new_value.s_reg_low); 625 ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low)); 626 rl_src_new_value.location = kLocDalvikFrame; 627 } 628 } 629 } 630 631 // Release store semantics, get the barrier out of the way. TODO: revisit 632 GenMemBarrier(kStoreLoad); 633 634 RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); 635 RegLocation rl_new_value; 636 if (!is_long) { 637 rl_new_value = LoadValue(rl_src_new_value, kCoreReg); 638 } else if (load_early) { 639 rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); 640 } 641 642 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { 643 // Mark card for object assuming new value is stored. 
644 MarkGCCard(rl_new_value.reg.GetReg(), rl_object.reg.GetReg()); 645 } 646 647 RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); 648 649 int r_ptr = rARM_LR; 650 OpRegRegReg(kOpAdd, r_ptr, rl_object.reg.GetReg(), rl_offset.reg.GetReg()); 651 652 // Free now unneeded rl_object and rl_offset to give more temps. 653 ClobberSReg(rl_object.s_reg_low); 654 FreeTemp(rl_object.reg.GetReg()); 655 ClobberSReg(rl_offset.s_reg_low); 656 FreeTemp(rl_offset.reg.GetReg()); 657 658 RegLocation rl_expected; 659 if (!is_long) { 660 rl_expected = LoadValue(rl_src_expected, kCoreReg); 661 } else if (load_early) { 662 rl_expected = LoadValueWide(rl_src_expected, kCoreReg); 663 } else { 664 // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs. 665 int low_reg = AllocTemp(); 666 int high_reg = AllocTemp(); 667 rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg); 668 rl_expected = rl_new_value; 669 } 670 671 // do { 672 // tmp = [r_ptr] - expected; 673 // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); 674 // result = tmp != 0; 675 676 int r_tmp = AllocTemp(); 677 LIR* target = NewLIR0(kPseudoTargetLabel); 678 679 if (is_long) { 680 int r_tmp_high = AllocTemp(); 681 if (!load_early) { 682 LoadValueDirectWide(rl_src_expected, rl_expected.reg.GetReg(), rl_expected.reg.GetHighReg()); 683 } 684 NewLIR3(kThumb2Ldrexd, r_tmp, r_tmp_high, r_ptr); 685 OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetReg()); 686 OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHighReg()); 687 if (!load_early) { 688 LoadValueDirectWide(rl_src_new_value, rl_new_value.reg.GetReg(), rl_new_value.reg.GetHighReg()); 689 } 690 // Make sure we use ORR that sets the ccode 691 if (ARM_LOWREG(r_tmp) && ARM_LOWREG(r_tmp_high)) { 692 NewLIR2(kThumbOrr, r_tmp, r_tmp_high); 693 } else { 694 NewLIR4(kThumb2OrrRRRs, r_tmp, r_tmp, r_tmp_high, 0); 695 } 696 FreeTemp(r_tmp_high); // Now unneeded 697 698 DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); 699 OpIT(kCondEq, 
"T"); 700 NewLIR4(kThumb2Strexd /* eq */, r_tmp, rl_new_value.reg.GetReg(), rl_new_value.reg.GetHighReg(), r_ptr); 701 702 } else { 703 NewLIR3(kThumb2Ldrex, r_tmp, r_ptr, 0); 704 OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetReg()); 705 DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); 706 OpIT(kCondEq, "T"); 707 NewLIR4(kThumb2Strex /* eq */, r_tmp, rl_new_value.reg.GetReg(), r_ptr, 0); 708 } 709 710 // Still one conditional left from OpIT(kCondEq, "T") from either branch 711 OpRegImm(kOpCmp /* eq */, r_tmp, 1); 712 OpCondBranch(kCondEq, target); 713 714 if (!load_early) { 715 FreeTemp(rl_expected.reg.GetReg()); // Now unneeded. 716 FreeTemp(rl_expected.reg.GetHighReg()); // Now unneeded. 717 } 718 719 // result := (tmp1 != 0) ? 0 : 1; 720 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 721 OpRegRegImm(kOpRsub, rl_result.reg.GetReg(), r_tmp, 1); 722 DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); 723 OpIT(kCondUlt, ""); 724 LoadConstant(rl_result.reg.GetReg(), 0); /* cc */ 725 FreeTemp(r_tmp); // Now unneeded. 726 727 StoreValue(rl_dest, rl_result); 728 729 // Now, restore lr to its non-temp status. 
730 Clobber(rARM_LR); 731 UnmarkTemp(rARM_LR); 732 return true; 733} 734 735LIR* ArmMir2Lir::OpPcRelLoad(int reg, LIR* target) { 736 return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg, 0, 0, 0, 0, target); 737} 738 739LIR* ArmMir2Lir::OpVldm(int rBase, int count) { 740 return NewLIR3(kThumb2Vldms, rBase, fr0, count); 741} 742 743LIR* ArmMir2Lir::OpVstm(int rBase, int count) { 744 return NewLIR3(kThumb2Vstms, rBase, fr0, count); 745} 746 747void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, 748 RegLocation rl_result, int lit, 749 int first_bit, int second_bit) { 750 OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(), 751 EncodeShift(kArmLsl, second_bit - first_bit)); 752 if (first_bit != 0) { 753 OpRegRegImm(kOpLsl, rl_result.reg.GetReg(), rl_result.reg.GetReg(), first_bit); 754 } 755} 756 757void ArmMir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) { 758 int t_reg = AllocTemp(); 759 NewLIR4(kThumb2OrrRRRs, t_reg, reg_lo, reg_hi, 0); 760 FreeTemp(t_reg); 761 GenCheck(kCondEq, kThrowDivZero); 762} 763 764// Test suspend flag, return target of taken suspend branch 765LIR* ArmMir2Lir::OpTestSuspend(LIR* target) { 766 NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1); 767 return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target); 768} 769 770// Decrement register and branch on condition 771LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) { 772 // Combine sub & test using sub setflags encoding here 773 OpRegRegImm(kOpSub, reg, reg, 1); // For value == 1, this should set flags. 774 DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); 775 return OpCondBranch(c_code, target); 776} 777 778void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { 779#if ANDROID_SMP != 0 780 // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one. 
781 LIR* barrier = last_lir_insn_; 782 783 int dmb_flavor; 784 // TODO: revisit Arm barrier kinds 785 switch (barrier_kind) { 786 case kLoadStore: dmb_flavor = kISH; break; 787 case kLoadLoad: dmb_flavor = kISH; break; 788 case kStoreStore: dmb_flavor = kISHST; break; 789 case kStoreLoad: dmb_flavor = kISH; break; 790 default: 791 LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind; 792 dmb_flavor = kSY; // quiet gcc. 793 break; 794 } 795 796 // If the same barrier already exists, don't generate another. 797 if (barrier == nullptr 798 || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) { 799 barrier = NewLIR1(kThumb2Dmb, dmb_flavor); 800 } 801 802 // At this point we must have a memory barrier. Mark it as a scheduling barrier as well. 803 DCHECK(!barrier->flags.use_def_invalid); 804 barrier->u.m.def_mask = ENCODE_ALL; 805#endif 806} 807 808void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { 809 rl_src = LoadValueWide(rl_src, kCoreReg); 810 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 811 int z_reg = AllocTemp(); 812 LoadConstantNoClobber(z_reg, 0); 813 // Check for destructive overlap 814 if (rl_result.reg.GetReg() == rl_src.reg.GetHighReg()) { 815 int t_reg = AllocTemp(); 816 OpRegRegReg(kOpSub, rl_result.reg.GetReg(), z_reg, rl_src.reg.GetReg()); 817 OpRegRegReg(kOpSbc, rl_result.reg.GetHighReg(), z_reg, t_reg); 818 FreeTemp(t_reg); 819 } else { 820 OpRegRegReg(kOpSub, rl_result.reg.GetReg(), z_reg, rl_src.reg.GetReg()); 821 OpRegRegReg(kOpSbc, rl_result.reg.GetHighReg(), z_reg, rl_src.reg.GetHighReg()); 822 } 823 FreeTemp(z_reg); 824 StoreValueWide(rl_dest, rl_result); 825} 826 827void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, 828 RegLocation rl_src1, RegLocation rl_src2) { 829 /* 830 * tmp1 = src1.hi * src2.lo; // src1.hi is no longer needed 831 * dest = src1.lo * src2.lo; 832 * tmp1 += src1.lo * src2.hi; 833 * dest.hi += tmp1; 834 * 835 * To pull 
off inline multiply, we have a worst-case requirement of 7 temporary 836 * registers. Normally for Arm, we get 5. We can get to 6 by including 837 * lr in the temp set. The only problematic case is all operands and result are 838 * distinct, and none have been promoted. In that case, we can succeed by aggressively 839 * freeing operand temp registers after they are no longer needed. All other cases 840 * can proceed normally. We'll just punt on the case of the result having a misaligned 841 * overlap with either operand and send that case to a runtime handler. 842 */ 843 RegLocation rl_result; 844 if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) { 845 ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pLmul); 846 FlushAllRegs(); 847 CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false); 848 rl_result = GetReturnWide(false); 849 StoreValueWide(rl_dest, rl_result); 850 return; 851 } 852 853 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 854 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 855 856 int reg_status = 0; 857 int res_lo = INVALID_REG; 858 int res_hi = INVALID_REG; 859 bool dest_promoted = rl_dest.location == kLocPhysReg && !rl_dest.reg.IsInvalid() && 860 !IsTemp(rl_dest.reg.GetReg()) && !IsTemp(rl_dest.reg.GetHighReg()); 861 bool src1_promoted = !IsTemp(rl_src1.reg.GetReg()) && !IsTemp(rl_src1.reg.GetHighReg()); 862 bool src2_promoted = !IsTemp(rl_src2.reg.GetReg()) && !IsTemp(rl_src2.reg.GetHighReg()); 863 // Check if rl_dest is *not* either operand and we have enough temp registers. 864 if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) && 865 (dest_promoted || src1_promoted || src2_promoted)) { 866 // In this case, we do not need to manually allocate temp registers for result. 
867 rl_result = EvalLoc(rl_dest, kCoreReg, true); 868 res_lo = rl_result.reg.GetReg(); 869 res_hi = rl_result.reg.GetHighReg(); 870 } else { 871 res_lo = AllocTemp(); 872 if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) { 873 // In this case, we have enough temp registers to be allocated for result. 874 res_hi = AllocTemp(); 875 reg_status = 1; 876 } else { 877 // In this case, all temps are now allocated. 878 // res_hi will be allocated after we can free src1_hi. 879 reg_status = 2; 880 } 881 } 882 883 // Temporarily add LR to the temp pool, and assign it to tmp1 884 MarkTemp(rARM_LR); 885 FreeTemp(rARM_LR); 886 int tmp1 = rARM_LR; 887 LockTemp(rARM_LR); 888 889 if (rl_src1.reg.GetReg() == rl_src2.reg.GetReg()) { 890 DCHECK_NE(res_hi, INVALID_REG); 891 DCHECK_NE(res_lo, INVALID_REG); 892 NewLIR3(kThumb2MulRRR, tmp1, rl_src1.reg.GetReg(), rl_src1.reg.GetHighReg()); 893 NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src1.reg.GetReg(), rl_src1.reg.GetReg()); 894 OpRegRegRegShift(kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1)); 895 } else { 896 NewLIR3(kThumb2MulRRR, tmp1, rl_src2.reg.GetReg(), rl_src1.reg.GetHighReg()); 897 if (reg_status == 2) { 898 DCHECK_EQ(res_hi, INVALID_REG); 899 DCHECK_NE(rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 900 DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg()); 901 FreeTemp(rl_src1.reg.GetHighReg()); 902 res_hi = AllocTemp(); 903 } 904 DCHECK_NE(res_hi, INVALID_REG); 905 DCHECK_NE(res_lo, INVALID_REG); 906 NewLIR4(kThumb2Umull, res_lo, res_hi, rl_src2.reg.GetReg(), rl_src1.reg.GetReg()); 907 NewLIR4(kThumb2Mla, tmp1, rl_src1.reg.GetReg(), rl_src2.reg.GetHighReg(), tmp1); 908 NewLIR4(kThumb2AddRRR, res_hi, tmp1, res_hi, 0); 909 if (reg_status == 2) { 910 // Clobber rl_src1 since it was corrupted. 911 FreeTemp(rl_src1.reg.GetReg()); 912 Clobber(rl_src1.reg.GetReg()); 913 Clobber(rl_src1.reg.GetHighReg()); 914 } 915 } 916 917 // Now, restore lr to its non-temp status. 
918 FreeTemp(tmp1); 919 Clobber(rARM_LR); 920 UnmarkTemp(rARM_LR); 921 922 if (reg_status != 0) { 923 // We had manually allocated registers for rl_result. 924 // Now construct a RegLocation. 925 rl_result = GetReturnWide(false); // Just using as a template. 926 rl_result.reg.SetReg(res_lo); 927 rl_result.reg.SetHighReg(res_hi); 928 } 929 930 StoreValueWide(rl_dest, rl_result); 931} 932 933void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, 934 RegLocation rl_src2) { 935 LOG(FATAL) << "Unexpected use of GenAddLong for Arm"; 936} 937 938void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, 939 RegLocation rl_src2) { 940 LOG(FATAL) << "Unexpected use of GenSubLong for Arm"; 941} 942 943void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, 944 RegLocation rl_src2) { 945 LOG(FATAL) << "Unexpected use of GenAndLong for Arm"; 946} 947 948void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, 949 RegLocation rl_src2) { 950 LOG(FATAL) << "Unexpected use of GenOrLong for Arm"; 951} 952 953void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, 954 RegLocation rl_src2) { 955 LOG(FATAL) << "Unexpected use of genXoLong for Arm"; 956} 957 958/* 959 * Generate array load 960 */ 961void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, 962 RegLocation rl_index, RegLocation rl_dest, int scale) { 963 RegisterClass reg_class = oat_reg_class_by_size(size); 964 int len_offset = mirror::Array::LengthOffset().Int32Value(); 965 int data_offset; 966 RegLocation rl_result; 967 bool constant_index = rl_index.is_const; 968 rl_array = LoadValue(rl_array, kCoreReg); 969 if (!constant_index) { 970 rl_index = LoadValue(rl_index, kCoreReg); 971 } 972 973 if (rl_dest.wide) { 974 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 975 } else { 976 
data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 977 } 978 979 // If index is constant, just fold it into the data offset 980 if (constant_index) { 981 data_offset += mir_graph_->ConstantValue(rl_index) << scale; 982 } 983 984 /* null object? */ 985 GenNullCheck(rl_array.reg.GetReg(), opt_flags); 986 987 bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); 988 int reg_len = INVALID_REG; 989 if (needs_range_check) { 990 reg_len = AllocTemp(); 991 /* Get len */ 992 LoadWordDisp(rl_array.reg.GetReg(), len_offset, reg_len); 993 MarkPossibleNullPointerException(opt_flags); 994 } else { 995 ForceImplicitNullCheck(rl_array.reg.GetReg(), opt_flags); 996 } 997 if (rl_dest.wide || rl_dest.fp || constant_index) { 998 int reg_ptr; 999 if (constant_index) { 1000 reg_ptr = rl_array.reg.GetReg(); // NOTE: must not alter reg_ptr in constant case. 1001 } else { 1002 // No special indexed operation, lea + load w/ displacement 1003 reg_ptr = AllocTemp(); 1004 OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg.GetReg(), rl_index.reg.GetReg(), 1005 EncodeShift(kArmLsl, scale)); 1006 FreeTemp(rl_index.reg.GetReg()); 1007 } 1008 rl_result = EvalLoc(rl_dest, reg_class, true); 1009 1010 if (needs_range_check) { 1011 if (constant_index) { 1012 GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds); 1013 } else { 1014 GenRegRegCheck(kCondLs, reg_len, rl_index.reg.GetReg(), kThrowArrayBounds); 1015 } 1016 FreeTemp(reg_len); 1017 } 1018 if (rl_dest.wide) { 1019 LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(), 1020 INVALID_SREG); 1021 MarkPossibleNullPointerException(opt_flags); 1022 if (!constant_index) { 1023 FreeTemp(reg_ptr); 1024 } 1025 StoreValueWide(rl_dest, rl_result); 1026 } else { 1027 LoadBaseDisp(reg_ptr, data_offset, rl_result.reg.GetReg(), size, INVALID_SREG); 1028 MarkPossibleNullPointerException(opt_flags); 1029 if (!constant_index) { 1030 FreeTemp(reg_ptr); 1031 } 
1032 StoreValue(rl_dest, rl_result); 1033 } 1034 } else { 1035 // Offset base, then use indexed load 1036 int reg_ptr = AllocTemp(); 1037 OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg.GetReg(), data_offset); 1038 FreeTemp(rl_array.reg.GetReg()); 1039 rl_result = EvalLoc(rl_dest, reg_class, true); 1040 1041 if (needs_range_check) { 1042 GenRegRegCheck(kCondUge, rl_index.reg.GetReg(), reg_len, kThrowArrayBounds); 1043 FreeTemp(reg_len); 1044 } 1045 LoadBaseIndexed(reg_ptr, rl_index.reg.GetReg(), rl_result.reg.GetReg(), scale, size); 1046 MarkPossibleNullPointerException(opt_flags); 1047 FreeTemp(reg_ptr); 1048 StoreValue(rl_dest, rl_result); 1049 } 1050} 1051 1052/* 1053 * Generate array store 1054 * 1055 */ 1056void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, 1057 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { 1058 RegisterClass reg_class = oat_reg_class_by_size(size); 1059 int len_offset = mirror::Array::LengthOffset().Int32Value(); 1060 bool constant_index = rl_index.is_const; 1061 1062 int data_offset; 1063 if (size == kLong || size == kDouble) { 1064 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 1065 } else { 1066 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 1067 } 1068 1069 // If index is constant, just fold it into the data offset. 1070 if (constant_index) { 1071 data_offset += mir_graph_->ConstantValue(rl_index) << scale; 1072 } 1073 1074 rl_array = LoadValue(rl_array, kCoreReg); 1075 if (!constant_index) { 1076 rl_index = LoadValue(rl_index, kCoreReg); 1077 } 1078 1079 int reg_ptr; 1080 bool allocated_reg_ptr_temp = false; 1081 if (constant_index) { 1082 reg_ptr = rl_array.reg.GetReg(); 1083 } else if (IsTemp(rl_array.reg.GetReg()) && !card_mark) { 1084 Clobber(rl_array.reg.GetReg()); 1085 reg_ptr = rl_array.reg.GetReg(); 1086 } else { 1087 allocated_reg_ptr_temp = true; 1088 reg_ptr = AllocTemp(); 1089 } 1090 1091 /* null object? 
*/ 1092 GenNullCheck(rl_array.reg.GetReg(), opt_flags); 1093 1094 bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); 1095 int reg_len = INVALID_REG; 1096 if (needs_range_check) { 1097 reg_len = AllocTemp(); 1098 // NOTE: max live temps(4) here. 1099 /* Get len */ 1100 LoadWordDisp(rl_array.reg.GetReg(), len_offset, reg_len); 1101 MarkPossibleNullPointerException(opt_flags); 1102 } else { 1103 ForceImplicitNullCheck(rl_array.reg.GetReg(), opt_flags); 1104 } 1105 /* at this point, reg_ptr points to array, 2 live temps */ 1106 if (rl_src.wide || rl_src.fp || constant_index) { 1107 if (rl_src.wide) { 1108 rl_src = LoadValueWide(rl_src, reg_class); 1109 } else { 1110 rl_src = LoadValue(rl_src, reg_class); 1111 } 1112 if (!constant_index) { 1113 OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg.GetReg(), rl_index.reg.GetReg(), 1114 EncodeShift(kArmLsl, scale)); 1115 } 1116 if (needs_range_check) { 1117 if (constant_index) { 1118 GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index), kThrowConstantArrayBounds); 1119 } else { 1120 GenRegRegCheck(kCondLs, reg_len, rl_index.reg.GetReg(), kThrowArrayBounds); 1121 } 1122 FreeTemp(reg_len); 1123 } 1124 1125 if (rl_src.wide) { 1126 StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg.GetReg(), rl_src.reg.GetHighReg()); 1127 } else { 1128 StoreBaseDisp(reg_ptr, data_offset, rl_src.reg.GetReg(), size); 1129 } 1130 MarkPossibleNullPointerException(opt_flags); 1131 } else { 1132 /* reg_ptr -> array data */ 1133 OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg.GetReg(), data_offset); 1134 rl_src = LoadValue(rl_src, reg_class); 1135 if (needs_range_check) { 1136 GenRegRegCheck(kCondUge, rl_index.reg.GetReg(), reg_len, kThrowArrayBounds); 1137 FreeTemp(reg_len); 1138 } 1139 StoreBaseIndexed(reg_ptr, rl_index.reg.GetReg(), rl_src.reg.GetReg(), 1140 scale, size); 1141 MarkPossibleNullPointerException(opt_flags); 1142 } 1143 if (allocated_reg_ptr_temp) { 1144 FreeTemp(reg_ptr); 1145 } 1146 if (card_mark) { 1147 
MarkGCCard(rl_src.reg.GetReg(), rl_array.reg.GetReg()); 1148 } 1149} 1150 1151 1152void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, 1153 RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) { 1154 rl_src = LoadValueWide(rl_src, kCoreReg); 1155 // Per spec, we only care about low 6 bits of shift amount. 1156 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; 1157 if (shift_amount == 0) { 1158 StoreValueWide(rl_dest, rl_src); 1159 return; 1160 } 1161 if (BadOverlap(rl_src, rl_dest)) { 1162 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); 1163 return; 1164 } 1165 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1166 switch (opcode) { 1167 case Instruction::SHL_LONG: 1168 case Instruction::SHL_LONG_2ADDR: 1169 if (shift_amount == 1) { 1170 OpRegRegReg(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg()); 1171 OpRegRegReg(kOpAdc, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), rl_src.reg.GetHighReg()); 1172 } else if (shift_amount == 32) { 1173 OpRegCopy(rl_result.reg.GetHighReg(), rl_src.reg.GetReg()); 1174 LoadConstant(rl_result.reg.GetReg(), 0); 1175 } else if (shift_amount > 31) { 1176 OpRegRegImm(kOpLsl, rl_result.reg.GetHighReg(), rl_src.reg.GetReg(), shift_amount - 32); 1177 LoadConstant(rl_result.reg.GetReg(), 0); 1178 } else { 1179 OpRegRegImm(kOpLsl, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), shift_amount); 1180 OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(), rl_src.reg.GetReg(), 1181 EncodeShift(kArmLsr, 32 - shift_amount)); 1182 OpRegRegImm(kOpLsl, rl_result.reg.GetReg(), rl_src.reg.GetReg(), shift_amount); 1183 } 1184 break; 1185 case Instruction::SHR_LONG: 1186 case Instruction::SHR_LONG_2ADDR: 1187 if (shift_amount == 32) { 1188 OpRegCopy(rl_result.reg.GetReg(), rl_src.reg.GetHighReg()); 1189 OpRegRegImm(kOpAsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), 31); 1190 } else if (shift_amount > 31) { 1191 OpRegRegImm(kOpAsr, 
rl_result.reg.GetReg(), rl_src.reg.GetHighReg(), shift_amount - 32); 1192 OpRegRegImm(kOpAsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), 31); 1193 } else { 1194 int t_reg = AllocTemp(); 1195 OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetReg(), shift_amount); 1196 OpRegRegRegShift(kOpOr, rl_result.reg.GetReg(), t_reg, rl_src.reg.GetHighReg(), 1197 EncodeShift(kArmLsl, 32 - shift_amount)); 1198 FreeTemp(t_reg); 1199 OpRegRegImm(kOpAsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), shift_amount); 1200 } 1201 break; 1202 case Instruction::USHR_LONG: 1203 case Instruction::USHR_LONG_2ADDR: 1204 if (shift_amount == 32) { 1205 OpRegCopy(rl_result.reg.GetReg(), rl_src.reg.GetHighReg()); 1206 LoadConstant(rl_result.reg.GetHighReg(), 0); 1207 } else if (shift_amount > 31) { 1208 OpRegRegImm(kOpLsr, rl_result.reg.GetReg(), rl_src.reg.GetHighReg(), shift_amount - 32); 1209 LoadConstant(rl_result.reg.GetHighReg(), 0); 1210 } else { 1211 int t_reg = AllocTemp(); 1212 OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetReg(), shift_amount); 1213 OpRegRegRegShift(kOpOr, rl_result.reg.GetReg(), t_reg, rl_src.reg.GetHighReg(), 1214 EncodeShift(kArmLsl, 32 - shift_amount)); 1215 FreeTemp(t_reg); 1216 OpRegRegImm(kOpLsr, rl_result.reg.GetHighReg(), rl_src.reg.GetHighReg(), shift_amount); 1217 } 1218 break; 1219 default: 1220 LOG(FATAL) << "Unexpected case"; 1221 } 1222 StoreValueWide(rl_dest, rl_result); 1223} 1224 1225void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode, 1226 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 1227 if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) { 1228 if (!rl_src2.is_const) { 1229 // Don't bother with special handling for subtract from immediate. 
1230 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); 1231 return; 1232 } 1233 } else { 1234 // Normalize 1235 if (!rl_src2.is_const) { 1236 DCHECK(rl_src1.is_const); 1237 std::swap(rl_src1, rl_src2); 1238 } 1239 } 1240 if (BadOverlap(rl_src1, rl_dest)) { 1241 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); 1242 return; 1243 } 1244 DCHECK(rl_src2.is_const); 1245 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 1246 uint32_t val_lo = Low32Bits(val); 1247 uint32_t val_hi = High32Bits(val); 1248 int32_t mod_imm_lo = ModifiedImmediate(val_lo); 1249 int32_t mod_imm_hi = ModifiedImmediate(val_hi); 1250 1251 // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit 1252 switch (opcode) { 1253 case Instruction::ADD_LONG: 1254 case Instruction::ADD_LONG_2ADDR: 1255 case Instruction::SUB_LONG: 1256 case Instruction::SUB_LONG_2ADDR: 1257 if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) { 1258 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); 1259 return; 1260 } 1261 break; 1262 default: 1263 break; 1264 } 1265 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1266 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1267 // NOTE: once we've done the EvalLoc on dest, we can no longer bail. 
1268 switch (opcode) { 1269 case Instruction::ADD_LONG: 1270 case Instruction::ADD_LONG_2ADDR: 1271 NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), mod_imm_lo); 1272 NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); 1273 break; 1274 case Instruction::OR_LONG: 1275 case Instruction::OR_LONG_2ADDR: 1276 if ((val_lo != 0) || (rl_result.reg.GetReg() != rl_src1.reg.GetReg())) { 1277 OpRegRegImm(kOpOr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), val_lo); 1278 } 1279 if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { 1280 OpRegRegImm(kOpOr, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), val_hi); 1281 } 1282 break; 1283 case Instruction::XOR_LONG: 1284 case Instruction::XOR_LONG_2ADDR: 1285 OpRegRegImm(kOpXor, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), val_lo); 1286 OpRegRegImm(kOpXor, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), val_hi); 1287 break; 1288 case Instruction::AND_LONG: 1289 case Instruction::AND_LONG_2ADDR: 1290 if ((val_lo != 0xffffffff) || (rl_result.reg.GetReg() != rl_src1.reg.GetReg())) { 1291 OpRegRegImm(kOpAnd, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), val_lo); 1292 } 1293 if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) { 1294 OpRegRegImm(kOpAnd, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), val_hi); 1295 } 1296 break; 1297 case Instruction::SUB_LONG_2ADDR: 1298 case Instruction::SUB_LONG: 1299 NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), mod_imm_lo); 1300 NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi); 1301 break; 1302 default: 1303 LOG(FATAL) << "Unexpected opcode " << opcode; 1304 } 1305 StoreValueWide(rl_dest, rl_result); 1306} 1307 1308} // namespace art 1309