// int_arm.cc revision dd7624d2b9e599d57762d12031b10b89defc9807

/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Thumb2 ISA. */

#include "arm_lir.h"
#include "codegen_arm.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"

namespace art {

LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

/*
 * Generate a Thumb2 IT instruction, which can nullify up to
 * four subsequent instructions based on a condition and its
 * inverse.  The condition applies to the first instruction, which
 * is executed if the condition is met.  The string "guide" consists
 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
 * A "T" means the instruction is executed if the condition is
 * met, and an "E" means the instruction is executed if the condition
 * is not met.
 */
LIR* ArmMir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  int mask;
  int mask3 = 0;
  int mask2 = 0;
  int mask1 = 0;
  ArmConditionCode code = ArmConditionEncoding(ccode);
  int cond_bit = code & 1;
  int alt_bit = cond_bit ^ 1;

  // Note: case fallthroughs intentional
  switch (strlen(guide)) {
    case 3:
      mask1 = (guide[2] == 'T') ? cond_bit : alt_bit;
    case 2:
      mask2 = (guide[1] == 'T') ? cond_bit : alt_bit;
    case 1:
      mask3 = (guide[0] == 'T') ? cond_bit : alt_bit;
      break;
    case 0:
      break;
    default:
      LOG(FATAL) << "OAT: bad case in OpIT";
  }
  mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
         (1 << (3 - strlen(guide)));
  return NewLIR2(kThumb2It, code, mask);
}
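
// For illustration: with ccode == kCondHi and guide "E", ArmConditionEncoding
// yields kArmCondHi (0x8), so cond_bit == 0 and alt_bit == 1.  The single 'E'
// sets mask3 = alt_bit = 1, and the terminating one lands at bit (3 - 1) = 2,
// giving mask = 0b1100.  NewLIR2(kThumb2It, 0x8, 0xc) therefore encodes
// "ITE HI": the next instruction executes if HI holds, the one after if not.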

/*
 * 64-bit 3way compare function.
 *     mov   rX, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   rX, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   rX, #-1
 *     mov(!hi)  rX, #1
 * flip:
 *     neg   rX
 * done:
 */
void ArmMir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LIR* target1;
  LIR* target2;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  RegStorage t_reg = AllocTemp();
  LoadConstant(t_reg, -1);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  LIR* branch1 = OpCondBranch(kCondLt, NULL);
  LIR* branch2 = OpCondBranch(kCondGt, NULL);
  OpRegRegReg(kOpSub, t_reg, rl_src1.reg, rl_src2.reg);
  LIR* branch3 = OpCondBranch(kCondEq, NULL);

  OpIT(kCondHi, "E");
  NewLIR2(kThumb2MovI8M, t_reg.GetReg(), ModifiedImmediate(-1));
  LoadConstant(t_reg, 1);
  GenBarrier();

  target2 = NewLIR0(kPseudoTargetLabel);
  OpRegReg(kOpNeg, t_reg, t_reg);

  target1 = NewLIR0(kPseudoTargetLabel);

  RegLocation rl_temp = LocCReturn();  // Just using as template, will change
  rl_temp.reg.SetReg(t_reg.GetReg());
  StoreValue(rl_dest, rl_temp);
  FreeTemp(t_reg);

  branch1->target = target1;
  branch2->target = target2;
  branch3->target = branch1->target;
}
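
// For illustration: comparing src1 == 5 with src2 == 3 (equal high words), the
// unsigned SUB of the low words sets HI, so the ITE block loads rX = -1 -- and
// then execution falls through the "flip" label, whose NEG produces the final
// +1.  The seemingly inverted constants in the ITE arms are deliberate: both of
// those paths still pass through the negation at "flip".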

void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  DCHECK_GE(ModifiedImmediate(val_lo), 0);
  DCHECK_GE(ModifiedImmediate(val_hi), 0);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    RegStorage t_reg = AllocTemp();
    NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), low_reg.GetReg(), high_reg.GetReg(), 0);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCmpImmBranch(kCondNe, high_reg, val_hi, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCmpImmBranch(kCondLt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondGt, high_reg, val_hi, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCmpImmBranch(kCondGt, high_reg, val_hi, taken);
      OpCmpImmBranch(kCondLt, high_reg, val_hi, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}
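
// For illustration of the condition splitting above: testing src < val with
// ccode == kCondLt first compares the high words signed; only when they tie
// does the verdict rest on the low words, which must then compare as
// *unsigned* 32-bit values (kCondUlt).  E.g., for src == 0x0000000080000000
// and val == 0x000000007fffffff, the high words are equal and the unsigned
// low-word compare 0x80000000 <u 0x7fffffff is false, so the branch is
// correctly not taken -- a signed low-word compare would have taken it.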

void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  rl_src = LoadValue(rl_src, kCoreReg);
  ConditionCode ccode = mir->meta.ccode;
  if (mir->ssa_rep->num_uses == 1) {
    // CONST case
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    // Change kCondNe to kCondEq for the special cases below.
    if (ccode == kCondNe) {
      ccode = kCondEq;
      std::swap(true_val, false_val);
    }
    bool cheap_false_val = InexpensiveConstantInt(false_val);
    if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) {
      OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val);
      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
      OpIT(true_val == 0 ? kCondNe : kCondUge, "");
      LoadConstant(rl_result.reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && ccode == kCondEq && true_val == 1) {
      OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1);
      DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
      OpIT(kCondLs, "");
      LoadConstant(rl_result.reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else if (cheap_false_val && InexpensiveConstantInt(true_val)) {
      OpRegImm(kOpCmp, rl_src.reg, 0);
      OpIT(ccode, "E");
      LoadConstant(rl_result.reg, true_val);
      LoadConstant(rl_result.reg, false_val);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    } else {
      // Unlikely case - could be tuned.
      RegStorage t_reg1 = AllocTemp();
      RegStorage t_reg2 = AllocTemp();
      LoadConstant(t_reg1, true_val);
      LoadConstant(t_reg2, false_val);
      OpRegImm(kOpCmp, rl_src.reg, 0);
      OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, t_reg1);
      OpRegCopy(rl_result.reg, t_reg2);
      GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
    }
  } else {
    // MOVE case
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
    rl_true = LoadValue(rl_true, kCoreReg);
    rl_false = LoadValue(rl_false, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegImm(kOpCmp, rl_src.reg, 0);
    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
      OpIT(NegateComparison(ccode), "");
      OpRegCopy(rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {  // False case in place?
      OpIT(ccode, "");
      OpRegCopy(rl_result.reg, rl_true.reg);
    } else {  // Normal - select between the two.
      OpIT(ccode, "E");
      OpRegCopy(rl_result.reg, rl_true.reg);
      OpRegCopy(rl_result.reg, rl_false.reg);
    }
    GenBarrier();  // Add a scheduling barrier to keep the IT shadow intact
  }
  StoreValue(rl_dest, rl_result);
}
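
// For illustration: in the MOVE case with three distinct registers, the
// sequence emitted above is roughly
//     cmp   r_src, #0
//     ite   <ccode>
//     mov   r_result, r_true    ; if <ccode>
//     mov   r_result, r_false   ; if !<ccode>
// with GenBarrier() keeping later scheduling from separating the IT
// instruction from its two-instruction shadow.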

void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    RegLocation rl_temp = UpdateLocWide(rl_src2);
    // Do special compare/branch against simple const operand if not already in registers.
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if ((rl_temp.location != kLocPhysReg) &&
        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
      GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
      return;
    }
  }
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh());
  switch (ccode) {
    case kCondEq:
      OpCondBranch(kCondNe, not_taken);
      break;
    case kCondNe:
      OpCondBranch(kCondNe, taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow());
  OpCondBranch(ccode, taken);
}

/*
 * Generate a register comparison to an immediate and branch.  Caller
 * is responsible for setting branch target field.
 */
LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value, LIR* target) {
  LIR* branch;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  /*
   * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit
   * compare-and-branch if zero is ideal if it will reach.  However, because null checks
   * branch forward to a launch pad, they will frequently not reach - and thus have to
   * be converted to a long form during assembly (which will trigger another assembly
   * pass).  Here we estimate the branch distance for checks, and if large directly
   * generate the long form in an attempt to avoid an extra assembly pass.
   * TODO: consider interspersing launchpads in code following unconditional branches.
   */
  bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget));
  skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64);
  if (!skip && (ARM_LOWREG(reg.GetReg())) && (check_value == 0) &&
      ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) {
    branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
                     reg.GetReg(), 0);
  } else {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kThumbBCond, 0, arm_cond);
  }
  branch->target = target;
  return branch;
}
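
// For illustration: a null check on a low register compiles to the 16-bit
// "cbz rN, <target>" when the target is believed to be in reach; any other
// register, value, or condition takes the generic "cmp rN, #imm" followed by
// "b<cond> <target>".  CBZ/CBNZ can only branch forward a very short distance
// (on the order of 128 bytes), which is why the reach estimate above matters.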

LIR* ArmMir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  LIR* res;
  int opcode;
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (ARM_FPREG(r_dest.GetReg()) || ARM_FPREG(r_src.GetReg()))
    return OpFpRegCopy(r_dest, r_src);
  if (ARM_LOWREG(r_dest.GetReg()) && ARM_LOWREG(r_src.GetReg()))
    opcode = kThumbMovRR;
  else if (!ARM_LOWREG(r_dest.GetReg()) && !ARM_LOWREG(r_src.GetReg()))
    opcode = kThumbMovRR_H2H;
  else if (ARM_LOWREG(r_dest.GetReg()))
    opcode = kThumbMovRR_H2L;
  else
    opcode = kThumbMovRR_L2H;
  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

LIR* ArmMir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LIR* res = OpRegCopyNoInsert(r_dest, r_src);
  AppendLIR(res);
  return res;
}

void ArmMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  bool dest_fp = ARM_FPREG(r_dest.GetLowReg());
  bool src_fp = ARM_FPREG(r_src.GetLowReg());
  if (dest_fp) {
    if (src_fp) {
      // FIXME: handle 64-bit solo's here.
      OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
                RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
    } else {
      NewLIR3(kThumb2Fmdrr, S2d(r_dest.GetLowReg(), r_dest.GetHighReg()),
              r_src.GetLowReg(), r_src.GetHighReg());
    }
  } else {
    if (src_fp) {
      NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(),
              S2d(r_src.GetLowReg(), r_src.GetHighReg()));
    } else {
      // Handle overlap
      if (r_src.GetHighReg() == r_dest.GetLowReg()) {
        DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
      } else {
        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
      }
    }
  }
}

// Table of magic divisors
struct MagicTable {
  uint32_t magic;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {0, 0, DivideNone},        // 0
  {0, 0, DivideNone},        // 1
  {0, 0, DivideNone},        // 2
  {0x55555556, 0, Divide3},  // 3
  {0, 0, DivideNone},        // 4
  {0x66666667, 1, Divide5},  // 5
  {0x2AAAAAAB, 0, Divide3},  // 6
  {0x92492493, 2, Divide7},  // 7
  {0, 0, DivideNone},        // 8
  {0x38E38E39, 1, Divide5},  // 9
  {0x66666667, 2, Divide5},  // 10
  {0x2E8BA2E9, 1, Divide5},  // 11
  {0x2AAAAAAB, 1, Divide5},  // 12
  {0x4EC4EC4F, 2, Divide5},  // 13
  {0x92492493, 3, Divide7},  // 14
  {0x88888889, 3, Divide7},  // 15
};

// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
bool ArmMir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  if ((lit < 0) || (lit >= static_cast<int>(sizeof(magic_table)/sizeof(magic_table[0])))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns
  if (!is_div) {
    return false;
  }

  RegStorage r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage r_hi = AllocTemp();
  RegStorage r_lo = AllocTemp();
  NewLIR4(kThumb2Smull, r_lo.GetReg(), r_hi.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, rl_result.reg.GetReg(), r_hi.GetReg(),
                       rl_src.reg.GetReg(), EncodeShift(kArmAsr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    case Divide7:
      OpRegReg(kOpAdd, r_hi, rl_src.reg);
      OpRegRegImm(kOpAsr, r_lo, rl_src.reg, 31);
      OpRegRegRegShift(kOpRsub, rl_result.reg.GetReg(), r_lo.GetReg(), r_hi.GetReg(),
                       EncodeShift(kArmAsr, magic_table[lit].shift));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValue(rl_dest, rl_result);
  return true;
}
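
// For illustration: division by 3 uses magic == 0x55555556, roughly 2^32 / 3.
// SMULL leaves the high 32 bits of magic * x in r_hi, which is x/3 rounded
// toward negative infinity; subtracting (x >> 31) corrects negative inputs to
// round toward zero.  E.g., x == 9 gives r_hi == 3; x == -9 gives r_hi == -4,
// and -4 - (-1) == -3, matching the required truncated quotient.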
<< "Unexpected use of GenDivRem for Arm"; 472 return rl_dest; 473} 474 475RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) { 476 LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; 477 return rl_dest; 478} 479 480RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) { 481 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 482 483 // Put the literal in a temp. 484 RegStorage lit_temp = AllocTemp(); 485 LoadConstant(lit_temp, lit); 486 // Use the generic case for div/rem with arg2 in a register. 487 // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure. 488 rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div); 489 FreeTemp(lit_temp); 490 491 return rl_result; 492} 493 494RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2, 495 bool is_div) { 496 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 497 if (is_div) { 498 // Simple case, use sdiv instruction. 499 OpRegRegReg(kOpDiv, rl_result.reg, reg1, reg2); 500 } else { 501 // Remainder case, use the following code: 502 // temp = reg1 / reg2 - integer division 503 // temp = temp * reg2 504 // dest = reg1 - temp 505 506 RegStorage temp = AllocTemp(); 507 OpRegRegReg(kOpDiv, temp, reg1, reg2); 508 OpRegReg(kOpMul, temp, reg2); 509 OpRegRegReg(kOpSub, rl_result.reg, reg1, temp); 510 FreeTemp(temp); 511 } 512 513 return rl_result; 514} 515 516bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { 517 DCHECK_EQ(cu_->instruction_set, kThumb2); 518 RegLocation rl_src1 = info->args[0]; 519 RegLocation rl_src2 = info->args[1]; 520 rl_src1 = LoadValue(rl_src1, kCoreReg); 521 rl_src2 = LoadValue(rl_src2, kCoreReg); 522 RegLocation rl_dest = InlineTarget(info); 523 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 524 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); 525 OpIT((is_min) ? kCondGt : kCondLt, "E"); 526 OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); 527 OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); 528 GenBarrier(); 529 StoreValue(rl_dest, rl_result); 530 return true; 531} 532 533bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { 534 RegLocation rl_src_address = info->args[0]; // long address 535 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] 536 RegLocation rl_dest = InlineTarget(info); 537 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 538 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 539 if (size == kLong) { 540 // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. 541 if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) { 542 LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); 543 LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); 544 } else { 545 LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh()); 546 LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow()); 547 } 548 StoreValueWide(rl_dest, rl_result); 549 } else { 550 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 551 // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. 

bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  OpIT((is_min) ? kCondGt : kCondLt, "E");
  OpRegReg(kOpMov, rl_result.reg, rl_src2.reg);
  OpRegReg(kOpMov, rl_result.reg, rl_src1.reg);
  GenBarrier();
  StoreValue(rl_dest, rl_result);
  return true;
}

bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == kLong) {
    // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0.
    if (rl_address.reg.GetReg() != rl_result.reg.GetLowReg()) {
      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
    } else {
      LoadWordDisp(rl_address.reg, 4, rl_result.reg.GetHigh());
      LoadWordDisp(rl_address.reg, 0, rl_result.reg.GetLow());
    }
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0.
    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}
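
// For illustration of the load ordering above: if the address register happens
// to be the same physical register as the result's low half, loading [addr]
// into it first would clobber the base before the second load, so the high
// word at offset 4 is loaded first in that case.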

bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == kLong) {
    // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg.GetLow(), kWord);
    StoreBaseDisp(rl_address.reg, 4, rl_value.reg.GetHigh(), kWord);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned store with STR and STRH is allowed on ARMv7 with SCTLR.A set to 0.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
  }
  return true;
}

void ArmMir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  LOG(FATAL) << "Unexpected use of OpLea for Arm";
}

void ArmMir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
  LOG(FATAL) << "Unexpected use of OpTlsCmp for Arm";
}

bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kThumb2);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // We have only 5 temporary registers available and actually only 4 if the InlineTarget
  // above locked one of the temps. For a straightforward CAS64 we need 7 registers:
  // r_ptr (1), new_value (2), expected (2) and ldrexd result (2). If neither expected nor
  // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop
  // into the same temps, reducing the number of required temps down to 5. We shall work
  // around the potentially locked temp by using LR for r_ptr, unconditionally.
  // TODO: Pass information about the need for more temps to the stack frame generation
  // code so that we can rely on being able to allocate enough temps.
  DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp);
  MarkTemp(rARM_LR);
  FreeTemp(rARM_LR);
  LockTemp(rARM_LR);
  bool load_early = true;
  if (is_long) {
    int expected_reg = is_long ? rl_src_expected.reg.GetLowReg() : rl_src_expected.reg.GetReg();
    int new_val_reg = is_long ? rl_src_new_value.reg.GetLowReg() : rl_src_new_value.reg.GetReg();
    bool expected_is_core_reg = rl_src_expected.location == kLocPhysReg && !IsFpReg(expected_reg);
    bool new_value_is_core_reg = rl_src_new_value.location == kLocPhysReg && !IsFpReg(new_val_reg);
    bool expected_is_good_reg = expected_is_core_reg && !IsTemp(expected_reg);
    bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(new_val_reg);

    if (!expected_is_good_reg && !new_value_is_good_reg) {
      // None of expected/new_value is non-temp reg, need to load both late
      load_early = false;
      // Make sure they are not in the temp regs and the load will not be skipped.
      if (expected_is_core_reg) {
        FlushRegWide(rl_src_expected.reg);
        ClobberSReg(rl_src_expected.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low));
        rl_src_expected.location = kLocDalvikFrame;
      }
      if (new_value_is_core_reg) {
        FlushRegWide(rl_src_new_value.reg);
        ClobberSReg(rl_src_new_value.s_reg_low);
        ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low));
        rl_src_new_value.location = kLocDalvikFrame;
      }
    }
  }

  // Release store semantics, get the barrier out of the way.  TODO: revisit
  GenMemBarrier(kStoreLoad);

  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
  RegLocation rl_new_value;
  if (!is_long) {
    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
  } else if (load_early) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(rl_new_value.reg, rl_object.reg);
  }

  RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);

  RegStorage r_ptr = rs_rARM_LR;
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg.GetReg());
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg.GetReg());

  RegLocation rl_expected;
  if (!is_long) {
    rl_expected = LoadValue(rl_src_expected, kCoreReg);
  } else if (load_early) {
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    // NOTE: partially defined rl_expected & rl_new_value - but we just want the regs.
    int low_reg = AllocTemp().GetReg();
    int high_reg = AllocTemp().GetReg();
    rl_new_value.reg = RegStorage(RegStorage::k64BitPair, low_reg, high_reg);
    rl_expected = rl_new_value;
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  RegStorage r_tmp = AllocTemp();
  LIR* target = NewLIR0(kPseudoTargetLabel);

  if (is_long) {
    RegStorage r_tmp_high = AllocTemp();
    if (!load_early) {
      LoadValueDirectWide(rl_src_expected, rl_expected.reg);
    }
    NewLIR3(kThumb2Ldrexd, r_tmp.GetReg(), r_tmp_high.GetReg(), r_ptr.GetReg());
    OpRegReg(kOpSub, r_tmp, rl_expected.reg.GetLow());
    OpRegReg(kOpSub, r_tmp_high, rl_expected.reg.GetHigh());
    if (!load_early) {
      LoadValueDirectWide(rl_src_new_value, rl_new_value.reg);
    }
    // Make sure we use ORR that sets the ccode
    if (ARM_LOWREG(r_tmp.GetReg()) && ARM_LOWREG(r_tmp_high.GetReg())) {
      NewLIR2(kThumbOrr, r_tmp.GetReg(), r_tmp_high.GetReg());
    } else {
      NewLIR4(kThumb2OrrRRRs, r_tmp.GetReg(), r_tmp.GetReg(), r_tmp_high.GetReg(), 0);
    }
    FreeTemp(r_tmp_high);  // Now unneeded

    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strexd /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetLowReg(),
            rl_new_value.reg.GetHighReg(), r_ptr.GetReg());
  } else {
    NewLIR3(kThumb2Ldrex, r_tmp.GetReg(), r_ptr.GetReg(), 0);
    OpRegReg(kOpSub, r_tmp, rl_expected.reg);
    DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
    OpIT(kCondEq, "T");
    NewLIR4(kThumb2Strex /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0);
  }

  // Still one conditional left from OpIT(kCondEq, "T") from either branch
  OpRegImm(kOpCmp /* eq */, r_tmp, 1);
  OpCondBranch(kCondEq, target);

  if (!load_early) {
    FreeTemp(rl_expected.reg);  // Now unneeded.
  }

  // result := (tmp1 != 0) ? 0 : 1;
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1);
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  OpIT(kCondUlt, "");
  LoadConstant(rl_result.reg, 0);  /* cc */
  FreeTemp(r_tmp);  // Now unneeded.

  StoreValue(rl_dest, rl_result);

  // Now, restore lr to its non-temp status.
  Clobber(rARM_LR);
  UnmarkTemp(rARM_LR);
  return true;
}
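
// For illustration of the CAS64 loop above: LDREXD claims exclusive access,
// and the flag-setting ORR of the two half-differences leaves Z set only when
// the loaded value equals "expected".  The ITT EQ shadow then runs STREXD --
// which writes 0 to r_tmp on success and 1 if exclusivity was lost -- followed
// by CMP r_tmp, #1, so the branch retries only after a lost reservation.  On a
// value mismatch both predicated instructions are skipped, the stale nonzero
// r_tmp survives, and the final "rsbs result, r_tmp, #1" plus the IT ULT
// clamp yields the boolean result 0.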

LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
}

LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
  return NewLIR3(kThumb2Vldms, r_base.GetReg(), fr0, count);
}

LIR* ArmMir2Lir::OpVstm(RegStorage r_base, int count) {
  return NewLIR3(kThumb2Vstms, r_base.GetReg(), fr0, count);
}

void ArmMir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  OpRegRegRegShift(kOpAdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
                   EncodeShift(kArmLsl, second_bit - first_bit));
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void ArmMir2Lir::GenDivZeroCheck(RegStorage reg) {
  DCHECK(reg.IsPair());  // TODO: support k64BitSolo.
  RegStorage t_reg = AllocTemp();
  NewLIR4(kThumb2OrrRRRs, t_reg.GetReg(), reg.GetLowReg(), reg.GetHighReg(), 0);
  FreeTemp(t_reg);
  GenCheck(kCondEq, kThrowDivZero);
}

// Test suspend flag, return target of taken suspend branch
LIR* ArmMir2Lir::OpTestSuspend(LIR* target) {
  NewLIR2(kThumbSubRI8, rARM_SUSPEND, 1);
  return OpCondBranch((target == NULL) ? kCondEq : kCondNe, target);
}

// Decrement register and branch on condition
LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  // Combine sub & test using sub setflags encoding here
  OpRegRegImm(kOpSub, reg, reg, 1);  // For value == 1, this should set flags.
  DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE);
  return OpCondBranch(c_code, target);
}

void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with the assumption that the last LIR is the barrier.  If it is not, generate one.
  LIR* barrier = last_lir_insn_;

  int dmb_flavor;
  // TODO: revisit Arm barrier kinds
  switch (barrier_kind) {
    case kLoadStore: dmb_flavor = kISH; break;
    case kLoadLoad: dmb_flavor = kISH; break;
    case kStoreStore: dmb_flavor = kISHST; break;
    case kStoreLoad: dmb_flavor = kISH; break;
    default:
      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
      dmb_flavor = kSY;  // quiet gcc.
      break;
  }

  // If the same barrier already exists, don't generate another.
  if (barrier == nullptr || barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor) {
    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
  }

  // At this point we must have a memory barrier.  Mark it as a scheduling barrier as well.
  DCHECK(!barrier->flags.use_def_invalid);
  barrier->u.m.def_mask = ENCODE_ALL;
#endif
}

void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage z_reg = AllocTemp();
  LoadConstantNoClobber(z_reg, 0);
  // Check for destructive overlap
  if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
    RegStorage t_reg = AllocTemp();
    OpRegCopy(t_reg, rl_src.reg.GetHigh());  // Save src.hi before the low subtraction clobbers it.
    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
    FreeTemp(t_reg);
  } else {
    OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
    OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, rl_src.reg.GetHigh());
  }
  FreeTemp(z_reg);
  StoreValueWide(rl_dest, rl_result);
}
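
// For illustration: negating the 64-bit value 1 computes 0 - 1 == 0xffffffff
// with a borrow in the low word, and the SBC then produces
// 0 - 0 - borrow == 0xffffffff in the high word: together 0xffffffffffffffff,
// i.e. -1.  This is why the high word must use subtract-with-carry rather
// than a plain subtract.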

void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  /*
   * tmp1     = src1.hi * src2.lo;  // src1.hi is no longer needed
   * dest     = src1.lo * src2.lo;
   * tmp1    += src1.lo * src2.hi;
   * dest.hi += tmp1;
   *
   * To pull off inline multiply, we have a worst-case requirement of 7 temporary
   * registers.  Normally for Arm, we get 5.  We can get to 6 by including
   * lr in the temp set.  The only problematic case is when all operands and the
   * result are distinct, and none have been promoted.  In that case, we can succeed
   * by aggressively freeing operand temp registers after they are no longer needed.
   * All other cases can proceed normally.  We'll just punt on the case of the result
   * having a misaligned overlap with either operand and send that case to a runtime
   * handler.
   */
  RegLocation rl_result;
  if (BadOverlap(rl_src1, rl_dest) || (BadOverlap(rl_src2, rl_dest))) {
    ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
    FlushAllRegs();
    CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
    rl_result = GetReturnWide(false);
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);

  int reg_status = 0;
  RegStorage res_lo;
  RegStorage res_hi;
  bool dest_promoted = rl_dest.location == kLocPhysReg && rl_dest.reg.Valid() &&
      !IsTemp(rl_dest.reg.GetLowReg()) && !IsTemp(rl_dest.reg.GetHighReg());
  bool src1_promoted = !IsTemp(rl_src1.reg.GetLowReg()) && !IsTemp(rl_src1.reg.GetHighReg());
  bool src2_promoted = !IsTemp(rl_src2.reg.GetLowReg()) && !IsTemp(rl_src2.reg.GetHighReg());
  // Check if rl_dest is *not* either operand and we have enough temp registers.
  if ((rl_dest.s_reg_low != rl_src1.s_reg_low && rl_dest.s_reg_low != rl_src2.s_reg_low) &&
      (dest_promoted || src1_promoted || src2_promoted)) {
    // In this case, we do not need to manually allocate temp registers for result.
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    res_lo = rl_result.reg.GetLow();
    res_hi = rl_result.reg.GetHigh();
  } else {
    res_lo = AllocTemp();
    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) || src1_promoted || src2_promoted) {
      // In this case, we have enough temp registers to be allocated for result.
      res_hi = AllocTemp();
      reg_status = 1;
    } else {
      // In this case, all temps are now allocated.
      // res_hi will be allocated after we can free src1_hi.
      reg_status = 2;
    }
  }

  // Temporarily add LR to the temp pool, and assign it to tmp1
  MarkTemp(rARM_LR);
  FreeTemp(rARM_LR);
  RegStorage tmp1 = rs_rARM_LR;
  LockTemp(rARM_LR);

  if (rl_src1.reg == rl_src2.reg) {
    DCHECK(res_hi.Valid());
    DCHECK(res_lo.Valid());
    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src1.reg.GetLowReg(),
            rl_src1.reg.GetLowReg());
    OpRegRegRegShift(kOpAdd, res_hi.GetReg(), res_hi.GetReg(), tmp1.GetReg(),
                     EncodeShift(kArmLsl, 1));
  } else {
    NewLIR3(kThumb2MulRRR, tmp1.GetReg(), rl_src2.reg.GetLowReg(), rl_src1.reg.GetHighReg());
    if (reg_status == 2) {
      DCHECK(!res_hi.Valid());
      DCHECK_NE(rl_src1.reg.GetLowReg(), rl_src2.reg.GetLowReg());
      DCHECK_NE(rl_src1.reg.GetHighReg(), rl_src2.reg.GetHighReg());
      FreeTemp(rl_src1.reg.GetHighReg());
      res_hi = AllocTemp();
    }
    DCHECK(res_hi.Valid());
    DCHECK(res_lo.Valid());
    NewLIR4(kThumb2Umull, res_lo.GetReg(), res_hi.GetReg(), rl_src2.reg.GetLowReg(),
            rl_src1.reg.GetLowReg());
    NewLIR4(kThumb2Mla, tmp1.GetReg(), rl_src1.reg.GetLowReg(), rl_src2.reg.GetHighReg(),
            tmp1.GetReg());
    NewLIR4(kThumb2AddRRR, res_hi.GetReg(), tmp1.GetReg(), res_hi.GetReg(), 0);
    if (reg_status == 2) {
      // Clobber rl_src1 since it was corrupted.
      FreeTemp(rl_src1.reg);
      Clobber(rl_src1.reg);
    }
  }

  // Now, restore lr to its non-temp status.
  FreeTemp(tmp1);
  Clobber(rARM_LR);
  UnmarkTemp(rARM_LR);

  if (reg_status != 0) {
    // We had manually allocated registers for rl_result.
    // Now construct a RegLocation.
    rl_result = GetReturnWide(false);  // Just using as a template.
    rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
  }

  StoreValueWide(rl_dest, rl_result);
}
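
// For illustration: writing the operands as a == a_hi * 2^32 + a_lo and
// b == b_hi * 2^32 + b_lo, the 64-bit truncated product is
//     a * b == a_lo * b_lo + 2^32 * (a_hi * b_lo + a_lo * b_hi)   (mod 2^64),
// since the a_hi * b_hi term is shifted entirely out of range.  UMULL supplies
// the full 64-bit a_lo * b_lo, and the two 32-bit cross products are summed
// into its high word, exactly as the comment at the top of GenMulLong sketches.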

void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
}

void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
}

void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
}

void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
}

void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  LOG(FATAL) << "Unexpected use of GenXorLong for Arm";
}

/*
 * Generate array load
 */
void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    LoadWordDisp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  if (rl_dest.wide || rl_dest.fp || constant_index) {
    RegStorage reg_ptr;
    if (constant_index) {
      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
    } else {
      // No special indexed operation, lea + load w/ displacement
      reg_ptr = AllocTemp();
      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
                       EncodeShift(kArmLsl, scale));
      FreeTemp(rl_index.reg.GetReg());
    }
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index),
                      kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.reg, kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }
    if (rl_dest.wide) {
      LoadBaseDispWide(reg_ptr, data_offset, rl_result.reg, INVALID_SREG);
      MarkPossibleNullPointerException(opt_flags);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValueWide(rl_dest, rl_result);
    } else {
      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, INVALID_SREG);
      MarkPossibleNullPointerException(opt_flags);
      if (!constant_index) {
        FreeTemp(reg_ptr);
      }
      StoreValue(rl_dest, rl_result);
    }
  } else {
    // Offset base, then use indexed load
    RegStorage reg_ptr = AllocTemp();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    FreeTemp(rl_array.reg.GetReg());
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.reg, reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
    MarkPossibleNullPointerException(opt_flags);
    FreeTemp(reg_ptr);
    StoreValue(rl_dest, rl_result);
  }
}
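
// For illustration: for "x = a[3]" on an int[] (scale == 2), the constant
// index folds into the displacement -- data_offset grows by 3 << 2 -- so the
// element loads with a single base-plus-offset access from the array register,
// and the bounds check reduces to comparing the cached length against the
// constant 3.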

/*
 * Generate array store
 */
void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  // If index is constant, just fold it into the data offset.
  if (constant_index) {
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
  }

  rl_array = LoadValue(rl_array, kCoreReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  RegStorage reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg;
  } else if (IsTemp(rl_array.reg.GetReg()) && !card_mark) {
    Clobber(rl_array.reg.GetReg());
    reg_ptr = rl_array.reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTemp();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps(4) here.
    /* Get len */
    LoadWordDisp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide || rl_src.fp || constant_index) {
    if (rl_src.wide) {
      rl_src = LoadValueWide(rl_src, reg_class);
    } else {
      rl_src = LoadValue(rl_src, reg_class);
    }
    if (!constant_index) {
      OpRegRegRegShift(kOpAdd, reg_ptr.GetReg(), rl_array.reg.GetReg(), rl_index.reg.GetReg(),
                       EncodeShift(kArmLsl, scale));
    }
    if (needs_range_check) {
      if (constant_index) {
        GenImmedCheck(kCondLs, reg_len, mir_graph_->ConstantValue(rl_index),
                      kThrowConstantArrayBounds);
      } else {
        GenRegRegCheck(kCondLs, reg_len, rl_index.reg, kThrowArrayBounds);
      }
      FreeTemp(reg_len);
    }

    if (rl_src.wide) {
      StoreBaseDispWide(reg_ptr, data_offset, rl_src.reg);
    } else {
      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size);
    }
    MarkPossibleNullPointerException(opt_flags);
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    rl_src = LoadValue(rl_src, reg_class);
    if (needs_range_check) {
      GenRegRegCheck(kCondUge, rl_index.reg, reg_len, kThrowArrayBounds);
      FreeTemp(reg_len);
    }
    StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
    MarkPossibleNullPointerException(opt_flags);
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(rl_src.reg, rl_array.reg);
  }
}

void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }
  if (BadOverlap(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      if (shift_amount == 1) {
        OpRegRegReg(kOpAdd, rl_result.reg.GetLow(), rl_src.reg.GetLow(), rl_src.reg.GetLow());
        OpRegRegReg(kOpAdc, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), rl_src.reg.GetHigh());
      } else if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetLow(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else {
        OpRegRegImm(kOpLsl, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetHighReg(), rl_result.reg.GetHighReg(),
                         rl_src.reg.GetLowReg(), EncodeShift(kArmLsr, 32 - shift_amount));
        OpRegRegImm(kOpLsl, rl_result.reg.GetLow(), rl_src.reg.GetLow(), shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpAsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 31);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(),
                         rl_src.reg.GetHighReg(), EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpAsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else if (shift_amount > 31) {
        OpRegRegImm(kOpLsr, rl_result.reg.GetLow(), rl_src.reg.GetHigh(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else {
        RegStorage t_reg = AllocTemp();
        OpRegRegImm(kOpLsr, t_reg, rl_src.reg.GetLow(), shift_amount);
        OpRegRegRegShift(kOpOr, rl_result.reg.GetLowReg(), t_reg.GetReg(),
                         rl_src.reg.GetHighReg(), EncodeShift(kArmLsl, 32 - shift_amount));
        FreeTemp(t_reg);
        OpRegRegImm(kOpLsr, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  StoreValueWide(rl_dest, rl_result);
}
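
// For illustration: "v << 40" takes the shift_amount > 31 path, so the
// result's high word is lo << 8 and the low word is 0, while "v << 12" merges
// hi << 12 with lo >> 20 into the high word before shifting the low word left
// by 12.  A shift by exactly 1 is cheaper still, emitted as an add/adc pair.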

void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
    if (!rl_src2.is_const) {
      // Don't bother with special handling for subtract from immediate.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
      return;
    }
  } else {
    // Normalize
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  if (BadOverlap(rl_src1, rl_dest)) {
    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
    return;
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  uint32_t val_lo = Low32Bits(val);
  uint32_t val_hi = High32Bits(val);
  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
  int32_t mod_imm_hi = ModifiedImmediate(val_hi);

  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
        return;
      }
      break;
    default:
      break;
  }
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      NewLIR3(kThumb2AddRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2AdcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if ((val_lo != 0) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpOr, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      OpRegRegImm(kOpXor, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      OpRegRegImm(kOpXor, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      if ((val_lo != 0xffffffff) || (rl_result.reg.GetLowReg() != rl_src1.reg.GetLowReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetLow(), rl_src1.reg.GetLow(), val_lo);
      }
      if ((val_hi != 0xffffffff) || (rl_result.reg.GetHighReg() != rl_src1.reg.GetHighReg())) {
        OpRegRegImm(kOpAnd, rl_result.reg.GetHigh(), rl_src1.reg.GetHigh(), val_hi);
      }
      break;
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::SUB_LONG:
      NewLIR3(kThumb2SubRRI8M, rl_result.reg.GetLowReg(), rl_src1.reg.GetLowReg(), mod_imm_lo);
      NewLIR3(kThumb2SbcRRI8M, rl_result.reg.GetHighReg(), rl_src1.reg.GetHighReg(), mod_imm_hi);
      break;
    default:
      LOG(FATAL) << "Unexpected opcode " << opcode;
  }
  StoreValueWide(rl_dest, rl_result);
}

}  // namespace art