/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_arm.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "arm_lir.h"
#include "base/logging.h"
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "driver/compiler_driver.h"

namespace art {

/* This file contains codegen for the Thumb ISA. */

static int32_t EncodeImmSingle(int32_t value) {
  int32_t res;
  int32_t bit_a = (value & 0x80000000) >> 31;
  int32_t not_bit_b = (value & 0x40000000) >> 30;
  int32_t bit_b = (value & 0x20000000) >> 29;
  int32_t b_smear = (value & 0x3e000000) >> 25;
  int32_t slice = (value & 0x01f80000) >> 19;
  int32_t zeroes = (value & 0x0007ffff);
  if (zeroes != 0)
    return -1;
  if (bit_b) {
    if ((not_bit_b != 0) || (b_smear != 0x1f))
      return -1;
  } else {
    if ((not_bit_b != 1) || (b_smear != 0x0))
      return -1;
  }
  res = (bit_a << 7) | (bit_b << 6) | slice;
  return res;
}

/*
 * Determine whether value can be encoded as a Thumb2 floating point
 * immediate.  If not, return -1.  If so, return encoded 8-bit value.
 */
static int32_t EncodeImmDouble(int64_t value) {
  int32_t res;
  int32_t bit_a = (value & INT64_C(0x8000000000000000)) >> 63;
  int32_t not_bit_b = (value & INT64_C(0x4000000000000000)) >> 62;
  int32_t bit_b = (value & INT64_C(0x2000000000000000)) >> 61;
  int32_t b_smear = (value & INT64_C(0x3fc0000000000000)) >> 54;
  int32_t slice = (value & INT64_C(0x003f000000000000)) >> 48;
  uint64_t zeroes = (value & INT64_C(0x0000ffffffffffff));
  if (zeroes != 0ull)
    return -1;
  if (bit_b) {
    if ((not_bit_b != 0) || (b_smear != 0xff))
      return -1;
  } else {
    if ((not_bit_b != 1) || (b_smear != 0x0))
      return -1;
  }
  res = (bit_a << 7) | (bit_b << 6) | slice;
  return res;
}

LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) {
  DCHECK(RegStorage::IsSingle(r_dest));
  if (value == 0) {
    // TODO: we need better info about the target CPU.  A vector exclusive or
    // would probably be better here if we could rely on its existence.
    // Load an immediate +2.0 (which encodes to 0).
    NewLIR2(kThumb2Vmovs_IMM8, r_dest, 0);
    // +0.0 = +2.0 - +2.0
    return NewLIR3(kThumb2Vsubs, r_dest, r_dest, r_dest);
  } else {
    int encoded_imm = EncodeImmSingle(value);
    if (encoded_imm >= 0) {
      return NewLIR2(kThumb2Vmovs_IMM8, r_dest, encoded_imm);
    }
  }
  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == nullptr) {
    data_target = AddWordData(&literal_list_, value);
  }
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs,
                            r_dest, rs_r15pc.GetReg(), 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}
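
// The VFP immediate form used above packs values of +/-(16..31)/16 * 2^n,
// with n in [-3, 4], into eight bits a:b:cdefgh.  For example, +2.0f is
// 0x40000000: bit_a = 0, not_bit_b = 1, bit_b = 0 and slice = 0, so
// EncodeImmSingle() returns 0.  That is why LoadFPConstantValue() can
// materialize +0.0 with a "vmov #2.0" followed by a subtraction.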

/*
 * Determine whether value can be encoded as a Thumb2 modified
 * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
 */
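// Examples: 0x000000ab encodes as 0x0ab (0:000 form), 0x00ab00ab as 0x1ab
// (0:001 form) and 0x0000ff00 as 0xc7f (rotation form: 0x80|0x7f == 0xff
// rotated right by 8 + CLZ(0x0000ff00) == 24 bit positions).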
int ArmMir2Lir::ModifiedImmediate(uint32_t value) {
  uint32_t b0 = value & 0xff;

  /* Note: case of value==0 must use 0:000:0:0000000 encoding */
  if (value <= 0xFF)
    return b0;  // 0:000:a:bcdefgh
  if (value == ((b0 << 16) | b0))
    return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
    return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
  b0 = (value >> 8) & 0xff;
  if (value == ((b0 << 24) | (b0 << 8)))
    return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
  /* Can we do it with rotation? */
  int z_leading = CLZ(value);
  int z_trailing = CTZ(value);
  /* A run of eight or fewer active bits? */
  if ((z_leading + z_trailing) < 24)
    return -1;  /* No - bail */
  /* Left-justify the constant, discarding msb (known to be 1) */
  value <<= z_leading + 1;
  /* Create bcdefgh */
  value >>= 25;
  /* Put it all together */
  return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
}

bool ArmMir2Lir::InexpensiveConstantInt(int32_t value) {
  return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
}

bool ArmMir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
  switch (opcode) {
    case Instruction::ADD_INT:
    case Instruction::ADD_INT_2ADDR:
    case Instruction::SUB_INT:
    case Instruction::SUB_INT_2ADDR:
      if ((value >> 12) == (value >> 31)) {  // Signed 12-bit, RRI12 versions of ADD/SUB.
        return true;
      }
      FALLTHROUGH_INTENDED;
    case Instruction::IF_EQ:
    case Instruction::IF_NE:
    case Instruction::IF_LT:
    case Instruction::IF_GE:
    case Instruction::IF_GT:
    case Instruction::IF_LE:
      return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(-value) >= 0);
    case Instruction::SHL_INT:
    case Instruction::SHL_INT_2ADDR:
    case Instruction::SHR_INT:
    case Instruction::SHR_INT_2ADDR:
    case Instruction::USHR_INT:
    case Instruction::USHR_INT_2ADDR:
      return true;
    case Instruction::CONST:
    case Instruction::CONST_4:
    case Instruction::CONST_16:
      if ((value >> 16) == 0) {
        return true;  // movw, 16-bit unsigned.
      }
      FALLTHROUGH_INTENDED;
    case Instruction::AND_INT:
    case Instruction::AND_INT_2ADDR:
    case Instruction::AND_INT_LIT16:
    case Instruction::AND_INT_LIT8:
    case Instruction::OR_INT:
    case Instruction::OR_INT_2ADDR:
    case Instruction::OR_INT_LIT16:
    case Instruction::OR_INT_LIT8:
      return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
    case Instruction::XOR_INT:
    case Instruction::XOR_INT_2ADDR:
    case Instruction::XOR_INT_LIT16:
    case Instruction::XOR_INT_LIT8:
      return (ModifiedImmediate(value) >= 0);
    case Instruction::MUL_INT:
    case Instruction::MUL_INT_2ADDR:
    case Instruction::MUL_INT_LIT8:
    case Instruction::MUL_INT_LIT16:
    case Instruction::DIV_INT:
    case Instruction::DIV_INT_2ADDR:
    case Instruction::DIV_INT_LIT8:
    case Instruction::DIV_INT_LIT16:
    case Instruction::REM_INT:
    case Instruction::REM_INT_2ADDR:
    case Instruction::REM_INT_LIT8:
    case Instruction::REM_INT_LIT16: {
      EasyMultiplyOp ops[2];
      return GetEasyMultiplyTwoOps(value, ops);
    }
    default:
      return false;
  }
}

bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool ArmMir2Lir::InexpensiveConstantLong(int64_t value) {
  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
}

bool ArmMir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

/*
 * Load an immediate using a shortcut if possible; otherwise
 * grab from the per-translation literal pool.
 *
 * No additional register clobbering operation is performed.  Use this version when
 * 1) r_dest is freshly returned from AllocTemp or
 * 2) the codegen is under fixed register usage
 */
LIR* ArmMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;
  int mod_imm;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest.GetReg(), value);
  }

  /* See if the value can be constructed cheaply */
  if (r_dest.Low8() && (value >= 0) && (value <= 255)) {
    return NewLIR2(kThumbMovImm, r_dest.GetReg(), value);
  }
  /* Check modified immediate special cases */
  mod_imm = ModifiedImmediate(value);
  if (mod_imm >= 0) {
    res = NewLIR2(kThumb2MovI8M, r_dest.GetReg(), mod_imm);
    return res;
  }
  mod_imm = ModifiedImmediate(~value);
  if (mod_imm >= 0) {
    res = NewLIR2(kThumb2MvnI8M, r_dest.GetReg(), mod_imm);
    return res;
  }
  /* 16-bit immediate? */
  if ((value & 0xffff) == value) {
    res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), value);
    return res;
  }
  /* Do a low/high pair */
  res = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), Low16Bits(value));
  NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), High16Bits(value));
  return res;
}
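
// The low/high pair above relies on movw (kThumb2MovImm16) zero-extending its
// 16-bit immediate, so the following movt (kThumb2MovImm16H) only has to fill
// in the upper halfword.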

LIR* ArmMir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kThumbBUncond, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* ArmMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kThumbBCond, 0 /* offset to be patched */,
                        ArmConditionEncoding(cc));
  branch->target = target;
  return branch;
}

LIR* ArmMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  ArmOpcode opcode = kThumbBkpt;
  switch (op) {
    case kOpBlx:
      opcode = kThumbBlxR;
      break;
    case kOpBx:
      opcode = kThumbBx;
      break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* ArmMir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
                               int shift) {
  bool thumb_form =
      ((shift == 0) && r_dest_src1.Low8() && r_src2.Low8());
  ArmOpcode opcode = kThumbBkpt;
  switch (op) {
    case kOpAdc:
      opcode = (thumb_form) ? kThumbAdcRR : kThumb2AdcRRR;
      break;
    case kOpAnd:
      opcode = (thumb_form) ? kThumbAndRR : kThumb2AndRRR;
      break;
    case kOpBic:
      opcode = (thumb_form) ? kThumbBicRR : kThumb2BicRRR;
      break;
    case kOpCmn:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbCmnRR : kThumb2CmnRR;
      break;
    case kOpCmp:
      if (thumb_form)
        opcode = kThumbCmpRR;
      else if ((shift == 0) && !r_dest_src1.Low8() && !r_src2.Low8())
        opcode = kThumbCmpHH;
      else if ((shift == 0) && r_dest_src1.Low8())
        opcode = kThumbCmpLH;
      else if (shift == 0)
        opcode = kThumbCmpHL;
      else
        opcode = kThumb2CmpRR;
      break;
    case kOpXor:
      opcode = (thumb_form) ? kThumbEorRR : kThumb2EorRRR;
      break;
    case kOpMov:
      DCHECK_EQ(shift, 0);
      if (r_dest_src1.Low8() && r_src2.Low8())
        opcode = kThumbMovRR;
      else if (!r_dest_src1.Low8() && !r_src2.Low8())
        opcode = kThumbMovRR_H2H;
      else if (r_dest_src1.Low8())
        opcode = kThumbMovRR_H2L;
      else
        opcode = kThumbMovRR_L2H;
      break;
    case kOpMul:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbMul : kThumb2MulRRR;
      break;
    case kOpMvn:
      opcode = (thumb_form) ? kThumbMvn : kThumb2MnvRR;
      break;
    case kOpNeg:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbNeg : kThumb2NegRR;
      break;
    case kOpOr:
      opcode = (thumb_form) ? kThumbOrr : kThumb2OrrRRR;
      break;
    case kOpSbc:
      opcode = (thumb_form) ? kThumbSbc : kThumb2SbcRRR;
      break;
    case kOpTst:
      opcode = (thumb_form) ? kThumbTst : kThumb2TstRR;
      break;
    case kOpLsl:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbLslRR : kThumb2LslRRR;
      break;
    case kOpLsr:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbLsrRR : kThumb2LsrRRR;
      break;
    case kOpAsr:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbAsrRR : kThumb2AsrRRR;
      break;
    case kOpRor:
      DCHECK_EQ(shift, 0);
      opcode = (thumb_form) ? kThumbRorRR : kThumb2RorRRR;
      break;
    case kOpAdd:
      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
      break;
    case kOpSub:
      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      if (!thumb_form) {
        // Binary, but rm is encoded twice.
        return NewLIR3(kThumb2RevRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
      }
      opcode = kThumbRev;
      break;
    case kOpRevsh:
      DCHECK_EQ(shift, 0);
      if (!thumb_form) {
        // Binary, but rm is encoded twice.
        return NewLIR3(kThumb2RevshRR, r_dest_src1.GetReg(), r_src2.GetReg(), r_src2.GetReg());
      }
      opcode = kThumbRevsh;
      break;
    case kOp2Byte:
      DCHECK_EQ(shift, 0);
      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 8);
    case kOp2Short:
      DCHECK_EQ(shift, 0);
      return NewLIR4(kThumb2Sbfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
    case kOp2Char:
      DCHECK_EQ(shift, 0);
      return NewLIR4(kThumb2Ubfx, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 16);
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }
  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    return NewLIR2(opcode, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    if (EncodingMap[opcode].field_loc[2].kind == kFmtShift) {
      return NewLIR3(opcode, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    } else {
      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg());
    }
  } else if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    return NewLIR4(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    LOG(FATAL) << "Unexpected encoding operand count";
    return nullptr;
  }
}

LIR* ArmMir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  return OpRegRegShift(op, r_dest_src1, r_src2, 0);
}

LIR* ArmMir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
  UNUSED(r_dest, r_base, offset, move_type);
  UNIMPLEMENTED(FATAL);
  UNREACHABLE();
}

LIR* ArmMir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
  UNUSED(r_base, offset, r_src, move_type);
  UNIMPLEMENTED(FATAL);
  UNREACHABLE();
}

LIR* ArmMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  UNUSED(op, cc, r_dest, r_src);
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
  UNREACHABLE();
}

LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                  RegStorage r_src2, int shift) {
  ArmOpcode opcode = kThumbBkpt;
  bool thumb_form = (shift == 0) && r_dest.Low8() && r_src1.Low8() && r_src2.Low8();
  switch (op) {
    case kOpAdd:
      opcode = (thumb_form) ? kThumbAddRRR : kThumb2AddRRR;
      break;
    case kOpSub:
      opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
      break;
    case kOpRsub:
      opcode = kThumb2RsubRRR;
      break;
    case kOpAdc:
      opcode = kThumb2AdcRRR;
      break;
    case kOpAnd:
      opcode = kThumb2AndRRR;
      break;
    case kOpBic:
      opcode = kThumb2BicRRR;
      break;
    case kOpXor:
      opcode = kThumb2EorRRR;
      break;
    case kOpMul:
      DCHECK_EQ(shift, 0);
      opcode = kThumb2MulRRR;
      break;
    case kOpDiv:
      DCHECK_EQ(shift, 0);
      opcode = kThumb2SdivRRR;
      break;
    case kOpOr:
      opcode = kThumb2OrrRRR;
      break;
    case kOpSbc:
      opcode = kThumb2SbcRRR;
      break;
    case kOpLsl:
      DCHECK_EQ(shift, 0);
      opcode = kThumb2LslRRR;
      break;
    case kOpLsr:
      DCHECK_EQ(shift, 0);
      opcode = kThumb2LsrRRR;
      break;
    case kOpAsr:
      DCHECK_EQ(shift, 0);
      opcode = kThumb2AsrRRR;
      break;
    case kOpRor:
      DCHECK_EQ(shift, 0);
      opcode = kThumb2RorRRR;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }
  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    return NewLIR4(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* ArmMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, 0);
}
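
// OpRegRegImm below tries, in order: a 16-bit Thumb encoding, a Thumb2
// modified immediate (possibly of -value or ~value with the operation
// adjusted accordingly), the 12-bit add/sub immediate, and finally a
// constant load into a scratch register combined with the register form.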
LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  bool neg = (value < 0);
  int32_t abs_value = (neg) ? -value : value;
  ArmOpcode opcode = kThumbBkpt;
  ArmOpcode alt_opcode = kThumbBkpt;
  bool all_low_regs = r_dest.Low8() && r_src1.Low8();
  int32_t mod_imm = ModifiedImmediate(value);

  switch (op) {
    case kOpLsl:
      if (all_low_regs)
        return NewLIR3(kThumbLslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
      else
        return NewLIR3(kThumb2LslRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpLsr:
      if (all_low_regs)
        return NewLIR3(kThumbLsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
      else
        return NewLIR3(kThumb2LsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      if (all_low_regs)
        return NewLIR3(kThumbAsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
      else
        return NewLIR3(kThumb2AsrRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      return NewLIR3(kThumb2RorRRI5, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAdd:
      if (r_dest.Low8() && (r_src1 == rs_r13sp) && (value <= 1020) && ((value & 0x3) == 0)) {
        return NewLIR3(kThumbAddSpRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
      } else if (r_dest.Low8() && (r_src1 == rs_r15pc) &&
                 (value <= 1020) && ((value & 0x3) == 0)) {
        return NewLIR3(kThumbAddPcRel, r_dest.GetReg(), r_src1.GetReg(), value >> 2);
      }
      FALLTHROUGH_INTENDED;
    case kOpSub:
      if (all_low_regs && ((abs_value & 0x7) == abs_value)) {
        if (op == kOpAdd)
          opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
        else
          opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
      }
      if (mod_imm < 0) {
        mod_imm = ModifiedImmediate(-value);
        if (mod_imm >= 0) {
          op = (op == kOpAdd) ? kOpSub : kOpAdd;
        }
      }
      if (mod_imm < 0 && (abs_value >> 12) == 0) {
        // This is deliberately used only if modified immediate encoding is inadequate since
        // we sometimes actually use the flags for small values but not necessarily low regs.
        if (op == kOpAdd)
          opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
        else
          opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
        return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), abs_value);
      }
      if (op == kOpSub) {
        opcode = kThumb2SubRRI8M;
        alt_opcode = kThumb2SubRRR;
      } else {
        opcode = kThumb2AddRRI8M;
        alt_opcode = kThumb2AddRRR;
      }
      break;
    case kOpRsub:
      opcode = kThumb2RsubRRI8M;
      alt_opcode = kThumb2RsubRRR;
      break;
    case kOpAdc:
      opcode = kThumb2AdcRRI8M;
      alt_opcode = kThumb2AdcRRR;
      break;
    case kOpSbc:
      opcode = kThumb2SbcRRI8M;
      alt_opcode = kThumb2SbcRRR;
      break;
    case kOpOr:
      opcode = kThumb2OrrRRI8M;
      alt_opcode = kThumb2OrrRRR;
      if (mod_imm < 0) {
        mod_imm = ModifiedImmediate(~value);
        if (mod_imm >= 0) {
          opcode = kThumb2OrnRRI8M;
        }
      }
      break;
    case kOpAnd:
      if (mod_imm < 0) {
        mod_imm = ModifiedImmediate(~value);
        if (mod_imm >= 0) {
          return NewLIR3(kThumb2BicRRI8M, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
        }
      }
      opcode = kThumb2AndRRI8M;
      alt_opcode = kThumb2AndRRR;
      break;
    case kOpXor:
      opcode = kThumb2EorRRI8M;
      alt_opcode = kThumb2EorRRR;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      mod_imm = -1;
      alt_opcode = kThumb2MulRRR;
      break;
    case kOpCmp: {
      LIR* res;
      if (mod_imm >= 0) {
        res = NewLIR2(kThumb2CmpRI8M, r_src1.GetReg(), mod_imm);
      } else {
        mod_imm = ModifiedImmediate(-value);
        if (mod_imm >= 0) {
          res = NewLIR2(kThumb2CmnRI8M, r_src1.GetReg(), mod_imm);
        } else {
          RegStorage r_tmp = AllocTemp();
          res = LoadConstant(r_tmp, value);
          OpRegReg(kOpCmp, r_src1, r_tmp);
          FreeTemp(r_tmp);
        }
      }
      return res;
    }
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (mod_imm >= 0) {
    return NewLIR3(opcode, r_dest.GetReg(), r_src1.GetReg(), mod_imm);
  } else {
    RegStorage r_scratch = AllocTemp();
    LoadConstant(r_scratch, value);
    LIR* res;
    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
      res = NewLIR4(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), 0);
    else
      res = NewLIR3(alt_opcode, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
    FreeTemp(r_scratch);
    return res;
  }
}

/* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */
LIR* ArmMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  bool neg = (value < 0);
  int32_t abs_value = (neg) ? -value : value;
  bool short_form = (((abs_value & 0xff) == abs_value) && r_dest_src1.Low8());
  ArmOpcode opcode = kThumbBkpt;
  switch (op) {
    case kOpAdd:
      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
        DCHECK_EQ((value & 0x3), 0);
        return NewLIR1(kThumbAddSpI7, value >> 2);
      } else if (short_form) {
        opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
      }
      break;
    case kOpSub:
      if (!neg && (r_dest_src1 == rs_r13sp) && (value <= 508)) { /* sp */
        DCHECK_EQ((value & 0x3), 0);
        return NewLIR1(kThumbSubSpI7, value >> 2);
      } else if (short_form) {
        opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
      }
      break;
    case kOpCmp:
      if (!neg && short_form) {
        opcode = kThumbCmpRI8;
      } else {
        short_form = false;
      }
      break;
    default:
      /* Punt to OpRegRegImm - if it's a bad case, catch it there */
      short_form = false;
      break;
  }
  if (short_form) {
    return NewLIR2(opcode, r_dest_src1.GetReg(), abs_value);
  } else {
    return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
  }
}

LIR* ArmMir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  LIR* res = nullptr;
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  if (r_dest.IsFloat()) {
    DCHECK(!r_dest.IsPair());
    if ((val_lo == 0) && (val_hi == 0)) {
      // TODO: we need better info about the target CPU.  A vector exclusive or
      // would probably be better here if we could rely on its existence.
      // Load an immediate +2.0 (which encodes to 0).
      NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), 0);
      // +0.0 = +2.0 - +2.0
      res = NewLIR3(kThumb2Vsubd, r_dest.GetReg(), r_dest.GetReg(), r_dest.GetReg());
    } else {
      int encoded_imm = EncodeImmDouble(value);
      if (encoded_imm >= 0) {
        res = NewLIR2(kThumb2Vmovd_IMM8, r_dest.GetReg(), encoded_imm);
      }
    }
  } else {
    // NOTE: Arm32 assumption here.
    DCHECK(r_dest.IsPair());
    if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
      res = LoadConstantNoClobber(r_dest.GetLow(), val_lo);
      LoadConstantNoClobber(r_dest.GetHigh(), val_hi);
    }
  }
  if (res == nullptr) {
    // No short form - load from the literal pool.
    LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
    if (data_target == nullptr) {
      data_target = AddWideData(&literal_list_, val_lo, val_hi);
    }
    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
    if (r_dest.IsFloat()) {
      res = RawLIR(current_dalvik_offset_, kThumb2Vldrd,
                   r_dest.GetReg(), rs_r15pc.GetReg(), 0, 0, 0, data_target);
    } else {
      DCHECK(r_dest.IsPair());
      res = RawLIR(current_dalvik_offset_, kThumb2LdrdPcRel8,
                   r_dest.GetLowReg(), r_dest.GetHighReg(), rs_r15pc.GetReg(), 0, 0, data_target);
    }
    AppendLIR(res);
  }
  return res;
}
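
// EncodeShift() packs a shift type (kArmLsl, kArmLsr, kArmAsr, kArmRor) and a
// five-bit shift amount into the single operand expected by kFmtShift fields:
// the amount goes in bits [6:2] and the type in bits [1:0].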
int ArmMir2Lir::EncodeShift(int code, int amount) {
  return ((amount & 0x1f) << 2) | code;
}

LIR* ArmMir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                 int scale, OpSize size) {
  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_dest.Low8();
  LIR* load;
  ArmOpcode opcode = kThumbBkpt;
  bool thumb_form = (all_low_regs && (scale == 0));
  RegStorage reg_ptr;

  if (r_dest.IsFloat()) {
    if (r_dest.IsSingle()) {
      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
      opcode = kThumb2Vldrs;
      size = kSingle;
    } else {
      DCHECK(r_dest.IsDouble());
      DCHECK((size == k64) || (size == kDouble));
      opcode = kThumb2Vldrd;
      size = kDouble;
    }
  } else {
    if (size == kSingle)
      size = k32;
  }

  switch (size) {
    case kDouble:
      // Intentional fall-through.
    case kSingle:
      reg_ptr = AllocTemp();
      if (scale) {
        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
                EncodeShift(kArmLsl, scale));
      } else {
        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
      }
      load = NewLIR3(opcode, r_dest.GetReg(), reg_ptr.GetReg(), 0);
      FreeTemp(reg_ptr);
      return load;
    case k32:
      // Intentional fall-through.
    case kReference:
      opcode = (thumb_form) ? kThumbLdrRRR : kThumb2LdrRRR;
      break;
    case kUnsignedHalf:
      opcode = (thumb_form) ? kThumbLdrhRRR : kThumb2LdrhRRR;
      break;
    case kSignedHalf:
      opcode = (thumb_form) ? kThumbLdrshRRR : kThumb2LdrshRRR;
      break;
    case kUnsignedByte:
      opcode = (thumb_form) ? kThumbLdrbRRR : kThumb2LdrbRRR;
      break;
    case kSignedByte:
      opcode = (thumb_form) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }
  if (thumb_form)
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  else
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);

  return load;
}

LIR* ArmMir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                  int scale, OpSize size) {
  bool all_low_regs = r_base.Low8() && r_index.Low8() && r_src.Low8();
  LIR* store = nullptr;
  ArmOpcode opcode = kThumbBkpt;
  bool thumb_form = (all_low_regs && (scale == 0));
  RegStorage reg_ptr;

  if (r_src.IsFloat()) {
    if (r_src.IsSingle()) {
      DCHECK((size == k32) || (size == kSingle) || (size == kReference));
      opcode = kThumb2Vstrs;
      size = kSingle;
    } else {
      DCHECK(r_src.IsDouble());
      DCHECK((size == k64) || (size == kDouble));
      DCHECK_EQ((r_src.GetReg() & 0x1), 0);
      opcode = kThumb2Vstrd;
      size = kDouble;
    }
  } else {
    if (size == kSingle)
      size = k32;
  }

  switch (size) {
    case kDouble:
      // Intentional fall-through.
    case kSingle:
      reg_ptr = AllocTemp();
      if (scale) {
        NewLIR4(kThumb2AddRRR, reg_ptr.GetReg(), r_base.GetReg(), r_index.GetReg(),
                EncodeShift(kArmLsl, scale));
      } else {
        OpRegRegReg(kOpAdd, reg_ptr, r_base, r_index);
      }
      store = NewLIR3(opcode, r_src.GetReg(), reg_ptr.GetReg(), 0);
      FreeTemp(reg_ptr);
      return store;
    case k32:
      // Intentional fall-through.
    case kReference:
      opcode = (thumb_form) ? kThumbStrRRR : kThumb2StrRRR;
      break;
    case kUnsignedHalf:
      // Intentional fall-through.
    case kSignedHalf:
      opcode = (thumb_form) ? kThumbStrhRRR : kThumb2StrhRRR;
      break;
    case kUnsignedByte:
      // Intentional fall-through.
    case kSignedByte:
      opcode = (thumb_form) ? kThumbStrbRRR : kThumb2StrbRRR;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }
  if (thumb_form)
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  else
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale);

  return store;
}

// Helper function for LoadBaseDispBody()/StoreBaseDispBody().
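// The VLDR/VSTR and LDRD/STRD encodings used here take an eight-bit offset
// that is scaled by four (a 0-1020 byte range), so out-of-range displacements
// are split into a single ADD of the high bits plus an in-range offset.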
LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStorage r_base,
                                                      int displacement, RegStorage r_src_dest,
                                                      RegStorage r_work) {
  DCHECK_EQ(displacement & 3, 0);
  constexpr int kOffsetMask = 0xff << 2;
  int encoded_disp = (displacement & kOffsetMask) >> 2;  // Within range of the instruction.
  RegStorage r_ptr = r_base;
  if ((displacement & ~kOffsetMask) != 0) {
    r_ptr = r_work.Valid() ? r_work : AllocTemp();
    // Add displacement & ~kOffsetMask to base, it's a single instruction for up to +-256KiB.
    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~kOffsetMask);
  }
  LIR* lir = nullptr;
  if (!r_src_dest.IsPair()) {
    lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
  } else {
    lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(),
                  encoded_disp);
  }
  if ((displacement & ~kOffsetMask) != 0 && !r_work.Valid()) {
    FreeTemp(r_ptr);
  }
  return lir;
}

/*
 * Load value from base + displacement.  Optionally perform null check
 * on base (which must have an associated s_reg and MIR).  If not
 * performing null check, incoming MIR can be null.
 */
LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                  OpSize size) {
  LIR* load = nullptr;
  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
  bool short_form = false;
  bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
  int scale = 0;  // Used for opcode16 and some indexed loads.
  bool already_generated = false;
  switch (size) {
    case kDouble:
      // Intentional fall-through.
    case k64:
      if (r_dest.IsFloat()) {
        DCHECK(!r_dest.IsPair());
        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrd, r_base, displacement, r_dest);
      } else {
        DCHECK(r_dest.IsPair());
        // Use the r_dest.GetLow() for the temporary pointer if needed.
        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2LdrdI8, r_base, displacement, r_dest,
                                                    r_dest.GetLow());
      }
      already_generated = true;
      break;
    case kSingle:
      // Intentional fall-through.
    case k32:
      // Intentional fall-through.
    case kReference:
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        load = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vldrs, r_base, displacement, r_dest);
        already_generated = true;
        break;
      }
      DCHECK_EQ((displacement & 0x3), 0);
      scale = 2;
      if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
          (displacement >= 0)) {
        short_form = true;
        opcode16 = kThumbLdrPcRel;
      } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
                 (displacement >= 0)) {
        short_form = true;
        opcode16 = kThumbLdrSpRel;
      } else {
        short_form = all_low && (displacement >> (5 + scale)) == 0;
        opcode16 = kThumbLdrRRI5;
        opcode32 = kThumb2LdrRRI12;
      }
      break;
    case kUnsignedHalf:
      DCHECK_EQ((displacement & 0x1), 0);
      scale = 1;
      short_form = all_low && (displacement >> (5 + scale)) == 0;
      opcode16 = kThumbLdrhRRI5;
      opcode32 = kThumb2LdrhRRI12;
      break;
    case kSignedHalf:
      DCHECK_EQ((displacement & 0x1), 0);
      scale = 1;
      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
      opcode32 = kThumb2LdrshRRI12;
      break;
    case kUnsignedByte:
      DCHECK_EQ(scale, 0);  // Keep scale = 0.
      short_form = all_low && (displacement >> (5 + scale)) == 0;
      opcode16 = kThumbLdrbRRI5;
      opcode32 = kThumb2LdrbRRI12;
      break;
    case kSignedByte:
      DCHECK_EQ(scale, 0);  // Keep scale = 0.
      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
      opcode32 = kThumb2LdrsbRRI12;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (!already_generated) {
    if (short_form) {
      load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale);
    } else if ((displacement >> 12) == 0) {  // Thumb2 form.
      load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement);
    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
               InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
      // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can
      // actually do it in two because we know that the kOpAdd is a single insn.  On the
      // other hand, we introduce an extra dependency, so this is not necessarily faster.
      if (opcode16 != kThumbBkpt && r_dest.Low8() &&
          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
        // We can use the 16-bit Thumb opcode for the load.
        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale));
        load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f);
      } else {
        DCHECK_NE(opcode32, kThumbBkpt);
        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff);
        load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff);
      }
    } else {
      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
        scale = 0;  // Prefer unscaled indexing if the same number of insns.
      }
      RegStorage reg_offset = AllocTemp();
      LoadConstant(reg_offset, displacement >> scale);
      DCHECK(!r_dest.IsFloat());
      load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size);
      FreeTemp(reg_offset);
    }
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK_EQ(r_base, rs_rARM_SP);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}

LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                              OpSize size, VolatileKind is_volatile) {
  // TODO: base this on target.
  if (size == kWord) {
    size = k32;
  }
  LIR* load;
  if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
      !cu_->compiler_driver->GetInstructionSetFeatures()->
          AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
    // Only 64-bit load needs special handling.
    // If the cpu supports LPAE, aligned LDRD is atomic - fall through to LoadBaseDispBody().
    DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadSave().
    // Use LDREXD for the atomic load. (Expect displacement > 0, don't optimize for == 0.)
    RegStorage r_ptr = AllocTemp();
    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
    load = NewLIR3(kThumb2Ldrexd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg());
    FreeTemp(r_ptr);
  } else {
    load = LoadBaseDispBody(r_base, displacement, r_dest, size);
  }

  if (UNLIKELY(is_volatile == kVolatile)) {
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                   OpSize size) {
  LIR* store = nullptr;
  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
  bool short_form = false;
  bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
  int scale = 0;  // Used for opcode16 and some indexed stores.
  bool already_generated = false;
  switch (size) {
    case kDouble:
      // Intentional fall-through.
    case k64:
      if (r_src.IsFloat()) {
        // Note: If the register is retrieved by register allocator, it should never be a pair.
        // But some functions in mir2lir assume 64-bit registers are 32-bit register pairs.
        // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
        if (r_src.IsPair()) {
          r_src = As64BitFloatReg(r_src);
        }
        DCHECK(!r_src.IsPair());
        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src);
      } else {
        DCHECK(r_src.IsPair());
        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2StrdI8, r_base, displacement, r_src);
      }
      already_generated = true;
      break;
    case kSingle:
      // Intentional fall-through.
    case k32:
      // Intentional fall-through.
    case kReference:
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrs, r_base, displacement, r_src);
        already_generated = true;
        break;
      }
      DCHECK_EQ((displacement & 0x3), 0);
      scale = 2;
      if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
        short_form = true;
        opcode16 = kThumbStrSpRel;
      } else {
        short_form = all_low && (displacement >> (5 + scale)) == 0;
        opcode16 = kThumbStrRRI5;
        opcode32 = kThumb2StrRRI12;
      }
      break;
    case kUnsignedHalf:
      // Intentional fall-through.
    case kSignedHalf:
      DCHECK_EQ((displacement & 0x1), 0);
      scale = 1;
      short_form = all_low && (displacement >> (5 + scale)) == 0;
      opcode16 = kThumbStrhRRI5;
      opcode32 = kThumb2StrhRRI12;
      break;
    case kUnsignedByte:
      // Intentional fall-through.
    case kSignedByte:
      DCHECK_EQ(scale, 0);  // Keep scale = 0.
      short_form = all_low && (displacement >> (5 + scale)) == 0;
      opcode16 = kThumbStrbRRI5;
      opcode32 = kThumb2StrbRRI12;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }
  if (!already_generated) {
    if (short_form) {
      store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale);
    } else if ((displacement >> 12) == 0) {
      store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement);
    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
               InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
      // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can
      // actually do it in two because we know that the kOpAdd is a single insn.  On the
      // other hand, we introduce an extra dependency, so this is not necessarily faster.
      RegStorage r_scratch = AllocTemp();
      if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() &&
          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
        // We can use the 16-bit Thumb opcode for the store.
        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale));
        store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(),
                        (displacement >> scale) & 0x1f);
      } else {
        DCHECK_NE(opcode32, kThumbBkpt);
        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff);
        store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff);
      }
      FreeTemp(r_scratch);
    } else {
      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
        scale = 0;  // Prefer unscaled indexing if the same number of insns.
      }
      RegStorage r_scratch = AllocTemp();
      LoadConstant(r_scratch, displacement >> scale);
      DCHECK(!r_src.IsFloat());
      store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size);
      FreeTemp(r_scratch);
    }
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK_EQ(r_base, rs_rARM_SP);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}
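
// Note: a 64-bit volatile store without LDRD/STRD atomicity support is built
// from an LDREXD/STREXD pair; LDREXD sets the exclusive monitor, STREXD
// writes 0 to its status register on success, and the loop below retries
// until the exclusive store succeeds.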
LIR* ArmMir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                               OpSize size, VolatileKind is_volatile) {
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  LIR* null_ck_insn;
  if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
      !cu_->compiler_driver->GetInstructionSetFeatures()->
          AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
    // Only 64-bit store needs special handling.
    // If the cpu supports LPAE, aligned STRD is atomic - fall through to StoreBaseDispBody().
    // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.)
    DCHECK(!r_src.IsFloat());  // See RegClassForFieldLoadSave().
    RegStorage r_ptr = AllocTemp();
    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
    LIR* fail_target = NewLIR0(kPseudoTargetLabel);
    // We have only 5 temporary registers available and if r_base, r_src and r_ptr already
    // take 4, we can't directly allocate 2 more for LDREXD temps.  In that case clobber r_ptr
    // in LDREXD and recalculate it from r_base.
    RegStorage r_temp = AllocTemp();
    RegStorage r_temp_high = AllocTemp(false);  // We may not have another temp.
    if (r_temp_high.Valid()) {
      null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_temp_high.GetReg(), r_ptr.GetReg());
      FreeTemp(r_temp_high);
      FreeTemp(r_temp);
    } else {
      // If we don't have another temp, clobber r_ptr in LDREXD and reload it.
      null_ck_insn = NewLIR3(kThumb2Ldrexd, r_temp.GetReg(), r_ptr.GetReg(), r_ptr.GetReg());
      FreeTemp(r_temp);  // May need the temp for kOpAdd.
      OpRegRegImm(kOpAdd, r_ptr, r_base, displacement);
    }
    NewLIR4(kThumb2Strexd, r_temp.GetReg(), r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg());
    OpCmpImmBranch(kCondNe, r_temp, 0, fail_target);
    FreeTemp(r_ptr);
  } else {
    // TODO: base this on target.
    if (size == kWord) {
      size = k32;
    }

    null_ck_insn = StoreBaseDispBody(r_base, displacement, r_src, size);
  }

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return null_ck_insn;
}

LIR* ArmMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  int opcode;
  DCHECK_EQ(r_dest.IsDouble(), r_src.IsDouble());
  if (r_dest.IsDouble()) {
    opcode = kThumb2Vmovd;
  } else {
    if (r_dest.IsSingle()) {
      opcode = r_src.IsSingle() ? kThumb2Vmovs : kThumb2Fmsr;
    } else {
      DCHECK(r_src.IsSingle());
      opcode = kThumb2Fmrs;
    }
  }
  LIR* res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

LIR* ArmMir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  UNUSED(op, r_base, disp);
  LOG(FATAL) << "Unexpected use of OpMem for Arm";
  UNREACHABLE();
}

LIR* ArmMir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
  UNUSED(trampoline);  // The address of the trampoline is already loaded into r_tgt.
  return OpReg(op, r_tgt);
}
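
// GetInstructionOffset() recovers the byte offset of a load/store LIR;
// SCALED_OFFSET_X2/X4 mark encodings whose offset operand is stored in
// halfwords or words rather than bytes.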
size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) {
  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
  size_t offset = (check_flags & IS_TERTIARY_OP) ? lir->operands[2] : 0;

  if (check_flags & SCALED_OFFSET_X2) {
    offset = offset * 2;
  } else if (check_flags & SCALED_OFFSET_X4) {
    offset = offset * 4;
  }
  return offset;
}

void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
  // Start with the default counts.
  Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);

  if (pc_rel_temp_ != nullptr) {
    // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
    // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative
    // load sequence is 4 instructions long and by promoting the PC base we save up to 3
    // instructions per use.
    int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
    if (core_counts[p_map_idx].count == 1) {
      core_counts[p_map_idx].count = 0;
    } else {
      core_counts[p_map_idx].count *= 3;
    }
  }
}

void ArmMir2Lir::DoPromotion() {
  if (CanUseOpPcRelDexCacheArrayLoad()) {
    pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
  }

  Mir2Lir::DoPromotion();

  if (pc_rel_temp_ != nullptr) {
    // Now, if the dex cache array base temp is promoted, remember the register but
    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
    dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
    DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat());
    mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
    pc_rel_temp_ = nullptr;
  }
}

}  // namespace art