utility_arm64.cc revision 63999683329612292d534e6be09dbde9480f1250
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"

namespace art {

/* This file contains codegen for the A64 ISA. */

static int32_t EncodeImmSingle(uint32_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
   *
   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
   */

  // bits[18..0] are cleared.
  if ((bits & 0x0007ffff) != 0)
    return -1;

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3e00;
  if (b_pattern != 0 && b_pattern != 0x3e00)
    return -1;

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
    return -1;

  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

static int32_t EncodeImmDouble(uint64_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   *   0000.0000.0000.0000.0000.0000.0000.0000
   *
   * where B = not(b).
   */

  // bits[47..0] are cleared.
  if ((bits & UINT64_C(0xffffffffffff)) != 0)
    return -1;

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
  if (b_pattern != 0 && b_pattern != 0x3fc0)
    return -1;

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
    return -1;

  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
  bool opcode_is_wide = IS_WIDE(lir->opcode);
  ArmOpcode opcode = UNWIDE(lir->opcode);
  DCHECK(!IsPseudoLirOp(opcode));
  const ArmEncodingMap *encoder = &EncodingMap[opcode];
  uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
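  // For the load/store forms used here, the A64 size field occupies instruction bits [31:30]
  // and equals log2 of the access size in bytes, so it can be read straight off the skeleton.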
  return (bits >> 30);
}

size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
  size_t offset = lir->operands[2];
  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
  if (check_flags & SCALED_OFFSET_X0) {
    DCHECK(check_flags & IS_TERTIARY_OP);
    offset = offset * (1 << GetLoadStoreSize(lir));
  }
  return offset;
}

LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
  DCHECK(r_dest.IsSingle());
  if (value == 0) {
    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
  } else {
    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    if (encoded_imm >= 0) {
      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    }
  }

  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == NULL) {
    // Wide, as we need 8B alignment.
    data_target = AddWideData(&literal_list_, value, 0);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
  DCHECK(r_dest.IsDouble());
  if (value == 0) {
    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
  } else {
    int32_t encoded_imm = EncodeImmDouble(value);
    if (encoded_imm >= 0) {
      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    }
  }

  // No short form - load from the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

static int CountLeadingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}

static int CountTrailingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}

static int CountSetBits(bool is_wide, uint64_t value) {
  return ((is_wide) ?
      __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}

/**
 * @brief Try encoding an immediate in the form required by logical instructions.
 *
 * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
 * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
 *   32-bit if @p is_wide is false.
 * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
 * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
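 *
 * For example, the 64-bit value 0x00FF00FF00FF00FF repeats a 16-bit element whose low eight
 * bits are set; the iteration below encodes it with n = 0, imm_r = 0 and imm_s = 0b100111,
 * so this function returns 0x027.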
 */
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
  unsigned n, imm_s, imm_r;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //    N   imms    immr    size        S             R
  //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if (value == 0 || value == ~UINT64_C(0) ||
      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    return -1;
  }

  unsigned lead_zero = CountLeadingZeros(is_wide, value);
  unsigned lead_one = CountLeadingZeros(is_wide, ~value);
  unsigned trail_zero = CountTrailingZeros(is_wide, value);
  unsigned trail_one = CountTrailingZeros(is_wide, ~value);
  unsigned set_bits = CountSetBits(is_wide, value);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  unsigned width = (is_wide) ? 64 : 32;
  int imm_s_fixed = (is_wide) ? -128 : -64;
  int imm_s_mask = 0x3f;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      n = 0;
      imm_s = 0x3C;
      imm_r = (value & 3) - 1;
      break;
    }

    n = (width == 64) ? 1 : 0;
    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      imm_r = 0;
    } else {
      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is
    //    equal to the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      break;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      break;
    }

    // 5. If the most-significant half of the bitwise value is equal to
    //    the least-significant half, return to step 2 using the
    //    least-significant half of the value.
    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
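    //    (For instance, the 32-bit value 0x00001234 fails steps 3 and 4 on the first pass, and
    //    its upper and lower halfwords differ, so step 5 cannot halve it: it ends up here.)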
    return -1;
  }

  return (n << 12 | imm_r << 6 | imm_s);
}

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
  return false;  // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
}

bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
}

bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

/*
 * Load an immediate using a single instruction when possible; otherwise
 * use a pair of movz and movk instructions.
 *
 * No additional register clobbering operation is performed. Use this version when
 * 1) r_dest is freshly returned from AllocTemp or
 * 2) The codegen is under fixed register usage
 */
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest, value);
  }

  if (r_dest.Is64Bit()) {
    return LoadConstantWide(r_dest, value);
  }

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  // Compute how many movk, movz instructions are needed to load the value.
  uint16_t high_bits = High16Bits(value);
  uint16_t low_bits = Low16Bits(value);

  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);

  if (LIKELY(low_fast || high_fast)) {
    // 1 instruction is enough to load the immediate.
    if (LIKELY(low_bits == high_bits)) {
      // Value is either 0 or -1: we can just use wzr.
      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    } else {
      uint16_t uniform_bits, useful_bits;
      int shift;

      if (LIKELY(high_fast)) {
        shift = 0;
        uniform_bits = high_bits;
        useful_bits = low_bits;
      } else {
        shift = 1;
        uniform_bits = low_bits;
        useful_bits = high_bits;
      }

      if (UNLIKELY(uniform_bits != 0)) {
        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
      } else {
        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
      }
    }
  } else {
    // movz/movk would require 2 instructions. Try detecting logical immediates.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    if (log_imm >= 0) {
      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    } else {
      // Use 2 instructions.
      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    }
  }

  return res;
}

// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  // Maximum number of instructions to use for encoding the immediate.
  const int max_num_ops = 2;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValueWide(r_dest, value);
  }

  DCHECK(r_dest.Is64Bit());

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    // value is either 0 or -1: we can just use xzr.
    ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
  }

  // At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many.
  int num_0000_halfwords = 0;
  int num_ffff_halfwords = 0;
  uint64_t uvalue = static_cast<uint64_t>(value);
  for (int shift = 0; shift < 64; shift += 16) {
    uint16_t halfword = static_cast<uint16_t>(uvalue >> shift);
    if (halfword == 0)
      num_0000_halfwords++;
    else if (halfword == UINT16_C(0xffff))
      num_ffff_halfwords++;
  }
  int num_fast_halfwords = std::max(num_0000_halfwords, num_ffff_halfwords);

  if (num_fast_halfwords < 3) {
    // A single movz/movn is not enough. Try the logical immediate route.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    if (log_imm >= 0) {
      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    }
  }

  if (num_fast_halfwords >= 4 - max_num_ops) {
    // We can encode the number using a movz/movn followed by one or more movk.
    ArmOpcode op;
    uint16_t background;
    LIR* res = nullptr;

    // Decide whether to use a movz or a movn.
    if (num_0000_halfwords >= num_ffff_halfwords) {
      op = WIDE(kA64Movz3rdM);
      background = 0;
    } else {
      op = WIDE(kA64Movn3rdM);
      background = 0xffff;
    }

    // Emit the first instruction (movz, movn).
    int shift;
    for (shift = 0; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
        break;
      }
    }

    // Emit the movk instructions.
    for (shift++; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
      }
    }
    return res;
  }

  // Use the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(res);
  return res;
}

LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
                        0 /* offset to be patched */);
  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  ArmOpcode opcode = kA64Brk1d;
  switch (op) {
    case kOpBlx:
      opcode = kA64Blr1x;
      break;
    // TODO(Arm64): port kThumbBx.
    // case kOpBx:
    //   opcode = kThumbBx;
    //   break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3rro;
      break;
    case kOpCmp:
      opcode = kA64Cmp3rro;
      break;
    case kOpMov:
      opcode = kA64Mov2rr;
      break;
    case kOpMvn:
      opcode = kA64Mvn2rr;
      break;
    case kOpNeg:
      opcode = kA64Neg3rro;
      break;
    case kOpTst:
      opcode = kA64Tst3rro;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      // Binary, but rm is encoded twice.
      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      break;
    case kOpRevsh:
      // Binary, but rm is encoded twice.
      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
      break;
    case kOp2Byte:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use sbfm directly.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    case kOp2Short:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // For now we use sbfm rather than its alias, sbfx.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    case kOp2Char:
      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use ubfm directly.
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    default:
      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtShift) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int extend) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3Rre;
      break;
    case kOpCmp:
      opcode = kA64Cmp3Rre;
      break;
    default:
      LOG(FATAL) << "Bad Opcode: " << op;
      break;
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtExtend) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), extend);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  /* RegReg operations with SP in first parameter need extended register instruction form.
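   * (The shifted-register forms of those A64 instructions treat register 31 as ZR, while the
   * extended-register forms treat it as SP, which is why SP needs the extended form.)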
   * Only CMN and CMP instructions are implemented.
   */
  if (r_dest_src1 == rs_sp) {
    return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND);
  } else {
    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
  }
}

LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                    RegStorage r_src2, int shift) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4rrro;
      break;
    case kOpSub:
      opcode = kA64Sub4rrro;
      break;
    // case kOpRsub:
    //   opcode = kA64RsubWWW;
    //   break;
    case kOpAdc:
      opcode = kA64Adc3rrr;
      break;
    case kOpAnd:
      opcode = kA64And4rrro;
      break;
    case kOpXor:
      opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      opcode = kA64Mul3rrr;
      break;
    case kOpDiv:
      opcode = kA64Sdiv3rrr;
      break;
    case kOpOr:
      opcode = kA64Orr4rrro;
      break;
    case kOpSbc:
      opcode = kA64Sbc3rrr;
      break;
    case kOpLsl:
      opcode = kA64Lsl3rrr;
      break;
    case kOpLsr:
      opcode = kA64Lsr3rrr;
      break;
    case kOpAsr:
      opcode = kA64Asr3rrr;
      break;
    case kOpRor:
      opcode = kA64Ror3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  // The instructions above belong to two kinds:
  // - 4-operand instructions, where the last operand is a shift/extend immediate,
  // - 3-operand instructions with no shift/extend.
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    DCHECK(!IsExtendEncoding(shift));
    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4RRre;
      break;
    case kOpSub:
      opcode = kA64Sub4RRre;
      break;
    default:
      LOG(FATAL) << "Unimplemented opcode: " << op;
      break;
  }
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;

  if (r_dest.Is64Bit()) {
    CHECK(r_src1.Is64Bit());

    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    // Note: this is not according to aarch64 specifications, but our encoding.
    if (!r_src2.Is64Bit()) {
      r_src2 = As64BitReg(r_src2);
    }
  } else {
    CHECK(!r_src1.Is64Bit());
    CHECK(!r_src2.Is64Bit());
  }

  // Sanity checks.
  // 1) Amount is in the range 0..4
  CHECK_LE(amount, 4);

  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
                 EncodeExtend(ext, amount));
}

LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}

LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
  LIR* res;
  bool neg = (value < 0);
  int64_t abs_value = (neg) ? -value : value;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int32_t log_imm = -1;
  bool is_wide = r_dest.Is64Bit();
  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
  int info = 0;

  switch (op) {
    case kOpLsl: {
      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
      // For now, we just use ubfm directly.
      int max_value = (is_wide) ? 63 : 31;
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
                     (-value) & max_value, max_value - value);
    }
    case kOpLsr:
      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
      // For now, we just use extr directly.
      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
                     value);
    case kOpAdd:
      neg = !neg;
      // Note: intentional fallthrough
    case kOpSub:
      // Add and sub below read/write sp rather than xzr.
      if (abs_value < 0x1000) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
      } else {
        log_imm = -1;
        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
      }
      break;
    // case kOpRsub:
    //   opcode = kThumb2RsubRRI8M;
    //   alt_opcode = kThumb2RsubRRR;
    //   break;
    case kOpAdc:
      log_imm = -1;
      alt_opcode = kA64Adc3rrr;
      break;
    case kOpSbc:
      log_imm = -1;
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
      log_imm = EncodeLogicalImmediate(is_wide, value);
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
      log_imm = EncodeLogicalImmediate(is_wide, value);
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
      log_imm = EncodeLogicalImmediate(is_wide, value);
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      log_imm = -1;
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (log_imm >= 0) {
    return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
  } else {
    RegStorage r_scratch;
    if (is_wide) {
      r_scratch = AllocTempWide();
      LoadConstantWide(r_scratch, value);
    } else {
      r_scratch = AllocTemp();
      LoadConstant(r_scratch, value);
    }
    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
    else
      res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
    FreeTemp(r_scratch);
    return res;
  }
}

LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode neg_opcode = kA64Brk1d;
  bool shift;
  bool neg = (value < 0);
  uint64_t abs_value = (neg) ? -value : value;

  if (LIKELY(abs_value < 0x1000)) {
    // abs_value is a 12-bit immediate.
    shift = false;
  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    // abs_value is a shifted 12-bit immediate.
    shift = true;
    abs_value >>= 12;
  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    // Note: It is better to use two ADD/SUB instructions than to load the number into a temp register.
    // This works for both normal registers and SP.
    // For a frame size == 0x2468, it will be encoded as:
    //   sub sp, #0x2000
    //   sub sp, #0x468
    if (neg) {
      op = (op == kOpAdd) ? kOpSub : kOpAdd;
    }
    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
  } else if (LIKELY(A64_REG_IS_SP(r_dest_src1.GetReg()) && (op == kOpAdd || op == kOpSub))) {
    // Note: "sub sp, sp, Xm" is not correct on arm64.
    // We need special instructions for SP.
    // Also, operations on the 32-bit SP should be avoided.
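    // The sequence below copies SP into a temp ("add xT, sp, #0"), applies the operation to
    // the temp, then copies the result back ("add sp, xT, #0").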
    DCHECK(IS_WIDE(wide));
    RegStorage r_tmp = AllocTempWide();
    OpRegRegImm(kOpAdd, r_tmp, r_dest_src1, 0);
    OpRegImm64(op, r_tmp, value);
    return OpRegRegImm(kOpAdd, r_dest_src1, r_tmp, 0);
  } else {
    RegStorage r_tmp;
    LIR* res;
    if (IS_WIDE(wide)) {
      r_tmp = AllocTempWide();
      res = LoadConstantWide(r_tmp, value);
    } else {
      r_tmp = AllocTemp();
      res = LoadConstant(r_tmp, value);
    }
    OpRegReg(op, r_dest_src1, r_tmp);
    FreeTemp(r_tmp);
    return res;
  }

  switch (op) {
    case kOpAdd:
      neg_opcode = kA64Sub4RRdT;
      opcode = kA64Add4RRdT;
      break;
    case kOpSub:
      neg_opcode = kA64Add4RRdT;
      opcode = kA64Sub4RRdT;
      break;
    case kOpCmp:
      neg_opcode = kA64Cmn3RdT;
      opcode = kA64Cmp3RdT;
      break;
    default:
      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
      break;
  }

  if (UNLIKELY(neg))
    opcode = neg_opcode;

  if (EncodingMap[opcode].flags & IS_QUAD_OP)
    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
                   (shift) ? 1 : 0);
  else
    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}

int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}

int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}

bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
  return ((1 << 6) & encoded_value) != 0;
}

LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                   int scale, OpSize size) {
  LIR* load;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset load (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_dest.IsFloat()) {
    if (r_dest.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Ldr4fXxG);
    } else {
      DCHECK(r_dest.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Ldr4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:
    case kWord:
    case k64:
      r_dest = Check64BitReg(r_dest);
      opcode = WIDE(kA64Ldr4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldr4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrh4wXxd;
      expected_scale = 1;
      break;
    case kSignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsh4rXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrb3wXx;
      break;
    case kSignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsb3rXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. ldrb, ldrsb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    DCHECK(scale == 0 || scale == expected_scale);
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale) {
  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
}

LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                    int scale, OpSize size) {
  LIR* store;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset store (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_src.IsFloat()) {
    if (r_src.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Str4fXxG);
    } else {
      DCHECK(r_src.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Str4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      opcode = WIDE(kA64Str4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      opcode = kA64Str4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strh4wXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
    case kSignedByte:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strb3wXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. strb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                    (scale != 0) ? 1 : 0);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale) {
  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
}

/*
 * Load value from base + displacement.  Optionally perform null check
 * on base (which must have an associated s_reg and MIR).  If not
 * performing null check, incoming MIR can be null.
 */
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                    OpSize size) {
  LIR* load = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_dest = Check64BitReg(r_dest);
      scale = 3;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsDouble());
        opcode = FWIDE(kA64Ldr3fXD);
        alt_opcode = FWIDE(kA64Ldur3fXd);
      } else {
        opcode = WIDE(kA64Ldr3rXD);
        alt_opcode = WIDE(kA64Ldur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      scale = 2;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        opcode = kA64Ldr3fXD;
      } else {
        opcode = kA64Ldr3rXD;
      }
      break;
    case kUnsignedHalf:
      scale = 1;
      opcode = kA64Ldrh3wXF;
      break;
    case kSignedHalf:
      scale = 1;
      opcode = kA64Ldrsh3rXF;
      break;
    case kUnsignedByte:
      opcode = kA64Ldrb3wXd;
      break;
    case kSignedByte:
      opcode = kA64Ldrsb3rXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled load.
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled load.
    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    // TODO: cleaner support for index/displacement registers?  Not a reference, but must match width.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}

LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
  // LoadBaseDisp() will emit correct insn for atomic load on arm64
  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().

  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // TODO: This should generate an acquire load instead of the barrier.
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                               VolatileKind is_volatile) {
  return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                     OpSize size) {
  LIR* store = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      scale = 3;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsDouble());
        opcode = FWIDE(kA64Str3fXD);
        alt_opcode = FWIDE(kA64Stur3fXd);
      } else {
        opcode = FWIDE(kA64Str3rXD);
        alt_opcode = FWIDE(kA64Stur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      scale = 2;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        opcode = kA64Str3fXD;
      } else {
        opcode = kA64Str3rXD;
      }
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      scale = 1;
      opcode = kA64Strh3wXF;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kA64Strb3wXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled store.
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled store.
    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}

LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
  // TODO: This should generate a release store and no barriers.
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  // StoreBaseDisp() will emit correct insn for atomic store on arm64
  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().

  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                VolatileKind is_volatile) {
  return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
  return OpReg(op, r_tgt);
}

}  // namespace art