/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "codegen_arm64.h"

#include "arm64_lir.h"
#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"

namespace art {

/* This file contains codegen for the A64 ISA. */

int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
   *
   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
   */

  // bits[18..0] are cleared.
  if ((bits & 0x0007ffff) != 0)
    return -1;

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3e00;
  if (b_pattern != 0 && b_pattern != 0x3e00)
    return -1;

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
    return -1;

  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   *   0000.0000.0000.0000.0000.0000.0000.0000
   *
   * where B = not(b).
   */

  // bits[47..0] are cleared.
  if ((bits & UINT64_C(0xffffffffffff)) != 0)
    return -1;

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
  if (b_pattern != 0 && b_pattern != 0x3fc0)
    return -1;

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
    return -1;

  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
  bool opcode_is_wide = IS_WIDE(lir->opcode);
  A64Opcode opcode = UNWIDE(lir->opcode);
  DCHECK(!IsPseudoLirOp(opcode));
  const A64EncodingMap *encoder = &EncodingMap[opcode];
  uint32_t bits = opcode_is_wide ?
      encoder->xskeleton : encoder->wskeleton;
  return (bits >> 30);
}

size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
  size_t offset = lir->operands[2];
  uint64_t check_flags = GetTargetInstFlags(lir->opcode);
  DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
  if (check_flags & SCALED_OFFSET_X0) {
    DCHECK(check_flags & IS_TERTIARY_OP);
    offset = offset * (1 << GetLoadStoreSize(lir));
  }
  return offset;
}

LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
  DCHECK(r_dest.IsSingle());
  if (value == 0) {
    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
  } else {
    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    if (encoded_imm >= 0) {
      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    }
  }

  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == nullptr) {
    // Wide, as we need 8B alignment.
    data_target = AddWideData(&literal_list_, value, 0);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
  DCHECK(r_dest.IsDouble());
  if (value == 0) {
    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
  } else {
    int32_t encoded_imm = EncodeImmDouble(value);
    if (encoded_imm >= 0) {
      return NewLIR2(WIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    }
  }

  // No short form - load from the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == nullptr) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2fp),
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

static int CountLeadingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}

static int CountTrailingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}

static int CountSetBits(bool is_wide, uint64_t value) {
  return ((is_wide) ?
          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}

/**
 * @brief Try encoding an immediate in the form required by logical instructions.
 *
 * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
 * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
 * 32-bit if @p is_wide is false.
 * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
 * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
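 *
 * For example (hand-worked against the algorithm below): the 32-bit value 0x0000ffff is a run
 * of 16 set bits with no rotation, so it encodes as N=0, immr=0, imms=0b001111, and this
 * function returns (0 << 12) | (0 << 6) | 15 = 15.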
 */
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
  unsigned n, imm_s, imm_r;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //    N   imms    immr    size        S             R
  //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  //    (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //

  // 1. If the value has all set or all clear bits, it can't be encoded.
  if (value == 0 || value == ~UINT64_C(0) ||
      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    return -1;
  }

  unsigned lead_zero = CountLeadingZeros(is_wide, value);
  unsigned lead_one = CountLeadingZeros(is_wide, ~value);
  unsigned trail_zero = CountTrailingZeros(is_wide, value);
  unsigned trail_one = CountTrailingZeros(is_wide, ~value);
  unsigned set_bits = CountSetBits(is_wide, value);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  unsigned width = (is_wide) ? 64 : 32;
  int imm_s_fixed = (is_wide) ? -128 : -64;
  int imm_s_mask = 0x3f;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      n = 0;
      imm_s = 0x3C;
      imm_r = (value & 3) - 1;
      break;
    }

    n = (width == 64) ? 1 : 0;
    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      imm_r = 0;
    } else {
      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is
    //    equal to the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      break;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      break;
    }

    // 5. If the most-significant half of the bitwise value is equal to
    //    the least-significant half, return to step 2 using the
    //    least-significant half of the value.
    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return -1;
  }

  return (n << 12 | imm_r << 6 | imm_s);
}

// Maximum number of instructions to use for encoding the immediate.
static const int max_num_ops_per_const_load = 2;

/**
 * @brief Return the number of fast halfwords in the given uint64_t integer.
 * @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
 * number of fast halfwords (halfwords that are either 0 or 0xffff) is returned.
 * See below for a more accurate description.
 * @param value The input 64-bit integer.
 * @return Return @c retval such that (retval & 0x7) is the maximum between n and m, where n is
 * the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
 * set (0xffff). Additionally (retval & 0x8) is set when m > n.
 */
static int GetNumFastHalfWords(uint64_t value) {
  unsigned int num_0000_halfwords = 0;
  unsigned int num_ffff_halfwords = 0;
  for (int shift = 0; shift < 64; shift += 16) {
    uint16_t halfword = static_cast<uint16_t>(value >> shift);
    if (halfword == 0)
      num_0000_halfwords++;
    else if (halfword == UINT16_C(0xffff))
      num_ffff_halfwords++;
  }
  if (num_0000_halfwords >= num_ffff_halfwords) {
    DCHECK_LE(num_0000_halfwords, 4U);
    return num_0000_halfwords;
  } else {
    DCHECK_LE(num_ffff_halfwords, 4U);
    return num_ffff_halfwords | 0x8;
  }
}

// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
// constant is considered for promotion. If the constant is "inexpensive" then the promotion
// algorithm will give it a low priority for promotion, even when it is referenced many times in
// the code.

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value ATTRIBUTE_UNUSED) {
  // A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
  // We therefore return true and give it a low priority for promotion.
  return true;
}

bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
  int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    return true;
  }
  return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
}

bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
// when one of the operands is an immediate (e.g. register version or immediate version of add).

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
  switch (opcode) {
    case Instruction::IF_EQ:
    case Instruction::IF_NE:
    case Instruction::IF_LT:
    case Instruction::IF_GE:
    case Instruction::IF_GT:
    case Instruction::IF_LE:
    case Instruction::ADD_INT:
    case Instruction::ADD_INT_2ADDR:
    case Instruction::SUB_INT:
    case Instruction::SUB_INT_2ADDR:
      // The code below is consistent with the implementation of OpRegRegImm().
      {
        uint32_t abs_value = (value == INT_MIN) ?
            value : std::abs(value);
        if (abs_value < 0x1000) {
          return true;
        } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
          return true;
        }
        return false;
      }
    case Instruction::SHL_INT:
    case Instruction::SHL_INT_2ADDR:
    case Instruction::SHR_INT:
    case Instruction::SHR_INT_2ADDR:
    case Instruction::USHR_INT:
    case Instruction::USHR_INT_2ADDR:
      return true;
    case Instruction::AND_INT:
    case Instruction::AND_INT_2ADDR:
    case Instruction::AND_INT_LIT16:
    case Instruction::AND_INT_LIT8:
    case Instruction::OR_INT:
    case Instruction::OR_INT_2ADDR:
    case Instruction::OR_INT_LIT16:
    case Instruction::OR_INT_LIT8:
    case Instruction::XOR_INT:
    case Instruction::XOR_INT_2ADDR:
    case Instruction::XOR_INT_LIT16:
    case Instruction::XOR_INT_LIT8:
      if (value == 0 || value == INT32_C(-1)) {
        return true;
      }
      return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
    default:
      return false;
  }
}

/*
 * Load an immediate using a single instruction when possible; otherwise
 * use a pair of movz and movk instructions.
 *
 * No additional register clobbering operation is performed. Use this version when
 * 1) r_dest is freshly returned from AllocTemp or
 * 2) The codegen is under fixed register usage
 */
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest, value);
  }

  if (r_dest.Is64Bit()) {
    return LoadConstantWide(r_dest, value);
  }

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  // Compute how many movk, movz instructions are needed to load the value.
  uint16_t high_bits = High16Bits(value);
  uint16_t low_bits = Low16Bits(value);

  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);

  if (LIKELY(low_fast || high_fast)) {
    // 1 instruction is enough to load the immediate.
    if (LIKELY(low_bits == high_bits)) {
      // Value is either 0 or -1: we can just use wzr.
      A64Opcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    } else {
      uint16_t uniform_bits, useful_bits;
      int shift;

      if (LIKELY(high_fast)) {
        shift = 0;
        uniform_bits = high_bits;
        useful_bits = low_bits;
      } else {
        shift = 1;
        uniform_bits = low_bits;
        useful_bits = high_bits;
      }

      if (UNLIKELY(uniform_bits != 0)) {
        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
      } else {
        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
      }
    }
  } else {
    // movk, movz require 2 instructions. Try detecting logical immediates.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    if (log_imm >= 0) {
      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    } else {
      // Use 2 instructions.
      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    }
  }

  return res;
}

// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
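// Illustrative examples (hand-checked, register names arbitrary) of the sequences the constant
// loaders above and below generate:
//   0x12345678          -> movz w0, #0x5678 ; movk w0, #0x1234, lsl #16   (movz + movk)
//   0x00ff00ff00ff00ff  -> orr  x0, xzr, #0x00ff00ff00ff00ff              (logical immediate)
// Values with more than two "slow" halfwords that are not valid logical immediates are loaded
// from the literal pool instead.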
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  if (r_dest.IsFloat()) {
    return LoadFPConstantValueWide(r_dest, value);
  }

  DCHECK(r_dest.Is64Bit());

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    // value is either 0 or -1: we can just use xzr.
    A64Opcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
  }

  // At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many.
  uint64_t uvalue = static_cast<uint64_t>(value);
  int num_fast_halfwords = GetNumFastHalfWords(uvalue);
  int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
  bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;

  if (num_slow_halfwords > 1) {
    // A single movz/movn is not enough. Try the logical immediate route.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    if (log_imm >= 0) {
      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    }
  }

  if (num_slow_halfwords <= max_num_ops_per_const_load) {
    // We can encode the number using a movz/movn followed by one or more movk.
    A64Opcode op;
    uint16_t background;
    LIR* res = nullptr;

    // Decide whether to use a movz or a movn.
    if (more_ffff_halfwords) {
      op = WIDE(kA64Movn3rdM);
      background = 0xffff;
    } else {
      op = WIDE(kA64Movz3rdM);
      background = 0;
    }

    // Emit the first instruction (movz, movn).
    int shift;
    for (shift = 0; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
        break;
      }
    }

    // Emit the movk instructions.
    for (shift++; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
      }
    }
    return res;
  }

  // Use the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == nullptr) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(res);
  return res;
}

LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
                        0 /* offset to be patched */);
  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  A64Opcode opcode = kA64Brk1d;
  switch (op) {
    case kOpBlx:
      opcode = kA64Blr1x;
      break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3rro;
      break;
    case kOpCmp:
      opcode = kA64Cmp3rro;
      break;
    case kOpMov:
      opcode = kA64Mov2rr;
      break;
    case kOpMvn:
      opcode = kA64Mvn2rr;
      break;
    case kOpNeg:
      opcode = kA64Neg3rro;
      break;
    case kOpTst:
      opcode = kA64Tst3rro;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      // Binary, but rm is encoded twice.
      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
    case kOpRevsh:
      // Binary, but rm is encoded twice.
      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
    case kOp2Byte:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use sbfm directly.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    case kOp2Short:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // For now we use sbfm rather than its alias, sbfx.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    case kOp2Char:
      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use ubfm directly.
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    default:
      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtShift) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return nullptr;
}

LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
                                  A64RegExtEncodings ext, uint8_t amount) {
  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3Rre;
      break;
    case kOpCmp:
      opcode = kA64Cmp3Rre;
      break;
    case kOpAdd:
      // Note: intentional fallthrough
    case kOpSub:
      return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      UNREACHABLE();
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtExtend) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
                     EncodeExtend(ext, amount));
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return nullptr;
}

LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  /* RegReg operations with SP in first parameter need extended register instruction form.
   * Only CMN, CMP, ADD & SUB instructions are implemented.
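   * (Rationale: in the A64 encoding, register number 31 selects SP in the extended-register
   * forms of these instructions but XZR in the shifted-register forms, so an operation whose
   * first operand is SP must use the extended-register variant.)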
   */
  if (r_dest_src1 == rs_sp) {
    return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
  } else {
    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
  }
}

LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset,
                               MoveType move_type) {
  UNUSED(r_dest, r_base, offset, move_type);
  UNIMPLEMENTED(FATAL);
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src,
                               MoveType move_type) {
  UNUSED(r_base, offset, r_src, move_type);
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  UNUSED(op, cc, r_dest, r_src);
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                    RegStorage r_src2, int shift) {
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4rrro;
      break;
    case kOpSub:
      opcode = kA64Sub4rrro;
      break;
    // case kOpRsub:
    //   opcode = kA64RsubWWW;
    //   break;
    case kOpAdc:
      opcode = kA64Adc3rrr;
      break;
    case kOpAnd:
      opcode = kA64And4rrro;
      break;
    case kOpXor:
      opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      opcode = kA64Mul3rrr;
      break;
    case kOpDiv:
      opcode = kA64Sdiv3rrr;
      break;
    case kOpOr:
      opcode = kA64Orr4rrro;
      break;
    case kOpSbc:
      opcode = kA64Sbc3rrr;
      break;
    case kOpLsl:
      opcode = kA64Lsl3rrr;
      break;
    case kOpLsr:
      opcode = kA64Lsr3rrr;
      break;
    case kOpAsr:
      opcode = kA64Asr3rrr;
      break;
    case kOpRor:
      opcode = kA64Ror3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  // The instructions above belong to two kinds:
  // - 4-operands instructions, where the last operand is a shift/extend immediate,
  // - 3-operands instructions with no shift/extend.
  A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    DCHECK(!IsExtendEncoding(shift));
    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
  A64Opcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4RRre;
      break;
    case kOpSub:
      opcode = kA64Sub4RRre;
      break;
    default:
      UNIMPLEMENTED(FATAL) << "Unimplemented opcode: " << op;
      UNREACHABLE();
  }
  A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;

  if (r_dest.Is64Bit()) {
    CHECK(r_src1.Is64Bit());

    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    // Note: this is not according to aarch64 specifications, but our encoding.
    if (!r_src2.Is64Bit()) {
      r_src2 = As64BitReg(r_src2);
    }
  } else {
    CHECK(!r_src1.Is64Bit());
    CHECK(!r_src2.Is64Bit());
  }

  // Sanity checks.
  // 1) Amount is in the range 0..4
  CHECK_LE(amount, 4);

  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
                 EncodeExtend(ext, amount));
}

LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}

LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
  LIR* res;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;
  A64Opcode opcode = kA64Brk1d;
  A64Opcode alt_opcode = kA64Brk1d;
  bool is_logical = false;
  bool is_wide = r_dest.Is64Bit();
  A64Opcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
  int info = 0;

  switch (op) {
    case kOpLsl: {
      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
      // For now, we just use ubfm directly.
      int max_value = (is_wide) ? 63 : 31;
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
                     (-value) & max_value, max_value - value);
    }
    case kOpLsr:
      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
      // For now, we just use extr directly.
      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
                     value);
    case kOpAdd:
      neg = !neg;
      FALLTHROUGH_INTENDED;
    case kOpSub:
      // Add and sub below read/write sp rather than xzr.
      if (abs_value < 0x1000) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
      } else {
        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
      }
      break;
    case kOpAdc:
      alt_opcode = kA64Adc3rrr;
      break;
    case kOpSbc:
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
      is_logical = true;
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
      is_logical = true;
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
      is_logical = true;
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (is_logical) {
    int log_imm = EncodeLogicalImmediate(is_wide, value);
    if (log_imm >= 0) {
      return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
    } else {
      // When the immediate is either 0 or ~0, the logical operation can be trivially reduced
      // to a - possibly negated - assignment.
      if (value == 0) {
        switch (op) {
          case kOpOr:
          case kOpXor:
            // Or/Xor by zero reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // And by zero reduces to a `mov rdest, xzr'.
            DCHECK(op == kOpAnd);
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      } else if (value == INT64_C(-1)
                 || (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
        switch (op) {
          case kOpAnd:
            // And by -1 reduces to an assignment.
            return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          case kOpXor:
            // Xor by -1 reduces to an `mvn rdest, rsrc'.
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
          default:
            // Or by -1 reduces to a `mvn rdest, xzr'.
            DCHECK(op == kOpOr);
            return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
        }
      }
    }
  }

  RegStorage r_scratch;
  if (is_wide) {
    r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, value);
  } else {
    r_scratch = AllocTemp();
    LoadConstant(r_scratch, value);
  }
  if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
    res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
  else
    res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
  FreeTemp(r_scratch);
  return res;
}

LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
  A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  A64Opcode opcode = kA64Brk1d;
  A64Opcode neg_opcode = kA64Brk1d;
  bool shift;
  bool neg = (value < 0);
  uint64_t abs_value = (neg & !(value == LLONG_MIN)) ? -value : value;

  if (LIKELY(abs_value < 0x1000)) {
    // abs_value is a 12-bit immediate.
    shift = false;
  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    // abs_value is a shifted 12-bit immediate.
    shift = true;
    abs_value >>= 12;
  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
    // This works for both normal registers and SP.
    // For a frame size == 0x2468, it will be encoded as:
    //   sub sp, #0x2000
    //   sub sp, #0x468
    if (neg) {
      op = (op == kOpAdd) ? kOpSub : kOpAdd;
    }
    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
  } else {
    RegStorage r_tmp;
    LIR* res;
    if (IS_WIDE(wide)) {
      r_tmp = AllocTempWide();
      res = LoadConstantWide(r_tmp, value);
    } else {
      r_tmp = AllocTemp();
      res = LoadConstant(r_tmp, value);
    }
    OpRegReg(op, r_dest_src1, r_tmp);
    FreeTemp(r_tmp);
    return res;
  }

  switch (op) {
    case kOpAdd:
      neg_opcode = kA64Sub4RRdT;
      opcode = kA64Add4RRdT;
      break;
    case kOpSub:
      neg_opcode = kA64Add4RRdT;
      opcode = kA64Sub4RRdT;
      break;
    case kOpCmp:
      neg_opcode = kA64Cmn3RdT;
      opcode = kA64Cmp3RdT;
      break;
    default:
      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
      break;
  }

  if (UNLIKELY(neg))
    opcode = neg_opcode;

  if (EncodingMap[opcode].flags & IS_QUAD_OP)
    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
                   (shift) ?
                       1 : 0);
  else
    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}

int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
  DCHECK_EQ(shift_type & 0x3, shift_type);
  DCHECK_EQ(amount & 0x3f, amount);
  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}

int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
  DCHECK_EQ(extend_type & 0x7, extend_type);
  DCHECK_EQ(amount & 0x7, amount);
  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}

bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
  return ((1 << 6) & encoded_value) != 0;
}

LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                   int scale, OpSize size) {
  LIR* load;
  int expected_scale = 0;
  A64Opcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  // register offset load (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_dest.IsFloat()) {
    if (r_dest.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = WIDE(kA64Ldr4fXxG);
    } else {
      DCHECK(r_dest.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Ldr4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:
    case kWord:
    case k64:
      r_dest = Check64BitReg(r_dest);
      opcode = WIDE(kA64Ldr4rXxG);
      expected_scale = 3;
      break;
    case kReference:
      r_dest = As32BitReg(r_dest);
      FALLTHROUGH_INTENDED;
    case kSingle:  // Intentional fall-through.
    case k32:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldr4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrh4wXxd;
      expected_scale = 1;
      break;
    case kSignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsh4rXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrb3wXx;
      break;
    case kSignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsb3rXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. ldrb, ldrsb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    DCHECK(scale == 0 || scale == expected_scale);
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ?
                       1 : 0);
  }

  return load;
}

LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                    int scale, OpSize size) {
  LIR* store;
  int expected_scale = 0;
  A64Opcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  // register offset store (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_src.IsFloat()) {
    if (r_src.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = WIDE(kA64Str4fXxG);
    } else {
      DCHECK(r_src.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Str4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:  // Intentional fall-through.
    case kWord:  // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      opcode = WIDE(kA64Str4rXxG);
      expected_scale = 3;
      break;
    case kReference:
      r_src = As32BitReg(r_src);
      FALLTHROUGH_INTENDED;
    case kSingle:  // Intentional fall-through.
    case k32:
      r_src = Check32BitReg(r_src);
      opcode = kA64Str4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strh4wXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
    case kSignedByte:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strb3wXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. strb), it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                    (scale != 0) ? 1 : 0);
  }

  return store;
}

/*
 * Load value from base + displacement. Optionally perform null check
 * on base (which must have an associated s_reg and MIR). If not
 * performing null check, incoming MIR can be null.
 */
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                    OpSize size) {
  LIR* load = nullptr;
  A64Opcode opcode = kA64Brk1d;
  A64Opcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:  // Intentional fall-through.
    case kWord:  // Intentional fall-through.
    case k64:
      r_dest = Check64BitReg(r_dest);
      scale = 3;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsDouble());
        opcode = WIDE(kA64Ldr3fXD);
        alt_opcode = WIDE(kA64Ldur3fXd);
      } else {
        opcode = WIDE(kA64Ldr3rXD);
        alt_opcode = WIDE(kA64Ldur3rXd);
      }
      break;
    case kReference:
      r_dest = As32BitReg(r_dest);
      FALLTHROUGH_INTENDED;
    case kSingle:  // Intentional fall-through.
    case k32:
      r_dest = Check32BitReg(r_dest);
      scale = 2;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        opcode = kA64Ldr3fXD;
      } else {
        opcode = kA64Ldr3rXD;
      }
      break;
    case kUnsignedHalf:
      scale = 1;
      opcode = kA64Ldrh3wXF;
      break;
    case kSignedHalf:
      scale = 1;
      opcode = kA64Ldrsh3rXF;
      break;
    case kUnsignedByte:
      opcode = kA64Ldrb3wXd;
      break;
    case kSignedByte:
      opcode = kA64Ldrsb3rXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled load.
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled load.
    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    // TODO: cleaner support for index/displacement registers? Not a reference, but must match width.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    load = LoadBaseIndexed(r_base, r_scratch,
                           (size == kReference) ? As64BitReg(r_dest) : r_dest,
                           0, size);
    FreeTemp(r_scratch);
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK_EQ(r_base, rs_sp);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}

LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
  // LoadBaseDisp() will emit correct insn for atomic load on arm64
  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().

  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // TODO: This should generate an acquire load instead of the barrier.
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                     OpSize size) {
  LIR* store = nullptr;
  A64Opcode opcode = kA64Brk1d;
  A64Opcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:  // Intentional fall-through.
    case kWord:  // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      scale = 3;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsDouble());
        opcode = WIDE(kA64Str3fXD);
        alt_opcode = WIDE(kA64Stur3fXd);
      } else {
        opcode = WIDE(kA64Str3rXD);
        alt_opcode = WIDE(kA64Stur3rXd);
      }
      break;
    case kReference:
      r_src = As32BitReg(r_src);
      FALLTHROUGH_INTENDED;
    case kSingle:  // Intentional fall-through.
    case k32:
      r_src = Check32BitReg(r_src);
      scale = 2;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        opcode = kA64Str3fXD;
      } else {
        opcode = kA64Str3rXD;
      }
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      scale = 1;
      opcode = kA64Strh3wXF;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kA64Strb3wXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled store.
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled store.
    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    store = StoreBaseIndexed(r_base, r_scratch,
                             (size == kReference) ? As64BitReg(r_src) : r_src,
                             0, size);
    FreeTemp(r_scratch);
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK_EQ(r_base, rs_sp);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}

LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
  // TODO: This should generate a release store and no barriers.
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  // StoreBaseDisp() will emit correct insn for atomic store on arm64
  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().

  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return store;
}

LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  UNUSED(r_dest, r_src);
  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  UNUSED(op, r_base, disp);
  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt,
                                    QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
  // The address of the trampoline is already loaded into r_tgt.
  return OpReg(op, r_tgt);
}

}  // namespace art