utility_arm64.cc revision 984305917bf57b3f8d92965e4715a0370cc5bcfb
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"

namespace art {

/* This file contains codegen for the A64 ISA. */

static int32_t EncodeImmSingle(uint32_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbc.defg.h000.0000.0000.0000.0000
   *
   * where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
   */

  // bits[18..0] are cleared.
  if ((bits & 0x0007ffff) != 0)
    return -1;

  // bits[29..25] are all set or all cleared.
  uint32_t b_pattern = (bits >> 16) & 0x3e00;
  if (b_pattern != 0 && b_pattern != 0x3e00)
    return -1;

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0)
    return -1;

  // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

static int32_t EncodeImmDouble(uint64_t bits) {
  /*
   * Valid values will have the form:
   *
   *   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
   *   0000.0000.0000.0000.0000.0000.0000.0000
   *
   * where B = not(b).
   */

  // bits[47..0] are cleared.
  if ((bits & UINT64_C(0xffffffffffff)) != 0)
    return -1;

  // bits[61..54] are all set or all cleared.
  uint32_t b_pattern = (bits >> 48) & 0x3fc0;
  if (b_pattern != 0 && b_pattern != 0x3fc0)
    return -1;

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
    return -1;

  // bit7: a000.0000
  uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
  uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
  return (bit7 | bit6 | bit5_to_0);
}

LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
  DCHECK(r_dest.IsSingle());
  if (value == 0) {
    return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
  } else {
    int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
    if (encoded_imm >= 0) {
      return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
    }
  }

  LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
  if (data_target == NULL) {
    // Wide, as we need 8B alignment.
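    // AddWideData() stores the 32-bit value as an 8-byte pool entry (high word zero),
    // which keeps the literal pool entry 8-byte aligned as noted above.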
    data_target = AddWideData(&literal_list_, value, 0);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
  DCHECK(r_dest.IsDouble());
  if (value == 0) {
    return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
  } else {
    int32_t encoded_imm = EncodeImmDouble(value);
    if (encoded_imm >= 0) {
      return NewLIR2(FWIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
    }
  }

  // No short form - load from the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* load_pc_rel = RawLIR(current_dalvik_offset_, FWIDE(kA64Ldr2fp),
                            r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(load_pc_rel);
  return load_pc_rel;
}

static int CountLeadingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}

static int CountTrailingZeros(bool is_wide, uint64_t value) {
  return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}

static int CountSetBits(bool is_wide, uint64_t value) {
  return ((is_wide) ?
          __builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}

/**
 * @brief Try encoding an immediate in the form required by logical instructions.
 *
 * @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
 * @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and
 *   as 32-bit if @p is_wide is false.
 * @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
 * @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
 */
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
  unsigned n, imm_s, imm_r;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //   N   imms    immr   size        S             R
  //   1  ssssss  rrrrrr   64   UInt(ssssss)  UInt(rrrrrr)
  //   0  0sssss  xrrrrr   32   UInt(sssss)   UInt(rrrrr)
  //   0  10ssss  xxrrrr   16   UInt(ssss)    UInt(rrrr)
  //   0  110sss  xxxrrr    8   UInt(sss)     UInt(rrr)
  //   0  1110ss  xxxxrr    4   UInt(ss)      UInt(rr)
  //   0  11110s  xxxxxr    2   UInt(s)       UInt(r)
  //   (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1
  // bits are set. The pattern is rotated right by R, and repeated across a
  // 32 or 64-bit value, depending on destination register width.
  //
  // To test if an arbitrary immediate can be encoded using this scheme, an
  // iterative algorithm is used.
  //
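  // For example (with is_wide == true), value == 0x00ff00ff00ff00ff repeats a
  // 16-bit pattern whose 8 least significant bits are set, so the loop below
  // halves down to width == 16 with set_bits == 8 and produces N == 0,
  // immr == 0, imms == 0b100111, i.e. the function returns 0x27.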
  // 1. If the value has all set or all clear bits, it can't be encoded.
  if (value == 0 || value == ~UINT64_C(0) ||
      (!is_wide && (uint32_t)value == ~UINT32_C(0))) {
    return -1;
  }

  unsigned lead_zero = CountLeadingZeros(is_wide, value);
  unsigned lead_one = CountLeadingZeros(is_wide, ~value);
  unsigned trail_zero = CountTrailingZeros(is_wide, value);
  unsigned trail_one = CountTrailingZeros(is_wide, ~value);
  unsigned set_bits = CountSetBits(is_wide, value);

  // The fixed bits in the immediate s field.
  // If width == 64 (X reg), start at 0xFFFFFF80.
  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
  // widths won't be executed.
  unsigned width = (is_wide) ? 64 : 32;
  int imm_s_fixed = (is_wide) ? -128 : -64;
  int imm_s_mask = 0x3f;

  for (;;) {
    // 2. If the value is two bits wide, it can be encoded.
    if (width == 2) {
      n = 0;
      imm_s = 0x3C;
      imm_r = (value & 3) - 1;
      break;
    }

    n = (width == 64) ? 1 : 0;
    imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
    if ((lead_zero + set_bits) == width) {
      imm_r = 0;
    } else {
      imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
    }

    // 3. If the sum of leading zeros, trailing zeros and set bits is
    //    equal to the bit width of the value, it can be encoded.
    if (lead_zero + trail_zero + set_bits == width) {
      break;
    }

    // 4. If the sum of leading ones, trailing ones and unset bits in the
    //    value is equal to the bit width of the value, it can be encoded.
    if (lead_one + trail_one + (width - set_bits) == width) {
      break;
    }

    // 5. If the most-significant half of the bitwise value is equal to
    //    the least-significant half, return to step 2 using the
    //    least-significant half of the value.
    uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
      width >>= 1;
      set_bits >>= 1;
      imm_s_fixed >>= 1;
      continue;
    }

    // 6. Otherwise, the value can't be encoded.
    return -1;
  }

  return (n << 12 | imm_r << 6 | imm_s);
}

bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value) {
  return false;  // (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
}

bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
  return EncodeImmSingle(value) >= 0;
}

bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
}

bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
  return EncodeImmDouble(value) >= 0;
}

/*
 * Load an immediate using a single instruction when possible; otherwise
 * use a pair of movz and movk instructions.
 *
 * No additional register clobbering operation is performed. Use this version when
 * 1) r_dest is freshly returned from AllocTemp or
 * 2) The codegen is under fixed register usage
 */
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
  LIR* res;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValue(r_dest, value);
  }

  if (r_dest.Is64Bit()) {
    return LoadConstantWide(r_dest, value);
  }

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  // Compute how many movk, movz instructions are needed to load the value.
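  // A halfword is "fast" when it is 0x0000 or 0xffff: the check below relies on
  // (uint16_t)(h + 1) <= 1 holding exactly for those two values, in which case a
  // single mov/mvn from wzr or a single movz/movn suffices.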
  uint16_t high_bits = High16Bits(value);
  uint16_t low_bits = Low16Bits(value);

  bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
  bool high_fast = ((uint16_t)(high_bits + 1) <= 1);

  if (LIKELY(low_fast || high_fast)) {
    // 1 instruction is enough to load the immediate.
    if (LIKELY(low_bits == high_bits)) {
      // Value is either 0 or -1: we can just use wzr.
      ArmOpcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
      res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
    } else {
      uint16_t uniform_bits, useful_bits;
      int shift;

      if (LIKELY(high_fast)) {
        shift = 0;
        uniform_bits = high_bits;
        useful_bits = low_bits;
      } else {
        shift = 1;
        uniform_bits = low_bits;
        useful_bits = high_bits;
      }

      if (UNLIKELY(uniform_bits != 0)) {
        res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
      } else {
        res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
      }
    }
  } else {
    // movk, movz require 2 instructions. Try detecting logical immediates.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
    if (log_imm >= 0) {
      res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
    } else {
      // Use 2 instructions.
      res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
      NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
    }
  }

  return res;
}

// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
  // Maximum number of instructions to use for encoding the immediate.
  const int max_num_ops = 2;

  if (r_dest.IsFloat()) {
    return LoadFPConstantValueWide(r_dest, value);
  }

  DCHECK(r_dest.Is64Bit());

  // Loading SP/ZR with an immediate is not supported.
  DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
  DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));

  if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
    // value is either 0 or -1: we can just use xzr.
    ArmOpcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
    return NewLIR2(opcode, r_dest.GetReg(), rxzr);
  }

  // At least one of value's halfwords is neither 0x0000 nor 0xffff: find out how many are.
  int num_0000_halfwords = 0;
  int num_ffff_halfwords = 0;
  uint64_t uvalue = static_cast<uint64_t>(value);
  for (int shift = 0; shift < 64; shift += 16) {
    uint16_t halfword = static_cast<uint16_t>(uvalue >> shift);
    if (halfword == 0)
      num_0000_halfwords++;
    else if (halfword == UINT16_C(0xffff))
      num_ffff_halfwords++;
  }
  int num_fast_halfwords = std::max(num_0000_halfwords, num_ffff_halfwords);

  if (num_fast_halfwords < 3) {
    // A single movz/movn is not enough. Try the logical immediate route.
    int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
    if (log_imm >= 0) {
      return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
    }
  }

  if (num_fast_halfwords >= 4 - max_num_ops) {
    // We can encode the number using a movz/movn followed by one or more movk.
    ArmOpcode op;
    uint16_t background;
    LIR* res = nullptr;

    // Decide whether to use a movz or a movn.
    if (num_0000_halfwords >= num_ffff_halfwords) {
      op = WIDE(kA64Movz3rdM);
      background = 0;
    } else {
      op = WIDE(kA64Movn3rdM);
      background = 0xffff;
    }

    // Emit the first instruction (movz, movn).
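    // Note: movn writes the bitwise NOT of its immediate, so when background == 0xffff
    // the emitted halfword is halfword ^ background (i.e. ~halfword) and every other
    // halfword of the result comes out equal to the background value.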
    int shift;
    for (shift = 0; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
        break;
      }
    }

    // Emit the movk instructions.
    for (shift++; shift < 4; shift++) {
      uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
      if (halfword != background) {
        NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
      }
    }
    return res;
  }

  // Use the literal pool.
  int32_t val_lo = Low32Bits(value);
  int32_t val_hi = High32Bits(value);
  LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
  if (data_target == NULL) {
    data_target = AddWideData(&literal_list_, val_lo, val_hi);
  }

  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
                    r_dest.GetReg(), 0, 0, 0, 0, data_target);
  AppendLIR(res);
  return res;
}

LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
  LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
  res->target = target;
  return res;
}

LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
  LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
                        0 /* offset to be patched */);
  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
  ArmOpcode opcode = kA64Brk1d;
  switch (op) {
    case kOpBlx:
      opcode = kA64Blr1x;
      break;
    // TODO(Arm64): port kThumbBx.
    // case kOpBx:
    //   opcode = kThumbBx;
    //   break;
    default:
      LOG(FATAL) << "Bad opcode " << op;
  }
  return NewLIR1(opcode, r_dest_src.GetReg());
}

LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3rro;
      break;
    case kOpCmp:
      opcode = kA64Cmp3rro;
      break;
    case kOpMov:
      opcode = kA64Mov2rr;
      break;
    case kOpMvn:
      opcode = kA64Mvn2rr;
      break;
    case kOpNeg:
      opcode = kA64Neg3rro;
      break;
    case kOpTst:
      opcode = kA64Tst3rro;
      break;
    case kOpRev:
      DCHECK_EQ(shift, 0);
      // Binary, but rm is encoded twice.
      return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      break;
    case kOpRevsh:
      // Binary, but rm is encoded twice.
      NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
      // "sxth r1, r2" is "sbfm r1, r2, #0, #15"
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
      break;
    case kOp2Byte:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use sbfm directly.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
    case kOp2Short:
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      // For now we use sbfm rather than its alias, sbfx.
      return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    case kOp2Char:
      // "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
      // For now we use ubfm directly.
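      // "ubfm rd, rn, #0, #15" is the uxth alias: it zero-extends the low 16 bits,
      // which is exactly what kOp2Char needs.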
      DCHECK_EQ(shift, ENCODE_NO_SHIFT);
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
    default:
      return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_BINARY_OP) {
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
  } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtShift) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int extend) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpCmn:
      opcode = kA64Cmn3Rre;
      break;
    case kOpCmp:
      opcode = kA64Cmp3Rre;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  DCHECK(!IsPseudoLirOp(opcode));
  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
    if (kind == kFmtExtend) {
      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), extend);
    }
  }

  LOG(FATAL) << "Unexpected encoding operand count";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
  /* RegReg operations with SP in first parameter need extended register instruction form.
   * Only CMN and CMP instructions are implemented.
   */
  if (r_dest_src1 == rs_sp) {
    return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND);
  } else {
    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
  }
}

LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src, MoveType move_type) {
  UNIMPLEMENTED(FATAL);
  return nullptr;
}

LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                    RegStorage r_src2, int shift) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4rrro;
      break;
    case kOpSub:
      opcode = kA64Sub4rrro;
      break;
    // case kOpRsub:
    //   opcode = kA64RsubWWW;
    //   break;
    case kOpAdc:
      opcode = kA64Adc3rrr;
      break;
    case kOpAnd:
      opcode = kA64And4rrro;
      break;
    case kOpXor:
      opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      opcode = kA64Mul3rrr;
      break;
    case kOpDiv:
      opcode = kA64Sdiv3rrr;
      break;
    case kOpOr:
      opcode = kA64Orr4rrro;
      break;
    case kOpSbc:
      opcode = kA64Sbc3rrr;
      break;
    case kOpLsl:
      opcode = kA64Lsl3rrr;
      break;
    case kOpLsr:
      opcode = kA64Lsr3rrr;
      break;
    case kOpAsr:
      opcode = kA64Asr3rrr;
      break;
    case kOpRor:
      opcode = kA64Ror3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
  }

  // The instructions above fall into two kinds:
  // - 4-operand instructions, where the last operand is a shift/extend immediate,
  // - 3-operand instructions with no shift/extend.
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
  CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
  if (EncodingMap[opcode].flags & IS_QUAD_OP) {
    DCHECK(!IsExtendEncoding(shift));
    return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
  } else {
    DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
    DCHECK_EQ(shift, ENCODE_NO_SHIFT);
    return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
  }
}

LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
                                     RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
  ArmOpcode opcode = kA64Brk1d;

  switch (op) {
    case kOpAdd:
      opcode = kA64Add4RRre;
      break;
    case kOpSub:
      opcode = kA64Sub4RRre;
      break;
    default:
      LOG(FATAL) << "Unimplemented opcode: " << op;
      break;
  }
  ArmOpcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;

  if (r_dest.Is64Bit()) {
    CHECK(r_src1.Is64Bit());

    // dest determines whether the op is wide or not. Up-convert src2 when necessary.
    // Note: this is not according to aarch64 specifications, but our encoding.
    if (!r_src2.Is64Bit()) {
      r_src2 = As64BitReg(r_src2);
    }
  } else {
    CHECK(!r_src1.Is64Bit());
    CHECK(!r_src2.Is64Bit());
  }

  // Sanity checks.
  //   1) Amount is in the range 0..4.
  CHECK_LE(amount, 4);

  return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
                 EncodeExtend(ext, amount));
}

LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
  return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}

LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
  return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
  LIR* res;
  bool neg = (value < 0);
  int64_t abs_value = (neg) ? -value : value;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int32_t log_imm = -1;
  bool is_wide = r_dest.Is64Bit();
  ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
  int info = 0;

  switch (op) {
    case kOpLsl: {
      // "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
      // and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
      // For now, we just use ubfm directly.
      int max_value = (is_wide) ? 63 : 31;
      return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
                     (-value) & max_value, max_value - value);
    }
    case kOpLsr:
      return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpAsr:
      return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
    case kOpRor:
      // "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
      // For now, we just use extr directly.
      return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
                     value);
    case kOpAdd:
      neg = !neg;
      // Note: intentional fallthrough.
    case kOpSub:
      // Add and sub below read/write sp rather than xzr.
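      // A64 add/sub (immediate) take a 12-bit unsigned immediate, optionally
      // shifted left by 12; the two cases below encode exactly those forms.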
      if (abs_value < 0x1000) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
      } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
        opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
        return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
      } else {
        log_imm = -1;
        alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
        info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
      }
      break;
    // case kOpRsub:
    //   opcode = kThumb2RsubRRI8M;
    //   alt_opcode = kThumb2RsubRRR;
    //   break;
    case kOpAdc:
      log_imm = -1;
      alt_opcode = kA64Adc3rrr;
      break;
    case kOpSbc:
      log_imm = -1;
      alt_opcode = kA64Sbc3rrr;
      break;
    case kOpOr:
      log_imm = EncodeLogicalImmediate(is_wide, value);
      opcode = kA64Orr3Rrl;
      alt_opcode = kA64Orr4rrro;
      break;
    case kOpAnd:
      log_imm = EncodeLogicalImmediate(is_wide, value);
      opcode = kA64And3Rrl;
      alt_opcode = kA64And4rrro;
      break;
    case kOpXor:
      log_imm = EncodeLogicalImmediate(is_wide, value);
      opcode = kA64Eor3Rrl;
      alt_opcode = kA64Eor4rrro;
      break;
    case kOpMul:
      // TUNING: power of 2, shift & add
      log_imm = -1;
      alt_opcode = kA64Mul3rrr;
      break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
  }

  if (log_imm >= 0) {
    return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
  } else {
    RegStorage r_scratch;
    if (is_wide) {
      r_scratch = AllocTempWide();
      LoadConstantWide(r_scratch, value);
    } else {
      r_scratch = AllocTemp();
      LoadConstant(r_scratch, value);
    }
    if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
      res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
    else
      res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
    FreeTemp(r_scratch);
    return res;
  }
}

LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
  return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}

LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode neg_opcode = kA64Brk1d;
  bool shift;
  bool neg = (value < 0);
  uint64_t abs_value = (neg) ? -value : value;

  if (LIKELY(abs_value < 0x1000)) {
    // abs_value is a 12-bit immediate.
    shift = false;
  } else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
    // abs_value is a shifted 12-bit immediate.
    shift = true;
    abs_value >>= 12;
  } else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
    // Note: It is better to use two ADD/SUB instead of loading a number to a temp register.
    // This works for both normal registers and SP.
    // For a frame size == 0x2468, it will be encoded as:
    //   sub sp, #0x2000
    //   sub sp, #0x468
    if (neg) {
      op = (op == kOpAdd) ? kOpSub : kOpAdd;
    }
    OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
    return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
  } else if (LIKELY(A64_REG_IS_SP(r_dest_src1.GetReg()) && (op == kOpAdd || op == kOpSub))) {
    // Note: "sub sp, sp, Xm" is not correct on arm64.
    // We need special instructions for SP.
    // Also operation on 32-bit SP should be avoided.
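    // Copy SP into a scratch register (via "add tmp, sp, #0"), apply the operation
    // there, then copy the result back the same way.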
    DCHECK(IS_WIDE(wide));
    RegStorage r_tmp = AllocTempWide();
    OpRegRegImm(kOpAdd, r_tmp, r_dest_src1, 0);
    OpRegImm64(op, r_tmp, value);
    return OpRegRegImm(kOpAdd, r_dest_src1, r_tmp, 0);
  } else {
    RegStorage r_tmp;
    LIR* res;
    if (IS_WIDE(wide)) {
      r_tmp = AllocTempWide();
      res = LoadConstantWide(r_tmp, value);
    } else {
      r_tmp = AllocTemp();
      res = LoadConstant(r_tmp, value);
    }
    OpRegReg(op, r_dest_src1, r_tmp);
    FreeTemp(r_tmp);
    return res;
  }

  switch (op) {
    case kOpAdd:
      neg_opcode = kA64Sub4RRdT;
      opcode = kA64Add4RRdT;
      break;
    case kOpSub:
      neg_opcode = kA64Add4RRdT;
      opcode = kA64Sub4RRdT;
      break;
    case kOpCmp:
      neg_opcode = kA64Cmn3RdT;
      opcode = kA64Cmp3RdT;
      break;
    default:
      LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
      break;
  }

  if (UNLIKELY(neg))
    opcode = neg_opcode;

  if (EncodingMap[opcode].flags & IS_QUAD_OP)
    return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
                   (shift) ? 1 : 0);
  else
    return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}

int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
  return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}

int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
  return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}

bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
  return ((1 << 6) & encoded_value) != 0;
}

LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                   int scale, OpSize size) {
  LIR* load;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset load (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_dest.IsFloat()) {
    if (r_dest.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Ldr4fXxG);
    } else {
      DCHECK(r_dest.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Ldr4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:
    case kWord:
    case k64:
      r_dest = Check64BitReg(r_dest);
      opcode = WIDE(kA64Ldr4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldr4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrh4wXxd;
      expected_scale = 1;
      break;
    case kSignedHalf:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsh4rXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrb3wXx;
      break;
    case kSignedByte:
      r_dest = Check32BitReg(r_dest);
      opcode = kA64Ldrsb3rXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. ldrb, ldrsb); it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    DCHECK(scale == 0 || scale == expected_scale);
    load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
                                  int scale) {
  return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference);
}

LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                    int scale, OpSize size) {
  LIR* store;
  int expected_scale = 0;
  ArmOpcode opcode = kA64Brk1d;
  r_base = Check64BitReg(r_base);

  // TODO(Arm64): The sign extension of r_index should be carried out by using an extended
  //   register offset store (rather than doing the sign extension in a separate instruction).
  if (r_index.Is32Bit()) {
    // Assemble: ``sxtw xN, wN''.
    r_index = As64BitReg(r_index);
    NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
  }

  if (r_src.IsFloat()) {
    if (r_src.IsDouble()) {
      DCHECK(size == k64 || size == kDouble);
      expected_scale = 3;
      opcode = FWIDE(kA64Str4fXxG);
    } else {
      DCHECK(r_src.IsSingle());
      DCHECK(size == k32 || size == kSingle);
      expected_scale = 2;
      opcode = kA64Str4fXxG;
    }

    DCHECK(scale == 0 || scale == expected_scale);
    return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                   (scale != 0) ? 1 : 0);
  }

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      opcode = WIDE(kA64Str4rXxG);
      expected_scale = 3;
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      opcode = kA64Str4rXxG;
      expected_scale = 2;
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strh4wXxd;
      expected_scale = 1;
      break;
    case kUnsignedByte:
    case kSignedByte:
      r_src = Check32BitReg(r_src);
      opcode = kA64Strb3wXx;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  if (UNLIKELY(expected_scale == 0)) {
    // This is a tertiary op (e.g. strb); it does not support scale.
    DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
    DCHECK_EQ(scale, 0);
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
  } else {
    store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
                    (scale != 0) ? 1 : 0);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
                                   int scale) {
  return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference);
}

/*
 * Load value from base + displacement. Optionally perform null check
 * on base (which must have an associated s_reg and MIR). If not
 * performing null check, incoming MIR can be null.
 */
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                    OpSize size) {
  LIR* load = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_dest = Check64BitReg(r_dest);
      scale = 3;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsDouble());
        opcode = FWIDE(kA64Ldr3fXD);
        alt_opcode = FWIDE(kA64Ldur3fXd);
      } else {
        opcode = WIDE(kA64Ldr3rXD);
        alt_opcode = WIDE(kA64Ldur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_dest = Check32BitReg(r_dest);
      scale = 2;
      if (r_dest.IsFloat()) {
        DCHECK(r_dest.IsSingle());
        opcode = kA64Ldr3fXD;
      } else {
        opcode = kA64Ldr3rXD;
      }
      break;
    case kUnsignedHalf:
      scale = 1;
      opcode = kA64Ldrh3wXF;
      break;
    case kSignedHalf:
      scale = 1;
      opcode = kA64Ldrsh3rXF;
      break;
    case kUnsignedByte:
      opcode = kA64Ldrb3wXd;
      break;
    case kSignedByte:
      opcode = kA64Ldrsb3rXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled load.
    load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled load.
    load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    // TODO: cleaner support for index/displacement registers? Not a reference, but must match width.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    load = LoadBaseIndexed(r_base, r_scratch, r_dest, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: in future may need to differentiate Dalvik accesses w/ spills.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
  }
  return load;
}

LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                                OpSize size, VolatileKind is_volatile) {
  // LoadBaseDisp() will emit correct insn for atomic load on arm64
  // assuming r_dest is correctly prepared using RegClassForFieldLoadStore().

  LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // TODO: This should generate an acquire load instead of the barrier.
    GenMemBarrier(kLoadAny);
  }

  return load;
}

LIR* Arm64Mir2Lir::LoadRefDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                               VolatileKind is_volatile) {
  return LoadBaseDisp(r_base, displacement, As32BitReg(r_dest), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                     OpSize size) {
  LIR* store = NULL;
  ArmOpcode opcode = kA64Brk1d;
  ArmOpcode alt_opcode = kA64Brk1d;
  int scale = 0;

  switch (size) {
    case kDouble:     // Intentional fall-through.
    case kWord:       // Intentional fall-through.
    case k64:
      r_src = Check64BitReg(r_src);
      scale = 3;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsDouble());
        opcode = FWIDE(kA64Str3fXD);
        alt_opcode = FWIDE(kA64Stur3fXd);
      } else {
        opcode = FWIDE(kA64Str3rXD);
        alt_opcode = FWIDE(kA64Stur3rXd);
      }
      break;
    case kSingle:     // Intentional fall-through.
    case k32:         // Intentional fall-through.
    case kReference:
      r_src = Check32BitReg(r_src);
      scale = 2;
      if (r_src.IsFloat()) {
        DCHECK(r_src.IsSingle());
        opcode = kA64Str3fXD;
      } else {
        opcode = kA64Str3rXD;
      }
      break;
    case kUnsignedHalf:
    case kSignedHalf:
      scale = 1;
      opcode = kA64Strh3wXF;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kA64Strb3wXd;
      break;
    default:
      LOG(FATAL) << "Bad size: " << size;
  }

  bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
  int scaled_disp = displacement >> scale;
  if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
    // Can use scaled store.
    store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
  } else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
    // Can use unscaled store.
    store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
  } else {
    // Use long sequence.
    RegStorage r_scratch = AllocTempWide();
    LoadConstantWide(r_scratch, displacement);
    store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
    FreeTemp(r_scratch);
  }

  // TODO: In future, may need to differentiate Dalvik & spill accesses.
  if (mem_ref_type_ == ResourceMask::kDalvikReg) {
    DCHECK(r_base == rs_sp);
    AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
  }
  return store;
}

LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                 OpSize size, VolatileKind is_volatile) {
  // TODO: This should generate a release store and no barriers.
  if (UNLIKELY(is_volatile == kVolatile)) {
    // Ensure that prior accesses become visible to other threads first.
    GenMemBarrier(kAnyStore);
  }

  // StoreBaseDisp() will emit correct insn for atomic store on arm64
  // assuming r_src is correctly prepared using RegClassForFieldLoadStore().

  LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);

  if (UNLIKELY(is_volatile == kVolatile)) {
    // Preserve order with respect to any subsequent volatile loads.
    // We need StoreLoad, but that generally requires the most expensive barrier.
    GenMemBarrier(kAnyAny);
  }

  return store;
}

LIR* Arm64Mir2Lir::StoreRefDisp(RegStorage r_base, int displacement, RegStorage r_src,
                                VolatileKind is_volatile) {
  return StoreBaseDisp(r_base, displacement, As32BitReg(r_src), kReference, is_volatile);
}

LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
  LOG(FATAL) << "Unexpected use of OpMem for Arm64";
  return NULL;
}

LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
  return OpReg(op, r_tgt);
}

}  // namespace art