/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "codegen_arm64.h"

#include "arch/instruction_set_features.h"
#include "arm64_lir.h"
#include "base/bit_utils.h"
#include "base/logging.h"
#include "dex/compiler_ir.h"
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "driver/compiler_driver.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array-inl.h"

namespace art {

LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  UNUSED(ccode, guide);
  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
  UNREACHABLE();
}

void Arm64Mir2Lir::OpEndIT(LIR* it) {
  UNUSED(it);
  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
}

/*
 * 64-bit 3way compare function.
 *     cmp   xA, xB
 *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
 *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
 */
void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2) {
  RegLocation rl_result;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);

  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
          rl_result.reg.GetReg(), kArmCondGe);
  StoreValue(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                  RegLocation rl_src1, RegLocation rl_shift) {
  OpKind op = kOpBkpt;
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Unexpected case: " << opcode;
  }
  rl_shift = LoadValue(rl_shift, kCoreReg);
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
  StoreValueWide(rl_dest, rl_result);
}

static constexpr bool kUseDeltaEncodingInGenSelect = false;
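
// For reference when reading the select lowering below, the A64 conditional
// select family computes (architecturally defined semantics):
//   csel  Rd, Rn, Rm, cond   // Rd = cond ? Rn : Rm
//   csinc Rd, Rn, Rm, cond   // Rd = cond ? Rn : Rm + 1
//   csinv Rd, Rn, Rm, cond   // Rd = cond ? Rn : ~Rm
//   csneg Rd, Rn, Rm, cond   // Rd = cond ? Rn : -Rm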
void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
                             RegStorage rs_dest, int result_reg_class) {
  if (false_val == 0 ||               // 0 is better as first operand.
      true_val == 1 ||                // Potentially Csinc.
      true_val == -1 ||               // Potentially Csinv.
      true_val == false_val + 1) {    // Potentially Csinc.
    ccode = NegateComparison(ccode);
    std::swap(true_val, false_val);
  }

  ArmConditionCode code = ArmConditionEncoding(ccode);

  int opcode;                                      // The opcode.
  RegStorage left_op = RegStorage::InvalidReg();   // The operands.
  RegStorage right_op = RegStorage::InvalidReg();  // The operands.

  bool is_wide = rs_dest.Is64Bit();

  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;

  if (true_val == 0) {
    left_op = zero_reg;
  } else {
    left_op = rs_dest;
    LoadConstantNoClobber(rs_dest, true_val);
  }
  if (false_val == 1) {
    right_op = zero_reg;
    opcode = kA64Csinc4rrrc;
  } else if (false_val == -1) {
    right_op = zero_reg;
    opcode = kA64Csinv4rrrc;
  } else if (false_val == true_val + 1) {
    right_op = left_op;
    opcode = kA64Csinc4rrrc;
  } else if (false_val == -true_val) {
    right_op = left_op;
    opcode = kA64Csneg4rrrc;
  } else if (false_val == ~true_val) {
    right_op = left_op;
    opcode = kA64Csinv4rrrc;
  } else if (true_val == 0) {
    // left_op is zero_reg.
    right_op = rs_dest;
    LoadConstantNoClobber(rs_dest, false_val);
    opcode = kA64Csel4rrrc;
  } else {
    // Generic case.
    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
    if (is_wide) {
      if (t_reg2.Is32Bit()) {
        t_reg2 = As64BitReg(t_reg2);
      }
    } else {
      if (t_reg2.Is64Bit()) {
        t_reg2 = As32BitReg(t_reg2);
      }
    }

    if (kUseDeltaEncodingInGenSelect) {
      int32_t delta = false_val - true_val;
      uint32_t abs_val = delta < 0 ? -delta : delta;

      if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
        // Can encode as immediate to an add.
        right_op = t_reg2;
        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
      }
    }

    // Load as constant.
    if (!right_op.Valid()) {
      LoadConstantNoClobber(t_reg2, false_val);
      right_op = t_reg2;
    }

    opcode = kA64Csel4rrrc;
  }

  DCHECK(left_op.Valid() && right_op.Valid());
  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
          code);
}

void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                    RegisterClass dest_reg_class) {
  DCHECK(rs_dest.Valid());
  OpRegReg(kOpCmp, left_op, right_op);
  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
}
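
// A worked example of the select lowering above (register numbers are
// illustrative only): "(a < b) ? 1 : 0" is first rewritten as
// "(a >= b) ? 0 : 1", since 0 is preferred as the first operand, and then
// emitted as:
//   cmp   w0, w1
//   csinc w2, wzr, wzr, ge   // w2 = (a >= b) ? 0 : 1, i.e. the "cset w2, lt" alias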
void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  UNUSED(bb);
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
  OpRegImm(kOpCmp, rl_src.reg, 0);

  RegLocation rl_dest = mir_graph_->GetDest(mir);

  // The kMirOpSelect has two variants, one for constants and one for moves.
  if (mir->ssa_rep->num_uses == 1) {
    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
              rl_dest.ref ? kRefReg : kCoreReg);
    StoreValue(rl_dest, rl_result);
  } else {
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];

    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);

    bool is_wide = rl_dest.ref || rl_dest.wide;
    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
    NewLIR4(opcode, rl_result.reg.GetReg(),
            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
    StoreValue(rl_dest, rl_result);
  }
}

void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);

  if (rl_src2.is_const) {
    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)

    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    // Special handling using cbz & cbnz.
    if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
      OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
      OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
      return;
    }

    // Only handle Imm if src2 is not already in a register.
    rl_src2 = UpdateLocWide(rl_src2);
    if (rl_src2.location != kLocPhysReg) {
      OpRegImm64(kOpCmp, rl_src1.reg, val);
      OpCondBranch(ccode, taken);
      OpCondBranch(NegateComparison(ccode), not_taken);
      return;
    }
  }

  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  OpCondBranch(ccode, taken);
  OpCondBranch(NegateComparison(ccode), not_taken);
}
/*
 * Generate a register comparison to an immediate and branch. Caller
 * is responsible for setting branch target field.
 */
LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
                                  LIR* target) {
  LIR* branch = nullptr;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  if (check_value == 0) {
    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
      A64Opcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
    } else if (arm_cond == kArmCondLs) {
      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
      // This case happens for a bounds check of array[0].
      A64Opcode opcode = kA64Cbz2rt;
      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
    } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
      A64Opcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
      int value = reg.Is64Bit() ? 63 : 31;
      branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
    }
  }

  if (branch == nullptr) {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kA64B2ct, arm_cond, 0);
  }

  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
                                     RegStorage base_reg, int offset, int check_value,
                                     LIR* target, LIR** compare) {
  DCHECK(compare == nullptr);
  // It is possible that temp register is 64-bit. (ArgReg or RefReg)
  // Always compare 32-bit value no matter what temp_reg is.
  if (temp_reg.Is64Bit()) {
    temp_reg = As32BitReg(temp_reg);
  }
  Load32Disp(base_reg, offset, temp_reg);
  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
  return branch;
}
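
// Illustrative examples of the specializations in OpCmpImmBranch above
// (register names and targets are placeholders):
//   x == 0  ->  cbz  x0, target
//   x != 0  ->  cbnz x0, target
//   x <  0  ->  tbnz x0, #63, target   // test the sign bit
//   x >= 0  ->  tbz  x0, #63, target
//   generic ->  cmp  x0, #imm ; b.cond target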
LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  bool dest_is_fp = r_dest.IsFloat();
  bool src_is_fp = r_src.IsFloat();
  A64Opcode opcode = kA64Brk1d;
  LIR* res;

  if (LIKELY(dest_is_fp == src_is_fp)) {
    if (LIKELY(!dest_is_fp)) {
      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());

      // Core/core copy.
      // Copies involving the sp register require a different instruction.
      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;

      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2-arg instruction.
      //   This currently works because the other arguments are set to 0 by default. We should
      //   rather introduce an alias kA64Mov2RR.

      // core/core copy. Do a x/x copy only if both registers are x.
      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
        opcode = WIDE(opcode);
      }
    } else {
      // Float/float copy.
      bool dest_is_double = r_dest.IsDouble();
      bool src_is_double = r_src.IsDouble();

      // We do not do float/double or double/float casts here.
      DCHECK_EQ(dest_is_double, src_is_double);

      // Homogeneous float/float copy.
      opcode = (dest_is_double) ? WIDE(kA64Fmov2ff) : kA64Fmov2ff;
    }
  } else {
    // Inhomogeneous register copy.
    if (dest_is_fp) {
      if (r_dest.IsDouble()) {
        opcode = kA64Fmov2Sx;
      } else {
        r_src = Check32BitReg(r_src);
        opcode = kA64Fmov2sw;
      }
    } else {
      if (r_src.IsDouble()) {
        opcode = kA64Fmov2xS;
      } else {
        r_dest = Check32BitReg(r_dest);
        opcode = kA64Fmov2ws;
      }
    }
  }

  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());

  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }

  return res;
}

void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  OpRegCopy(r_dest, r_src);
}

// Table of magic divisors
struct MagicTable {
  int magic64_base;
  int magic64_eor;
  uint64_t magic64;
  uint32_t magic32;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {   0,      0,                  0,          0, 0, DivideNone},  // 0
  {   0,      0,                  0,          0, 0, DivideNone},  // 1
  {   0,      0,                  0,          0, 0, DivideNone},  // 2
  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
  {   0,      0,                  0,          0, 0, DivideNone},  // 4
  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
  {   0,      0,                  0,          0, 0, DivideNone},  // 8
  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
};
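
// Worked example of the reciprocal-multiply lowering below (illustrative
// only, not emitted verbatim): dividing w0 by 3 uses magic32 = 0x55555556,
// roughly 2^32/3, with the Divide3 pattern:
//   mov   w1, #0x55555556
//   smull x2, w1, w0            // x2 = (int64_t)w1 * (int32_t)w0
//   lsr   x2, x2, #32           // quotient estimate = high 32 bits
//   sub   w2, w2, w0, asr #31   // adds 1 when the dividend is negative
// Register numbers here are arbitrary placeholders.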
// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4)
bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
  UNUSED(dalvik_opcode);
  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns
  if (!is_div) {
    return false;
  }

  RegStorage r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic32);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage r_long_mul = AllocTemp();
  NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
                  32 + magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
      break;
    case Divide7:
      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
  UNUSED(dalvik_opcode);
  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns
  if (!is_div) {
    return false;
  }

  RegStorage r_magic = AllocTempWide();
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  RegStorage r_long_mul = AllocTempWide();

  if (magic_table[lit].magic64_base >= 0) {
    // Check that the entry in the table is correct.
    if (kIsDebugBuild) {
      uint64_t reconstructed_imm;
      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
      if (magic_table[lit].magic64_eor >= 0) {
        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
        reconstructed_imm = base ^ eor;
      } else {
        reconstructed_imm = base + 1;
      }
      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
    }

    // Load the magic constant in two instructions.
    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
    if (magic_table[lit].magic64_eor >= 0) {
      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
              magic_table[lit].magic64_eor);
    } else {
      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
    }
  } else {
    LoadConstantWide(r_magic, magic_table[lit].magic64);
  }

  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
      break;
    case Divide7:
      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValueWide(rl_dest, rl_result);
  return true;
}

// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
// and store the result in 'rl_dest'.
bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
}
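
// Worked example for the power-of-two path in HandleEasyDivRem64 below
// (illustrative only): x / 8 (k = 3, nbits = 64) rounds toward zero by
// biasing negative inputs before the arithmetic shift:
//   asr  x1, x0, #63           // x1 = (x0 < 0) ? -1 : 0
//   add  x1, x0, x1, lsr #61   // adds 7 only when x0 is negative
//   asr  x2, x1, #3            // quotient
// Register numbers are arbitrary placeholders.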
// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
// and store the result in 'rl_dest'.
bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
  const bool is_64bit = rl_dest.wide;
  const int nbits = (is_64bit) ? 64 : 32;

  if (lit < 2) {
    return false;
  }
  if (!IsPowerOfTwo(lit)) {
    if (is_64bit) {
      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
    } else {
      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
    }
  }
  int k = CTZ(lit);
  if (k >= nbits - 2) {
    // Avoid special cases.
    return false;
  }

  RegLocation rl_result;
  RegStorage t_reg;
  if (is_64bit) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    t_reg = AllocTempWide();
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    t_reg = AllocTemp();
  }

  int shift = EncodeShift(kA64Lsr, nbits - k);
  if (is_div) {
    if (lit == 2) {
      // Division by 2 is by far the most common division by constant.
      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
    } else {
      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
    }
  } else {
    if (lit == 2) {
      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
    } else {
      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
    }
  }

  if (is_64bit) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  UNUSED(rl_src, rl_dest, lit);
  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
  UNREACHABLE();
}

RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
                                       bool is_div) {
  UNUSED(rl_dest, rl_src1, lit, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
  UNREACHABLE();
}
RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Put the literal in a temp.
  RegStorage lit_temp = AllocTemp();
  LoadConstant(lit_temp, lit);
  // Use the generic case for div/rem with arg2 in a register.
  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
  FreeTemp(lit_temp);

  return rl_result;
}

RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                    RegLocation rl_src2, bool is_div, int flags) {
  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
  UNREACHABLE();
}

RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
                                    bool is_div) {
  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());

  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (is_div) {
    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
  } else {
    // temp = r_src1 / r_src2
    // dest = r_src1 - temp * r_src2
    RegStorage temp;
    A64Opcode wide;
    if (rl_result.reg.Is64Bit()) {
      temp = AllocTempWide();
      wide = WIDE(0);
    } else {
      temp = AllocTemp();
      wide = UNWIDE(0);
    }
    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
            r_src2.GetReg(), r_src1.GetReg());
    FreeTemp(temp);
  }
  return rl_result;
}

bool Arm64Mir2Lir::GenInlinedAbsInt(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Compare the source value with zero. Write the negated value to the result if
  // negative, otherwise write the original value.
  OpRegImm(kOpCmp, rl_src.reg, 0);
  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
          kArmCondPl);
  StoreValue(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_dest = InlineTargetWide(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Compare the source value with zero. Write the negated value to the result if
  // negative, otherwise write the original value.
  OpRegImm(kOpCmp, rl_src.reg, 0);
  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_src.reg.GetReg(),
          rl_src.reg.GetReg(), kArmCondPl);
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK_EQ(cu_->instruction_set, kArm64);
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
  (is_long) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
  return true;
}
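
// For reference, the inlined intrinsics above reduce to branchless sequences
// along these lines (placeholder registers, 32-bit case):
//   Math.abs(a):    cmp w0, #0 ; csneg w0, w0, w0, pl   // negate if negative
//   Math.min(a, b): cmp w0, w1 ; csel  w0, w0, w1, lt
//   Math.max(a, b): cmp w0, w1 ; csel  w0, w0, w1, gt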
bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);

  RegLocation rl_value;
  if (size == k64) {
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    rl_value = LoadValue(rl_src_value, kCoreReg);
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}
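
// The compare-and-swap below compiles to a load-exclusive/store-exclusive
// retry loop; schematically (registers are placeholders, 32-bit case):
//   loop: ldaxr w2, [x0]            // load-acquire exclusive
//         cmp   w2, w_expected
//         b.ne  exit                // value differs: fail
//         stlxr w3, w_new, [x0]     // store-release exclusive
//         cmp   w3, #0
//         b.ne  loop                // lost exclusivity: retry
//   exit: csinc w4, wzr, wzr, ne    // result = 1 on success, 0 on failure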
bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kArm64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // Load Object and offset
  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);

  RegLocation rl_new_value;
  RegLocation rl_expected;
  if (is_long) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
  }

  RegStorage r_ptr = AllocTempRef();
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg);
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg);

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  RegStorage r_tmp;
  RegStorage r_tmp_stored;
  RegStorage rl_new_value_stored = rl_new_value.reg;
  A64Opcode wide = UNWIDE(0);
  if (is_long) {
    r_tmp_stored = r_tmp = AllocTempWide();
    wide = WIDE(0);
  } else if (is_object) {
    // References use 64-bit registers, but are stored as compressed 32-bit values.
    // This means r_tmp_stored != r_tmp.
    r_tmp = AllocTempRef();
    r_tmp_stored = As32BitReg(r_tmp);
    rl_new_value_stored = As32BitReg(rl_new_value_stored);
  } else {
    r_tmp_stored = r_tmp = AllocTemp();
  }

  RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
  LIR* loop = NewLIR0(kPseudoTargetLabel);
  NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
  OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
  LIR* early_exit = OpCondBranch(kCondNe, nullptr);
  NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
  NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
  OpCondBranch(kCondNe, loop);

  LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
  early_exit->target = exit_loop;

  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);

  FreeTemp(r_tmp);  // Now unneeded.
  FreeTemp(r_ptr);  // Now unneeded.

  StoreValue(rl_dest, rl_result);

  return true;
}
bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
  constexpr int kLargeArrayThreshold = 512;

  RegLocation rl_src = info->args[0];
  RegLocation rl_src_pos = info->args[1];
  RegLocation rl_dst = info->args[2];
  RegLocation rl_dst_pos = info->args[3];
  RegLocation rl_length = info->args[4];
  // Compile time check, handle exception by non-inline method to reduce related meta-data.
  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
    return false;
  }

  ClobberCallerSave();
  LockCallTemps();  // Prepare for explicit register usage.
  RegStorage rs_src = rs_x0;
  RegStorage rs_dst = rs_x1;
  LoadValueDirectFixed(rl_src, rs_src);
  LoadValueDirectFixed(rl_dst, rs_dst);

  // Handle null pointer exception in slow-path.
  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
  // Handle potential overlapping in slow-path.
  // TUNING: Support overlapping cases.
  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
  // Handle exception or big length in slow-path.
  RegStorage rs_length = rs_w2;
  LoadValueDirectFixed(rl_length, rs_length);
  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
  // Src bounds check.
  RegStorage rs_src_pos = rs_w3;
  RegStorage rs_arr_length = rs_w4;
  LoadValueDirectFixed(rl_src_pos, rs_src_pos);
  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
  // Dst bounds check.
  RegStorage rs_dst_pos = rs_w5;
  LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);

  // Everything is checked now.
  // Set rs_src to the address of the first element to be copied.
  rs_src_pos = As64BitReg(rs_src_pos);
  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
  OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
  OpRegReg(kOpAdd, rs_src, rs_src_pos);
  // Set rs_dst to the address of the first element to be copied.
  rs_dst_pos = As64BitReg(rs_dst_pos);
  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
  OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
  OpRegReg(kOpAdd, rs_dst, rs_dst_pos);

  // rs_arr_length won't be used anymore.
  RegStorage rs_tmp = rs_arr_length;
  // Use 64-bit view, since rs_length will be used as index.
  rs_length = As64BitReg(rs_length);
  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);

  // Copy one element.
  LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
  OpRegImm(kOpSub, rs_length, 2);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);

  // Copy two elements.
  LIR* copy_two = NewLIR0(kPseudoTargetLabel);
  LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
  OpRegImm(kOpSub, rs_length, 4);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);

  // Copy four elements.
  LIR* copy_four = NewLIR0(kPseudoTargetLabel);
  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
  LIR* begin_loop = NewLIR0(kPseudoTargetLabel);
  OpRegImm(kOpSub, rs_length, 8);
  rs_tmp = As64BitReg(rs_tmp);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
  LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
  LIR* loop_finished = OpUnconditionalBranch(nullptr);

  LIR* check_failed = NewLIR0(kPseudoTargetLabel);
  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
  LIR* return_point = NewLIR0(kPseudoTargetLabel);

  src_check_branch->target = check_failed;
  dst_check_branch->target = check_failed;
  src_dst_same->target = check_failed;
  len_neg_or_too_big->target = check_failed;
  src_pos_negative->target = check_failed;
  src_bad_len->target = check_failed;
  dst_pos_negative->target = check_failed;
  dst_bad_len->target = check_failed;
  jmp_to_copy_two->target = copy_two;
  jmp_to_copy_four->target = copy_four;
  jmp_to_ret->target = return_point;
  jmp_to_loop->target = begin_loop;
  loop_finished->target = return_point;

  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
  ClobberCallerSave();  // We must clobber everything because slow path will return here.

  return true;
}

void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
  lir->target = target;
}

bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
  return dex_cache_arrays_layout_.Valid();
}
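
// The dex cache array access below is emitted as an adrp/ldr pair whose
// operands are fixed up later; roughly (placeholder register, assuming the
// usual page-relative patching scheme):
//   adrp x0, <page of dex cache arrays>
//   ldr  w0, [x0, #<offset within page>]   // a 64-bit ldr when wide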
void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest,
                                            bool wide) {
  LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0);
  adrp->operands[2] = WrapPointer(dex_file);
  adrp->operands[3] = offset;
  adrp->operands[4] = WrapPointer(adrp);
  dex_cache_access_insns_.push_back(adrp);
  if (wide) {
    DCHECK(r_dest.Is64Bit());
  }
  LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, wide ? k64 : kReference, kNotVolatile);
  ldr->operands[4] = adrp->operands[4];
  ldr->flags.fixup = kFixupLabel;
  dex_cache_access_insns_.push_back(ldr);
}

LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
  UNUSED(r_base, count);
  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
  UNUSED(r_base, count);
  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
  UNREACHABLE();
}

void Arm64Mir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                  RegLocation rl_src3, bool is_sub) {
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  rl_src3 = LoadValue(rl_src3, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  NewLIR4(is_sub ? kA64Msub4rrrr : kA64Madd4rrrr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
          rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
  StoreValue(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                   RegLocation rl_src3, bool is_sub) {
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  rl_src3 = LoadValueWide(rl_src3, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  NewLIR4(is_sub ? WIDE(kA64Msub4rrrr) : WIDE(kA64Madd4rrrr), rl_result.reg.GetReg(),
          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                 RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
                                                 int first_bit, int second_bit) {
  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
                   EncodeShift(kA64Lsl, second_bit - first_bit));
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
}

// Test suspend flag, return target of taken suspend branch
LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
  RegStorage r_tmp = AllocTemp();
  LoadBaseDisp(rs_xSELF, Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value(), r_tmp,
               kUnsignedHalf, kNotVolatile);
  LIR* cmp_branch = OpCmpImmBranch(target == nullptr ? kCondNe : kCondEq, r_tmp, 0, target);
  FreeTemp(r_tmp);
  return cmp_branch;
}

// Decrement register and branch on condition
LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  // Combine sub & test using sub setflags encoding here. We need to make sure a
  // subtract form that sets carry is used, so generate explicitly.
  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
  A64Opcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
  return OpCondBranch(c_code, target);
}
bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
    return false;
  }
  // Start off by using the last LIR as the barrier. If it is not enough, we will generate one.
  LIR* barrier = last_lir_insn_;

  int dmb_flavor;
  // TODO: revisit Arm barrier kinds
  switch (barrier_kind) {
    case kAnyStore: dmb_flavor = kISH; break;
    case kLoadAny: dmb_flavor = kISH; break;
    // We conjecture that kISHLD is insufficient. It is documented
    // to provide LoadLoad | StoreStore ordering. But if this were used
    // to implement volatile loads, we suspect that the lack of store
    // atomicity on ARM would cause us to allow incorrect results for
    // the canonical IRIW example. But we're not sure.
    // We should be using acquire loads instead.
    case kStoreStore: dmb_flavor = kISHST; break;
    case kAnyAny: dmb_flavor = kISH; break;
    default:
      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
      dmb_flavor = kSY;  // quiet gcc.
      break;
  }

  bool ret = false;

  // If the same barrier already exists, don't generate another.
  if (barrier == nullptr
      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
    ret = true;
  }

  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
  DCHECK(!barrier->flags.use_def_invalid);
  barrier->u.m.def_mask = &kEncodeAll;
  return ret;
}

void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;

  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div,
                                 int flags) {
  if (rl_src2.is_const) {
    DCHECK(rl_src2.wide);
    int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
    if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
      return;
    }
  }

  RegLocation rl_result;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
    GenDivZeroCheck(rl_src2.reg);
  }
  rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
  RegLocation rl_result;

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;

  rl_src = LoadValueWide(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;

  rl_src = LoadValueWide(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
  StoreValueWide(rl_dest, rl_result);
}
void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                  RegLocation rl_src1, RegLocation rl_src2, int flags) {
  switch (opcode) {
    case Instruction::NOT_LONG:
      GenNotLong(rl_dest, rl_src2);
      return;
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::MUL_LONG:
    case Instruction::MUL_LONG_2ADDR:
      GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::DIV_LONG:
    case Instruction::DIV_LONG_2ADDR:
      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
      return;
    case Instruction::REM_LONG:
    case Instruction::REM_LONG_2ADDR:
      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
      return;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::NEG_LONG: {
      GenNegLong(rl_dest, rl_src2);
      return;
    }
    default:
      LOG(FATAL) << "Invalid long arith op";
      return;
  }
}
/*
 * Generate array load
 */
void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                               RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  if (constant_index) {
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      FreeTemp(reg_len);
    }
    // Fold the constant index into the data offset.
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
    if (rl_result.ref) {
      LoadRefDisp(rl_array.reg, data_offset, rl_result.reg, kNotVolatile);
    } else {
      LoadBaseDisp(rl_array.reg, data_offset, rl_result.reg, size, kNotVolatile);
    }
  } else {
    // Offset base, then use indexed load.
    RegStorage reg_ptr = AllocTempRef();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    FreeTemp(rl_array.reg);
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    if (rl_result.ref) {
      LoadRefIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale);
    } else {
      LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
    }
    FreeTemp(reg_ptr);
  }
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}
/*
 * Generate array store
 */
void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                               RegLocation rl_index, RegLocation rl_src, int scale,
                               bool card_mark) {
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == k64 || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  RegStorage reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg;
  } else if (IsTemp(rl_array.reg) && !card_mark) {
    Clobber(rl_array.reg);
    reg_ptr = rl_array.reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTempRef();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps (4) here.
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, reg_class);
  } else {
    rl_src = LoadValue(rl_src, reg_class);
  }
  if (constant_index) {
    if (needs_range_check) {
      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      FreeTemp(reg_len);
    }
    // Fold the constant index into the data offset.
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
    if (rl_src.ref) {
      StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
    } else {
      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
    }
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    if (rl_src.ref) {
      StoreRefIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale);
    } else {
      StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
    }
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
  }
}

void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
                                     int flags ATTRIBUTE_UNUSED) {
  OpKind op = kOpBkpt;
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }

  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                     RegLocation rl_src1, RegLocation rl_src2, int flags) {
  OpKind op = kOpBkpt;
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      op = kOpAdd;
      break;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      op = kOpSub;
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      op = kOpAnd;
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      op = kOpOr;
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      op = kOpXor;
      break;
    default:
      LOG(FATAL) << "Unexpected opcode";
  }

  if (op == kOpSub) {
    if (!rl_src2.is_const) {
      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
    }
  } else {
    // Associativity.
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
  StoreValueWide(rl_dest, rl_result);
}

static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
  // Find first register.
  int first_bit_set = CTZ(reg_mask) + 1;
  *reg = *reg + first_bit_set;
  reg_mask >>= first_bit_set;
  return reg_mask;
}
/**
 * @brief Split a register list into pairs or single registers.
 *
 * Given a list of registers in @p reg_mask, split the list in pairs. Use as follows:
 * @code
 *   int reg1 = -1, reg2 = -1;
 *   while (reg_mask) {
 *     reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
 *     if (UNLIKELY(reg2 < 0)) {
 *       // Single register in reg1.
 *     } else {
 *       // Pair in reg1, reg2.
 *     }
 *   }
 * @endcode
 */
static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
  // Find first register.
  int first_bit_set = CTZ(reg_mask) + 1;
  int reg = *reg1 + first_bit_set;
  reg_mask >>= first_bit_set;

  if (LIKELY(reg_mask)) {
    // Save the first register, find the second and use the pair opcode.
    int second_bit_set = CTZ(reg_mask) + 1;
    *reg2 = reg;
    reg_mask >>= second_bit_set;
    *reg1 = reg + second_bit_set;
    return reg_mask;
  }

  // Use the single opcode, as we just have one register.
  *reg1 = reg;
  *reg2 = -1;
  return reg_mask;
}

static dwarf::Reg DwarfCoreReg(int num) {
  return dwarf::Reg::Arm64Core(num);
}

static dwarf::Reg DwarfFpReg(int num) {
  return dwarf::Reg::Arm64Fp(num);
}

static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size);
    } else {
      m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size);
    }
  }
}

// TODO(Arm64): consider using ld1 and st1?
static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                   offset);
      m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size);
    } else {
      m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size);
      m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size);
    }
  }
}
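
// Schematic output of SpillCoreRegs above for the mask {x19, x20, x21} at
// byte offset 0 (illustrative only):
//   stp x19, x20, [sp]        // pair store
//   str x21, [sp, #16]        // odd register left over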
static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                           int frame_size) {
  m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
  m2l->cfi().AdjustCFAOffset(frame_size);

  int core_count = POPCOUNT(core_reg_mask);

  if (fp_reg_mask != 0) {
    // Spill FP regs.
    int fp_count = POPCOUNT(fp_reg_mask);
    int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
    SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
  }

  if (core_reg_mask != 0) {
    // Spill core regs.
    int spill_offset = frame_size - (core_count * kArm64PointerSize);
    SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
  }

  return frame_size;
}
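
// The pre-indexed variant below allocates the whole save area with its first
// store; schematically (placeholder registers and offset):
//   stp d8, d9, [sp, #-<save_area_size>]!   // push two regs, move sp down
// and then fills the remaining slots upwards with ordinary str/stp.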
static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
                               uint32_t fp_reg_mask) {
  // Spill both core and fp regs at the same time.
  // The very first instruction will be an stp with a pre-indexed address, moving the stack
  // pointer down. From then on, we fill upwards. This will generate overall the same number of
  // instructions as the specialized code above in most cases (the exception being an odd number
  // of core and an even non-zero number of fp spills), but is more flexible, as the offsets are
  // guaranteed small.
  //
  // Some demonstrative fill cases: (c) = core, (f) = fp. Each column is an independent case,
  // top of the frame first; the digits name the instruction (1 = the pre-indexed stp) that
  // fills each slot.
  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
  // fc => 23   fc => 23   ff => 11   ff => 22
  // ff    11    f    11               f    11
  //
  int reg1 = -1, reg2 = -1;
  int core_count = POPCOUNT(core_reg_mask);
  int fp_count = POPCOUNT(fp_reg_mask);

  int combined = fp_count + core_count;
  int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.

  int cur_offset = 2;  // Starting offset after the first stp: the base slot pair is expected
                       // to be filled by it.

  // First figure out whether the bottom is FP or core.
  if (fp_count > 0) {
    // Some FP spills.
    //
    // Four cases (a dummy register fills the unused half of the stp; cases 1 and 2 below
    // reuse fp_reg itself as the dummy, case 4 uses d0):
    // 1) Single FP, even number of core -> stp d0, fp_reg
    // 2) Single FP, odd number of core -> stp fp_reg, d0
    // 3) More FP, even number combined -> stp fp_reg1, fp_reg2
    // 4) More FP, odd number combined -> stp d0, fp_reg
    if (fp_count == 1) {
      fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
      DCHECK_EQ(fp_reg_mask, 0U);
      if (core_count % 2 == 0) {
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
      } else {
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), 0);
        cur_offset = 0;  // That core reg needs to go into the upper half.
      }
    } else {
      if (combined % 2 == 0) {
        fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg2), 0);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
      } else {
        fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
      }
    }
  } else {
    // No FP spills.
    //
    // Two cases:
    // 1) Even number of core -> stp core1, core2
    // 2) Odd number of core -> stp xzr, core1
    if (core_count % 2 == 1) {
      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
    } else {
      core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
      m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
    }
  }
  DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(),
            static_cast<int>(all_offset * kArm64PointerSize));

  if (fp_count != 0) {
    // Spill the remaining FP regs.
    while (fp_reg_mask != 0) {
      fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
      if (UNLIKELY(reg2 < 0)) {
        m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                     cur_offset);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize);
        // Do not increment the offset here, as the second half will be filled by a core reg.
      } else {
        m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
        m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize);
        m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize);
        cur_offset += 2;
      }
    }

    // Reset the running base for GenPairWise/ExtractReg before switching to core regs.
    reg1 = -1;

    // If there is an odd number of core registers, we need to store the bottom one now, into
    // the upper half of the last partially filled slot pair.
    if (core_count % 2 == 1) {
      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
                   cur_offset + 1);
      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
      cur_offset += 2;  // Half-slot filled now.
    }
  }

  // Spill the rest of the core regs. There is guaranteed to be an even number of them left.
  DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
  for (; core_reg_mask != 0; cur_offset += 2) {
    core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
    m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                 RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
    m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize);
    m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
  }

  DCHECK_EQ(cur_offset, all_offset);

  return all_offset * 8;  // In bytes: one 8-byte slot per spilled register.
}
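// Worked case (illustrative; the registers are hypothetical): spilling x19/x20 plus a
// single d8 gives combined = 3, so all_offset = 4 slots (32 bytes, keeping sp 16B-aligned).
// This is case 1 above (single FP, even number of core), so the emitted code should be
// roughly:
//
//   stp d8, d8, [sp, #-32]!    // Instruction 1: pre-index drops sp; the lower copy is the dummy.
//   stp x19, x20, [sp, #16]    // Instruction 2: fills the upper slot pair.
//
// The CFI records d8 at sp+8 and x19/x20 at sp+16/sp+24, and the function returns 32.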
int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                            int frame_size) {
  // If the frame size is small enough that all offsets would fit into the immediates, use that
  // setup, as it decrements sp early (a kind of instruction scheduling), and is no worse
  // instruction-count wise than the pre-indexed variant above.
  //
  // This case is also optimal when we have an odd number of core spills, and an even (non-zero)
  // number of fp spills.
  if (RoundUp(frame_size, 8) / 8 <= 63) {  // 63 = largest non-negative signed 7b scaled offset.
    return SpillRegsPreSub(this, core_reg_mask, fp_reg_mask, frame_size);
  } else {
    return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask);
  }
}

static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;  // Registers are reloaded from 8-byte slots.

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().Restore(DwarfCoreReg(reg1));
    } else {
      DCHECK_LE(offset, 63);  // ldp only takes a 7b signed scaled immediate.
      m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().Restore(DwarfCoreReg(reg2));
      m2l->cfi().Restore(DwarfCoreReg(reg1));
    }
  }
}

static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;  // Registers are reloaded from 8-byte slots.

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, &reg1, &reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                   offset);
      m2l->cfi().Restore(DwarfFpReg(reg1));
    } else {
      m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
      m2l->cfi().Restore(DwarfFpReg(reg2));
      m2l->cfi().Restore(DwarfFpReg(reg1));
    }
  }
}
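// Mirror of the SpillCoreRegs example further up (illustrative; hypothetical registers):
// with reg_mask selecting x19..x21 and offset = 16, UnSpillCoreRegs should emit roughly:
//
//   ldp x19, x20, [sp, #16]
//   ldr x21, [sp, #32]
//
// issuing CFI Restore records rather than RelOffsets, since each register's caller value
// is now back in the register itself.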
void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                               int frame_size) {
  DCHECK_EQ(base, rs_sp);
  // Restore saves and drop stack frame.
  // 2 versions:
  //
  // 1. (Original): Try to address directly, then drop the whole frame.
  //    Limitation: ldp only takes a 7-bit signed scaled immediate, so large frames are out of
  //    range.
  //
  // 2. (New): Drop the non-save part of the frame first. The loads then work as in the original
  //    variant, with offsets that are now guaranteed to be in range. Finally drop the rest of
  //    the frame.
  //
  // TODO: In methods with few spills but a huge frame, it would be better to do non-immediate
  //       loads in variant 1.

  // "Magic" constant: 63 (the max signed 7b scaled immediate) * 8 (the slot size).
  static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;

  const int num_core_spills = POPCOUNT(core_reg_mask);
  const int num_fp_spills = POPCOUNT(fp_reg_mask);

  int early_drop = 0;

  if (frame_size > kMaxFramesizeForOffset) {
    // Second variant. Drop the non-save part of the frame first.

    // TODO: Always use the first formula, as num_fp_spills would be zero?
    if (fp_reg_mask != 0) {
      early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
    } else {
      early_drop = frame_size - kArm64PointerSize * num_core_spills;
    }

    // The drop needs to be 16B-aligned, so that SP stays aligned.
    early_drop = RoundDown(early_drop, 16);

    OpRegImm64(kOpAdd, rs_sp, early_drop);
    cfi_.AdjustCFAOffset(-early_drop);
  }

  // Unspill.
  if (fp_reg_mask != 0) {
    int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
    UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
  }
  if (core_reg_mask != 0) {
    int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
    UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
  }

  // Drop the rest of the frame (all of it, if nothing was dropped early).
  int adjust = frame_size - early_drop;
  OpRegImm64(kOpAdd, rs_sp, adjust);
  cfi_.AdjustCFAOffset(-adjust);
}

bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
  A64Opcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0);
  RegLocation rl_src_i = info->args[0];
  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegLocation rl_i = IsWide(size) ?
      LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
  // A single rbit reverses the bit order of the whole (32- or 64-bit) register.
  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
  IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
  return true;
}

}  // namespace art