code_generator_arm64.cc revision 74eb1b264691c4eb399d0858015a7fc13c476ac6
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"


using namespace vixl;  // NOLINT(build/namespaces)

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterAt;
using helpers::InputFPRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputOperandAt;
using helpers::Int64ConstantFrom;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;

static constexpr int kCurrentMethodStackOffset = 0;
// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions plus num_entries literals. The compare/jump
// sequence generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
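// A rough sketch of the compare/jump lowering used below the threshold (illustrative
// only; the actual sequence is emitted by the PackedSwitch code generation, and the
// register and label names here are hypothetical):
//
//   cmp  w_value, #case_0
//   b.eq label_0
//   cmp  w_value, #case_1
//   b.eq label_1
//   ...
//   b    label_default
//
// At or above the threshold a jump table is used instead: fixed dispatch code plus one
// int32 literal per entry (see JumpTableARM64::EmitTable further down).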
inline Condition ARM64Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
  // The ARM64 condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}
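// Worked example of the gt_bias handling above (informational): for kCondLT with
// gt_bias (a NaN operand must make the condition false) we pick "cc" rather than
// "lt". An fcmp with a NaN operand sets the "unordered" flag pattern N=0, Z=0, C=1,
// V=1: "lt" (N != V) would be taken, but "cc" (C == 0) is not, which is exactly the
// bias we want, while for ordered less-than (N=1, C=0) both conditions agree.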
Location ARM64ReturnLocation(Primitive::Type return_type) {
  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
  // but we use the exact registers for clarity.
  if (return_type == Primitive::kPrimFloat) {
    return LocationFrom(s0);
  } else if (return_type == Primitive::kPrimDouble) {
    return LocationFrom(d0);
  } else if (return_type == Primitive::kPrimLong) {
    return LocationFrom(x0);
  } else if (return_type == Primitive::kPrimVoid) {
    return Location::NoLocation();
  } else {
    return LocationFrom(w0);
  }
}

Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
  return ARM64ReturnLocation(return_type);
}

#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()

// Calculate the memory accessing operand for save/restore of live registers.
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           RegisterSet* register_set,
                                           int64_t spill_offset,
                                           bool is_save) {
  DCHECK(ArtVixlRegCodeCoherentForRegSet(register_set->GetCoreRegisters(),
                                         codegen->GetNumberOfCoreRegisters(),
                                         register_set->GetFloatingPointRegisters(),
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize,
      register_set->GetCoreRegisters() & (~callee_saved_core_registers.list()));
  CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize,
      register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list()));

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.TotalSizeInBytes();
  int64_t fp_spill_size = fp_list.TotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.Count() > 1) || (fp_list.Count() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address (the floating-point registers' spill base address).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}
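// Illustrative example of the rebasing above, with hypothetical numbers: STP/LDP of
// X/D registers take a 7-bit signed immediate scaled by 8, so pair offsets must lie
// in [-512, 504]. With spill_offset = 512, core_spill_size = 16 and fp_spill_size =
// 16, max_ls_pair_offset = 512 + 16 + 16 - 16 = 528 does not encode. After the
// rebase, new_base = sp + 528 points at the FP spill area; the core registers are
// then stored at offset -16 from it and the FP registers at offset 0, both of which
// trivially encode.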
void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  RegisterSet* register_set = locations->GetLiveRegisters();
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
    if (!codegen->IsCoreCalleeSaveRegister(i) && register_set->ContainsCoreRegister(i)) {
      // If the register holds an object, update the stack mask.
      if (locations->RegisterContainsObject(i)) {
        locations->SetStackBit(stack_offset / kVRegSize);
      }
      DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
      DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
      saved_core_stack_offsets_[i] = stack_offset;
      stack_offset += kXRegSizeInBytes;
    }
  }

  for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
    if (!codegen->IsFloatingPointCalleeSaveRegister(i) &&
        register_set->ContainsFloatingPointRegister(i)) {
      DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
      DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
      saved_fpu_stack_offsets_[i] = stack_offset;
      stack_offset += kDRegSizeInBytes;
    }
  }

  SaveRestoreLiveRegistersHelper(codegen, register_set,
                                 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
}

void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  RegisterSet* register_set = locations->GetLiveRegisters();
  SaveRestoreLiveRegistersHelper(codegen, register_set,
                                 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
}

class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : instruction_(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
        locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
    arm64_codegen->InvokeRuntime(
        QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }

 private:
  HBoundsCheck* const instruction_;

  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};

class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : instruction_(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(
        QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }

 private:
  HDivZeroCheck* const instruction_;
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
};
class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadClassSlowPathARM64(HLoadClass* cls,
                         HInstruction* at,
                         uint32_t dex_pc,
                         bool do_clinit)
      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = at_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
    int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                            : QUICK_ENTRY_POINT(pInitializeType);
    arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    // Move the class to the desired location.
    Location out = locations->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      Primitive::Type type = at_->GetType();
      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    }

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The instruction where this slow path is happening.
  // (Might be the load class or an initialization check.)
  HInstruction* const at_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};

class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit LoadStringSlowPathARM64(HLoadString* instruction) : instruction_(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex());
    arm64_codegen->InvokeRuntime(
        QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    Primitive::Type type = instruction_->GetType();
    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }

 private:
  HLoadString* const instruction_;

  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};
class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : instruction_(instr) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(
        QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }

 private:
  HNullCheck* const instruction_;

  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};

class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
      : instruction_(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    arm64_codegen->InvokeRuntime(
        QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, instruction_->GetLocations());
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm64_codegen->GetLabelOf(successor_));
    }
  }

  vixl::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }

 private:
  HSuspendCheck* const instruction_;
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};
class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
      : instruction_(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location class_to_check = locations->InAt(1);
    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
                                                        : locations->Out();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    uint32_t dex_pc = instruction_->GetDexPc();

    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
        object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);

    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(
          QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
                           const mirror::Class*, const mirror::Class*>();
      Primitive::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const { return is_fatal_; }

 private:
  HInstruction* const instruction_;
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : instruction_(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }

 private:
  HDeoptimize* const instruction_;
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};
class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }

 private:
  HInstruction* const instruction_;

  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};

void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // space in the underlying code buffer and that we have generated a jump table of the right
  // size.
  CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t),
                             CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize);

  __ Bind(&table_start_);
  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t i = 0; i < num_entries; i++) {
    vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
    DCHECK(target_label->IsBound());
    ptrdiff_t jump_offset = target_label->location() - table_start_.location();
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    Literal<int32_t> literal(jump_offset);
    __ place(&literal);
  }
}
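// A rough sketch of the consumer of the table emitted above (the dispatch sequence is
// generated by the PackedSwitch visitor, which is outside this excerpt; register names
// are illustrative). Each literal is the int32 byte offset of a successor's label
// relative to table_start_, hence the table-relative add before the branch:
//
//   adr   x_table, table_start
//   ldrsw x_delta, [x_table, w_index, uxtw #2]
//   add   x_target, x_table, x_delta
//   br    x_target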
// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
      : instruction_(instruction), out_(out), obj_(obj) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Primitive::Type type = Primitive::kPrimNot;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

 private:
  HInstruction* const instruction_;
  const Location out_;
  const Location obj_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
                                           Location out,
                                           Location ref,
                                           Location obj,
                                           uint32_t offset,
                                           Location index)
      : instruction_(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ Ldr(out, HeapOperand(out, class_offset);
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    Primitive::Type type = Primitive::kPrimNot;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(!instruction_->IsInvoke() ||
           (instruction_->IsInvokeStaticOrDirect() &&
            instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation does not support the
    // HArm64IntermediateAddress instruction yet.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()));

    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
          // We are about to change the value of `index_reg` (see the
          // calls to vixl::MacroAssembler::Lsl and
          // vixl::MacroAssembler::Mov below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead, but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg.W(), index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, Operand(offset_));
      } else {
        DCHECK(instruction_->IsInvoke());
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegisterPair());
        // UnsafeGet's offset location is a register pair; the low
        // part contains the correct offset.
        index = index_.ToLow();
      }
    }
    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          type,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          type,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
    }
    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(XRegisterFrom(ref_).code());
    size_t obj = static_cast<int>(XRegisterFrom(obj_).code());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return Register(VIXLRegCodeFromART(i), kXRegSize);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free register";
    UNREACHABLE();
  }

  HInstruction* const instruction_;
  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
};
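// Worked example of the ArrayGet offset computation in the slow path above
// (illustrative numbers): for a reference array element at index 5, the scale shift is
// Primitive::ComponentSizeShift(kPrimNot) = 2 (heap references are 4 bytes), and
// `offset_` is the array data offset; assuming a 12-byte data offset, `index` ends up
// holding (5 << 2) + 12 = 32, i.e. the byte offset of the element within the array
// object, which is what the pReadBarrierSlow entrypoint expects.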
// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
      : instruction_(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Primitive::Type type = Primitive::kPrimNot;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    // The argument of the ReadBarrierForRootSlow entry point is not a managed
    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
    // thus we need a 64-bit move here, and we cannot use
    //
    //   arm64_codegen->MoveLocation(
    //       LocationFrom(calling_convention.GetRegisterAt(0)),
    //       root_,
    //       type);
    //
    // which would emit a 32-bit move, as `type` is a (32-bit wide)
    // reference type (`Primitive::kPrimNot`).
    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }

 private:
  HInstruction* const instruction_;
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
};

#undef __

Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
  Location next_location;
  if (type == Primitive::kPrimVoid) {
    LOG(FATAL) << "Unreachable type " << type;
  }

  if (Primitive::IsFloatingPointType(type) &&
      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
  } else if (!Primitive::IsFloatingPointType(type) &&
             (gp_index_ < calling_convention.GetNumberOfRegisters())) {
    next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
  } else {
    size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
    next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
                                                 : Location::StackSlot(stack_offset);
  }

  // Space on the stack is reserved for all arguments.
  stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
  return next_location;
}
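// Illustrative walk-through of GetNextLocation, for a hypothetical managed signature
// (long, float, int, double): the long takes the first core parameter register, the
// float the first FP parameter register, the int the second core register and the
// double the second FP register. stack_index_ still advances by 2 + 1 + 1 + 2 = 6
// across those calls, because stack space is reserved for every argument, so the
// first argument that overflows the registers would land at GetStackOffsetOf(6).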
Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
  return LocationFrom(kArtMethodRegister);
}

CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
                                       const Arm64InstructionSetFeatures& isa_features,
                                       const CompilerOptions& compiler_options,
                                       OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfAllocatableRegisters,
                    kNumberOfAllocatableFPRegisters,
                    kNumberOfAllocatableRegisterPairs,
                    callee_saved_core_registers.list(),
                    callee_saved_fp_registers.list(),
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetArena(), this),
      isa_features_(isa_features),
      uint64_literals_(std::less<uint64_t>(),
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      method_patches_(MethodReferenceComparator(),
                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      call_patches_(MethodReferenceComparator(),
                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
  // Save the link register (containing the return address) to mimic Quick.
  AddAllocatedRegister(LocationFrom(lr));
}

#define __ GetVIXLAssembler()->

void CodeGeneratorARM64::EmitJumpTables() {
  for (auto jump_table : jump_tables_) {
    jump_table->EmitTable(this);
  }
}

void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
  EmitJumpTables();
  // Ensure we emit the literal pool.
  __ FinalizeCode();

  CodeGenerator::Finalize(allocator);
}

void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
  // Note: There are 6 kinds of moves:
  // 1. constant -> GPR/FPR (non-cycle)
  // 2. constant -> stack (non-cycle)
  // 3. GPR/FPR -> GPR/FPR
  // 4. GPR/FPR -> stack
  // 5. stack -> GPR/FPR
  // 6. stack -> stack (non-cycle)
  // Cases 1, 2 and 6 should never be included in a dependency cycle on ARM64. For cases 3, 4
  // and 5, VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no
  // intersecting cycles on ARM64, so we always have 1 GPR and 1 FPR available as VIXL temps
  // to resolve the dependency.
  vixl_temps_.Open(GetVIXLAssembler());
}

void ParallelMoveResolverARM64::FinishEmitNativeCode() {
  vixl_temps_.Close();
}

Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
  DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister ||
         kind == Location::kStackSlot || kind == Location::kDoubleStackSlot);
  kind = (kind == Location::kFpuRegister) ? Location::kFpuRegister : Location::kRegister;
  Location scratch = GetScratchLocation(kind);
  if (!scratch.Equals(Location::NoLocation())) {
    return scratch;
  }
  // Allocate from VIXL temp registers.
  if (kind == Location::kRegister) {
    scratch = LocationFrom(vixl_temps_.AcquireX());
  } else {
    DCHECK(kind == Location::kFpuRegister);
    scratch = LocationFrom(vixl_temps_.AcquireD());
  }
  AddScratchLocation(scratch);
  return scratch;
}

void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
  if (loc.IsRegister()) {
    vixl_temps_.Release(XRegisterFrom(loc));
  } else {
    DCHECK(loc.IsFpuRegister());
    vixl_temps_.Release(DRegisterFrom(loc));
  }
  RemoveScratchLocation(loc);
}

void ParallelMoveResolverARM64::EmitMove(size_t index) {
  MoveOperands* move = moves_[index];
  codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
}
void CodeGeneratorARM64::GenerateFrameEntry() {
  MacroAssembler* masm = GetVIXLAssembler();
  BlockPoolsScope block_pools(masm);
  __ Bind(&frame_entry_label_);

  bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
  if (do_overflow_check) {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireX();
    DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
    __ Ldr(wzr, MemOperand(temp, 0));
    RecordPcInfo(nullptr, 0);
  }

  if (!HasEmptyFrame()) {
    int frame_size = GetFrameSize();
    // Stack layout:
    //   sp[frame_size - 8] : lr.
    //   ...                : other preserved core registers.
    //   ...                : other preserved fp registers.
    //   ...                : reserved frame space.
    //   sp[0]              : current method.
    __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
    GetAssembler()->cfi().AdjustCFAOffset(frame_size);
    GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
                                   frame_size - GetCoreSpillSize());
    GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
                                   frame_size - FrameEntrySpillSize());
  }
}
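// Illustrative frame example with hypothetical numbers: for frame_size = 64,
// preserved core registers {x20, lr} and no preserved FP registers, the pre-indexed
// Str above produces:
//   sp[56]    : lr
//   sp[48]    : x20
//   sp[8..47] : reserved frame space (spill slots, outgoing arguments, ...)
//   sp[0]     : current method (ArtMethod*).
// GetCoreSpillSize() is 16 here, so the core spills start at frame_size - 16 = 48.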
void CodeGeneratorARM64::GenerateFrameExit() {
  BlockPoolsScope block_pools(GetVIXLAssembler());
  GetAssembler()->cfi().RememberState();
  if (!HasEmptyFrame()) {
    int frame_size = GetFrameSize();
    GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
                                     frame_size - FrameEntrySpillSize());
    GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
                                     frame_size - GetCoreSpillSize());
    __ Drop(frame_size);
    GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
  }
  __ Ret();
  GetAssembler()->cfi().RestoreState();
  GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}

vixl::CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
  return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
                          core_spill_mask_);
}

vixl::CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
  DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
                                         GetNumberOfFloatingPointRegisters()));
  return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
                          fpu_spill_mask_);
}

void CodeGeneratorARM64::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

void CodeGeneratorARM64::Move(HInstruction* instruction,
                              Location location,
                              HInstruction* move_for) {
  LocationSummary* locations = instruction->GetLocations();
  Primitive::Type type = instruction->GetType();
  DCHECK_NE(type, Primitive::kPrimVoid);

  if (instruction->IsCurrentMethod()) {
    MoveLocation(location,
                 Location::DoubleStackSlot(kCurrentMethodStackOffset),
                 Primitive::kPrimVoid);
  } else if (locations != nullptr && locations->Out().Equals(location)) {
    return;
  } else if (instruction->IsIntConstant()
             || instruction->IsLongConstant()
             || instruction->IsNullConstant()) {
    int64_t value = GetInt64ValueOf(instruction->AsConstant());
    if (location.IsRegister()) {
      Register dst = RegisterFrom(location, type);
      DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) ||
             (instruction->IsLongConstant() && dst.Is64Bits()));
      __ Mov(dst, value);
    } else {
      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant())
          ? temps.AcquireW()
          : temps.AcquireX();
      __ Mov(temp, value);
      __ Str(temp, StackOperandFrom(location));
    }
  } else if (instruction->IsTemporary()) {
    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
    MoveLocation(location, temp_location, type);
  } else if (instruction->IsLoadLocal()) {
    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
    if (Primitive::Is64BitType(type)) {
      MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
    } else {
      MoveLocation(location, Location::StackSlot(stack_slot), type);
    }

  } else {
    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
    MoveLocation(location, locations->Out(), type);
  }
}

void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
}

void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
  if (location.IsRegister()) {
    locations->AddTemp(location);
  } else {
    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
  }
}

Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
  Primitive::Type type = load->GetType();

  switch (type) {
    case Primitive::kPrimNot:
    case Primitive::kPrimInt:
    case Primitive::kPrimFloat:
      return Location::StackSlot(GetStackSlot(load->GetLocal()));

    case Primitive::kPrimLong:
    case Primitive::kPrimDouble:
      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));

    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected type " << type;
  }

  LOG(FATAL) << "Unreachable";
  return Location::NoLocation();
}
void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register card = temps.AcquireX();
  Register temp = temps.AcquireW();  // Index within the CardTable - 32bit.
  vixl::Label done;
  if (value_can_be_null) {
    __ Cbz(value, &done);
  }
  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
  __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
  __ Strb(card, MemOperand(card, temp.X()));
  if (value_can_be_null) {
    __ Bind(&done);
  }
}
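// Notes on the sequence above (informational): `card` holds the biased card table
// base loaded from the Thread, which the runtime chooses so that its low byte equals
// the "dirty card" value; `temp` holds object >> kCardShift. The Strb therefore
// writes the dirty value to card_base + (object >> kCardShift), the card covering
// the object, with `card` doing double duty as base address and store value.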
void CodeGeneratorARM64::SetupBlockedRegisters() const {
  // Blocked core registers:
  //   lr       : Runtime reserved.
  //   tr       : Runtime reserved.
  //   xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it.
  //   ip1      : VIXL core temp.
  //   ip0      : VIXL core temp.
  //
  // Blocked fp registers:
  //   d31      : VIXL fp temp.
  CPURegList reserved_core_registers = vixl_reserved_core_registers;
  reserved_core_registers.Combine(runtime_reserved_core_registers);
  while (!reserved_core_registers.IsEmpty()) {
    blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
  }

  CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
  while (!reserved_fp_registers.IsEmpty()) {
    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
  }

  if (GetGraph()->IsDebuggable()) {
    // Stubs do not save callee-save floating point registers. If the graph
    // is debuggable, we need to deal with these registers differently. For
    // now, just block them.
    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
    while (!reserved_fp_registers_debuggable.IsEmpty()) {
      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
    }
  }
}

size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
  __ Str(reg, MemOperand(sp, stack_index));
  return kArm64WordSize;
}

size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
  __ Ldr(reg, MemOperand(sp, stack_index));
  return kArm64WordSize;
}

size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  FPRegister reg = FPRegister(reg_id, kDRegSize);
  __ Str(reg, MemOperand(sp, stack_index));
  return kArm64WordSize;
}

size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  FPRegister reg = FPRegister(reg_id, kDRegSize);
  __ Ldr(reg, MemOperand(sp, stack_index));
  return kArm64WordSize;
}

void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << XRegister(reg);
}

void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << DRegister(reg);
}

void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
  if (constant->IsIntConstant()) {
    __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
  } else if (constant->IsLongConstant()) {
    __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
  } else if (constant->IsNullConstant()) {
    __ Mov(Register(destination), 0);
  } else if (constant->IsFloatConstant()) {
    __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
  } else {
    DCHECK(constant->IsDoubleConstant());
    __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
  }
}
static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
  DCHECK(constant.IsConstant());
  HConstant* cst = constant.GetConstant();
  return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
         // Null is mapped to a core W register, which we associate with kPrimInt.
         (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
         (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
         (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
         (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}

void CodeGeneratorARM64::MoveLocation(Location destination,
                                      Location source,
                                      Primitive::Type dst_type) {
  if (source.Equals(destination)) {
    return;
  }

  // A valid move can always be inferred from the destination and source
  // locations. When moving from and to a register, the argument type can be
  // used to generate 32bit instead of 64bit moves. In debug mode we also
  // check the coherency of the locations and the type.
  bool unspecified_type = (dst_type == Primitive::kPrimVoid);

  if (destination.IsRegister() || destination.IsFpuRegister()) {
    if (unspecified_type) {
      HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
      if (source.IsStackSlot() ||
          (src_cst != nullptr && (src_cst->IsIntConstant()
                                  || src_cst->IsFloatConstant()
                                  || src_cst->IsNullConstant()))) {
        // For stack slots and 32bit constants, a 32bit type is appropriate.
        dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
      } else {
        // If the source is a double stack slot or a 64bit constant, a 64bit
        // type is appropriate. Else the source is a register, and since the
        // type has not been specified, we choose a 64bit type to force a 64bit
        // move.
        dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
      }
    }
    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
           (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
    CPURegister dst = CPURegisterFrom(destination, dst_type);
    if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
      DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
      __ Ldr(dst, StackOperandFrom(source));
    } else if (source.IsConstant()) {
      DCHECK(CoherentConstantAndType(source, dst_type));
      MoveConstant(dst, source.GetConstant());
    } else if (source.IsRegister()) {
      if (destination.IsRegister()) {
        __ Mov(Register(dst), RegisterFrom(source, dst_type));
      } else {
        DCHECK(destination.IsFpuRegister());
        Primitive::Type source_type = Primitive::Is64BitType(dst_type)
            ? Primitive::kPrimLong
            : Primitive::kPrimInt;
        __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
      }
    } else {
      DCHECK(source.IsFpuRegister());
      if (destination.IsRegister()) {
        Primitive::Type source_type = Primitive::Is64BitType(dst_type)
            ? Primitive::kPrimDouble
            : Primitive::kPrimFloat;
        __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
      } else {
        DCHECK(destination.IsFpuRegister());
        __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
      }
    }
  } else {  // The destination is not a register. It must be a stack slot.
    DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
    if (source.IsRegister() || source.IsFpuRegister()) {
      if (unspecified_type) {
        if (source.IsRegister()) {
          dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
        } else {
          dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
        }
      }
      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
             (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
      __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
    } else if (source.IsConstant()) {
      DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
          << source << " " << dst_type;
      UseScratchRegisterScope temps(GetVIXLAssembler());
      HConstant* src_cst = source.GetConstant();
      CPURegister temp;
      if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
        temp = temps.AcquireW();
      } else if (src_cst->IsLongConstant()) {
        temp = temps.AcquireX();
      } else if (src_cst->IsFloatConstant()) {
        temp = temps.AcquireS();
      } else {
        DCHECK(src_cst->IsDoubleConstant());
        temp = temps.AcquireD();
      }
      MoveConstant(temp, src_cst);
      __ Str(temp, StackOperandFrom(destination));
    } else {
      DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
      DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
      UseScratchRegisterScope temps(GetVIXLAssembler());
      // There is generally less pressure on FP registers.
      FPRegister temp = destination.IsDoubleStackSlot() ? temps.AcquireD() : temps.AcquireS();
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}
void CodeGeneratorARM64::Load(Primitive::Type type,
                              CPURegister dst,
                              const MemOperand& src) {
  switch (type) {
    case Primitive::kPrimBoolean:
      __ Ldrb(Register(dst), src);
      break;
    case Primitive::kPrimByte:
      __ Ldrsb(Register(dst), src);
      break;
    case Primitive::kPrimShort:
      __ Ldrsh(Register(dst), src);
      break;
    case Primitive::kPrimChar:
      __ Ldrh(Register(dst), src);
      break;
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
    case Primitive::kPrimLong:
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
      __ Ldr(dst, src);
      break;
    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << type;
  }
}

void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                     CPURegister dst,
                                     const MemOperand& src,
                                     bool needs_null_check) {
  MacroAssembler* masm = GetVIXLAssembler();
  BlockPoolsScope block_pools(masm);
  UseScratchRegisterScope temps(masm);
  Register temp_base = temps.AcquireX();
  Primitive::Type type = instruction->GetType();

  DCHECK(!src.IsPreIndex());
  DCHECK(!src.IsPostIndex());

  // TODO(vixl): Let the MacroAssembler handle MemOperand.
  __ Add(temp_base, src.base(), OperandFromMemOperand(src));
  MemOperand base = MemOperand(temp_base);
  switch (type) {
    case Primitive::kPrimBoolean:
      __ Ldarb(Register(dst), base);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      break;
    case Primitive::kPrimByte:
      __ Ldarb(Register(dst), base);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
      break;
    case Primitive::kPrimChar:
      __ Ldarh(Register(dst), base);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      break;
    case Primitive::kPrimShort:
      __ Ldarh(Register(dst), base);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
      break;
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
    case Primitive::kPrimLong:
      DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
      __ Ldar(Register(dst), base);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      break;
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      DCHECK(dst.IsFPRegister());
      DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));

      Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
      __ Ldar(temp, base);
      if (needs_null_check) {
        MaybeRecordImplicitNullCheck(instruction);
      }
      __ Fmov(FPRegister(dst), temp);
      break;
    }
    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << type;
  }
}
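// Note on LoadAcquire above (informational): ARMv8 has no floating-point or
// sign-extending load-acquire instructions -- only Ldarb/Ldarh/Ldar, which
// zero-extend. Float and double values are therefore loaded through a core register
// and moved with Fmov, and the signed byte/short cases re-sign-extend with an
// explicit Sbfx after the acquire load.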
  Operand op = OperandFromMemOperand(dst);
  __ Add(temp_base, dst.base(), op);
  MemOperand base = MemOperand(temp_base);
  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
      __ Stlrb(Register(src), base);
      break;
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
      __ Stlrh(Register(src), base);
      break;
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
    case Primitive::kPrimLong:
      DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
      __ Stlr(Register(src), base);
      break;
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      DCHECK(src.IsFPRegister());
      DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));

      Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
      __ Fmov(temp, FPRegister(src));
      __ Stlr(temp, base);
      break;
    }
    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unreachable type " << type;
  }
}

void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                       HInstruction* instruction,
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path) {
  InvokeRuntime(GetThreadOffset<kArm64WordSize>(entrypoint).Int32Value(),
                instruction,
                dex_pc,
                slow_path);
}

void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
                                       HInstruction* instruction,
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path) {
  ValidateInvokeRuntime(instruction, slow_path);
  BlockPoolsScope block_pools(GetVIXLAssembler());
  __ Ldr(lr, MemOperand(tr, entry_point_offset));
  __ Blr(lr);
  RecordPcInfo(instruction, dex_pc, slow_path);
}

void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                                                     vixl::Register class_reg) {
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireW();
  size_t status_offset = mirror::Class::StatusOffset().SizeValue();
  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

  // Even if the initialized flag is set, we need to ensure consistent memory ordering.
  if (use_acquire_release) {
    // TODO(vixl): Let the MacroAssembler handle MemOperand.
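    // A sketch of the acquire path emitted below (the status offset is
    // illustrative): the load-acquire of Class::status_ orders all later
    // reads of the class's static fields after the initialization check,
    // which is why no explicit Dmb is needed on this path.
    //   add  w_temp, w_class, #status_offset
    //   ldar w_temp, [x_temp]
    //   cmp  w_temp, #kStatusInitialized
    //   b.lt <slow path>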
    __ Add(temp, class_reg, status_offset);
    __ Ldar(temp, HeapOperand(temp));
    __ Cmp(temp, mirror::Class::kStatusInitialized);
    __ B(lt, slow_path->GetEntryLabel());
  } else {
    __ Ldr(temp, HeapOperand(class_reg, status_offset));
    __ Cmp(temp, mirror::Class::kStatusInitialized);
    __ B(lt, slow_path->GetEntryLabel());
    __ Dmb(InnerShareable, BarrierReads);
  }
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
  BarrierType type = BarrierAll;

  switch (kind) {
    case MemBarrierKind::kAnyAny:
    case MemBarrierKind::kAnyStore: {
      type = BarrierAll;
      break;
    }
    case MemBarrierKind::kLoadAny: {
      type = BarrierReads;
      break;
    }
    case MemBarrierKind::kStoreStore: {
      type = BarrierWrites;
      break;
    }
    default:
      LOG(FATAL) << "Unexpected memory barrier " << kind;
  }
  __ Dmb(InnerShareable, type);
}

void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                         HBasicBlock* successor) {
  SuspendCheckSlowPathARM64* slow_path =
      down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
  if (slow_path == nullptr) {
    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
    instruction->SetSlowPath(slow_path);
    codegen_->AddSlowPath(slow_path);
    if (successor != nullptr) {
      DCHECK(successor->IsLoopHeader());
      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
    }
  } else {
    DCHECK_EQ(slow_path->GetSuccessor(), successor);
  }

  UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
  Register temp = temps.AcquireW();

  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue()));
  if (successor == nullptr) {
    __ Cbnz(temp, slow_path->GetEntryLabel());
    __ Bind(slow_path->GetReturnLabel());
  } else {
    __ Cbz(temp, codegen_->GetLabelOf(successor));
    __ B(slow_path->GetEntryLabel());
    // slow_path will return to GetLabelOf(successor).
  }
}

InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                             CodeGeneratorARM64* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \
  /* No unimplemented IR. */

#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode

enum UnimplementedInstructionBreakCode {
  // Using a base helps identify when we hit such breakpoints.
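  // For illustration (hypothetical, since the list above is currently empty):
  // if FOR_EACH_UNIMPLEMENTED_INSTRUCTION listed M(Foo), this enum would gain
  // FooUnimplementedInstructionBreakCode (0x901 for a single entry) and the
  // generated VisitFoo would emit `brk #0x901`, so the break code observed in
  // a debugger maps back to the unimplemented IR.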
  UnimplementedInstructionBreakCodeBaseCode = 0x900,
#define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
#undef ENUM_UNIMPLEMENTED_INSTRUCTION
};

#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
  void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) {  \
    __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
  }                                                                                   \
  void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
    LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
    locations->SetOut(Location::Any());                                               \
  }
  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS

#undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION

void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
  DCHECK_EQ(instr->InputCount(), 2U);
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
  Primitive::Type type = instr->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;

    default:
      LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
  }
}

void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());

  bool object_field_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_field_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister());
  } else {
    // The output overlaps for an object field get when read barriers
    // are enabled: we do not want the load to overwrite the object's
    // location, as we need it to emit the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        object_field_get_with_read_barrier ?
            Location::kOutputOverlap : Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                   const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
  LocationSummary* locations = instruction->GetLocations();
  Location base_loc = locations->InAt(0);
  Location out = locations->Out();
  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
  Primitive::Type field_type = field_info.GetFieldType();
  BlockPoolsScope block_pools(GetVIXLAssembler());

  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // Object FieldGet with Baker's read barrier case.
    MacroAssembler* masm = GetVIXLAssembler();
    UseScratchRegisterScope temps(masm);
    // /* HeapReference<Object> */ out = *(base + offset)
    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
    Register temp = temps.AcquireW();
    // Note that potential implicit null checks are handled in this
    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
    codegen_->GenerateFieldLoadWithBakerReadBarrier(
        instruction,
        out,
        base,
        offset,
        temp,
        /* needs_null_check */ true,
        field_info.IsVolatile() && use_acquire_release);
    if (field_info.IsVolatile() && !use_acquire_release) {
      // For IRIW sequential consistency kLoadAny is not sufficient.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
    }
  } else {
    // General case.
    if (field_info.IsVolatile()) {
      if (use_acquire_release) {
        // Note that a potential implicit null check is handled in this
        // CodeGeneratorARM64::LoadAcquire call.
        // NB: LoadAcquire will record the pc info if needed.
        codegen_->LoadAcquire(
            instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
      } else {
        codegen_->Load(field_type, OutputCPURegister(instruction), field);
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        // For IRIW sequential consistency kLoadAny is not sufficient.
        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
      }
    } else {
      codegen_->Load(field_type, OutputCPURegister(instruction), field);
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }
    if (field_type == Primitive::kPrimNot) {
      // If read barriers are enabled, emit read barriers other than
      // Baker's using a slow path (and also unpoison the loaded
      // reference, if heap poisoning is enabled).
      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
    }
  }
}

void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
    locations->SetInAt(1, Location::RequiresFpuRegister());
  } else {
    locations->SetInAt(1, Location::RequiresRegister());
  }
}

void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
                                                   const FieldInfo& field_info,
                                                   bool value_can_be_null) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
  BlockPoolsScope block_pools(GetVIXLAssembler());

  Register obj = InputRegisterAt(instruction, 0);
  CPURegister value = InputCPURegisterAt(instruction, 1);
  CPURegister source = value;
  Offset offset = field_info.GetFieldOffset();
  Primitive::Type field_type = field_info.GetFieldType();
  bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();

  {
    // We use a block to end the scratch scope before the write barrier, thus
    // freeing the temporary registers so they can be used in `MarkGCCard`.
    UseScratchRegisterScope temps(GetVIXLAssembler());

    if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
      DCHECK(value.IsW());
      Register temp = temps.AcquireW();
      __ Mov(temp, value.W());
      GetAssembler()->PoisonHeapReference(temp.W());
      source = temp;
    }

    if (field_info.IsVolatile()) {
      if (use_acquire_release) {
        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      } else {
        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
        codegen_->Store(field_type, source, HeapOperand(obj, offset));
        codegen_->MaybeRecordImplicitNullCheck(instruction);
        codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
      }
    } else {
      codegen_->Store(field_type, source, HeapOperand(obj, offset));
      codegen_->MaybeRecordImplicitNullCheck(instruction);
    }
  }

  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
    codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
  }
}

void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
  Primitive::Type type = instr->GetType();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      Register dst = OutputRegister(instr);
      Register lhs = InputRegisterAt(instr, 0);
      Operand rhs = InputOperandAt(instr, 1);
      if (instr->IsAdd()) {
        __ Add(dst, lhs, rhs);
      } else if (instr->IsAnd()) {
        __ And(dst, lhs, rhs);
      } else if (instr->IsOr()) {
        __ Orr(dst, lhs, rhs);
      } else if (instr->IsSub()) {
        __ Sub(dst, lhs, rhs);
      } else if (instr->IsRor()) {
        if (rhs.IsImmediate()) {
          uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
          __ Ror(dst, lhs, shift);
        } else {
          // Ensure the shift distance is in the same size register as the result.
          // If we are rotating a long and the shift distance arrives in a W
          // register, we don't need to sxtw it for use as an X register: the
          // hardware masks the distance with (reg_bits - 1) anyway.
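          // For example (sketch), rotating a long by an amount arriving in w2:
          //   ror x0, x1, x2
          // only consumes x2<5:0>, so whatever sits in the upper bits of x2
          // is ignored by the hardware.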
          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
        }
      } else {
        DCHECK(instr->IsXor());
        __ Eor(dst, lhs, rhs);
      }
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      FPRegister dst = OutputFPRegister(instr);
      FPRegister lhs = InputFPRegisterAt(instr, 0);
      FPRegister rhs = InputFPRegisterAt(instr, 1);
      if (instr->IsAdd()) {
        __ Fadd(dst, lhs, rhs);
      } else if (instr->IsSub()) {
        __ Fsub(dst, lhs, rhs);
      } else {
        LOG(FATAL) << "Unexpected floating-point binary operation";
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected binary operation type " << type;
  }
}

void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());

  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
  Primitive::Type type = instr->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
      locations->SetOut(Location::RequiresRegister());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected shift type " << type;
  }
}

void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());

  Primitive::Type type = instr->GetType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong: {
      Register dst = OutputRegister(instr);
      Register lhs = InputRegisterAt(instr, 0);
      Operand rhs = InputOperandAt(instr, 1);
      if (rhs.IsImmediate()) {
        uint32_t shift_value = (type == Primitive::kPrimInt)
            ? static_cast<uint32_t>(rhs.immediate() & kMaxIntShiftValue)
            : static_cast<uint32_t>(rhs.immediate() & kMaxLongShiftValue);
        if (instr->IsShl()) {
          __ Lsl(dst, lhs, shift_value);
        } else if (instr->IsShr()) {
          __ Asr(dst, lhs, shift_value);
        } else {
          __ Lsr(dst, lhs, shift_value);
        }
      } else {
        Register rhs_reg = dst.IsX() ?
            rhs.reg().X() : rhs.reg().W();

        if (instr->IsShl()) {
          __ Lsl(dst, lhs, rhs_reg);
        } else if (instr->IsShr()) {
          __ Asr(dst, lhs, rhs_reg);
        } else {
          __ Lsr(dst, lhs, rhs_reg);
        }
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected shift operation type " << type;
  }
}

void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
  HandleBinaryOp(instruction);
}

void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
  HandleBinaryOp(instruction);
}

void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
  HandleBinaryOp(instruction);
}

void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
  HandleBinaryOp(instruction);
}

void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
    HArm64DataProcWithShifterOp* instruction) {
  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
         instruction->GetType() == Primitive::kPrimLong);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  if (instruction->GetInstrKind() == HInstruction::kNeg) {
    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
  } else {
    locations->SetInAt(0, Location::RequiresRegister());
  }
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
    HArm64DataProcWithShifterOp* instruction) {
  Primitive::Type type = instruction->GetType();
  HInstruction::InstructionKind kind = instruction->GetInstrKind();
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
  Register out = OutputRegister(instruction);
  Register left;
  if (kind != HInstruction::kNeg) {
    left = InputRegisterAt(instruction, 0);
  }
  // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the
  // shifter operand operation, the IR generating `right_reg` (input to the type
  // conversion) can have a different type from the current instruction's type,
  // so we manually indicate the type.
  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
  int64_t shift_amount = (type == Primitive::kPrimInt)
      ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue)
      : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue);

  Operand right_operand(0);

  HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
  } else {
    right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
  }

  // Logical binary operations do not support extension operations in the
  // operand. Note that VIXL would still manage if it was passed by generating
  // the extension as a separate instruction.
  // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
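  // For example (sketch): `add x0, x1, w2, sxtw` is a valid extended-register
  // form, but there is no `and x0, x1, w2, sxtw`; logical instructions only
  // take shifted registers such as `and x0, x1, x2, lsl #3`. The DCHECK below
  // relies on this.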
  DCHECK(!right_operand.IsExtendedRegister() ||
         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
          kind != HInstruction::kNeg));
  switch (kind) {
    case HInstruction::kAdd:
      __ Add(out, left, right_operand);
      break;
    case HInstruction::kAnd:
      __ And(out, left, right_operand);
      break;
    case HInstruction::kNeg:
      DCHECK(instruction->InputAt(0)->AsConstant()->IsZero());
      __ Neg(out, right_operand);
      break;
    case HInstruction::kOr:
      __ Orr(out, left, right_operand);
      break;
    case HInstruction::kSub:
      __ Sub(out, left, right_operand);
      break;
    case HInstruction::kXor:
      __ Eor(out, left, right_operand);
      break;
    default:
      LOG(FATAL) << "Unexpected operation kind: " << kind;
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
  // The read barrier instrumentation does not support the
  // HArm64IntermediateAddress instruction yet.
  DCHECK(!kEmitCompilerReadBarrier);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
    HArm64IntermediateAddress* instruction) {
  // The read barrier instrumentation does not support the
  // HArm64IntermediateAddress instruction yet.
  DCHECK(!kEmitCompilerReadBarrier);
  __ Add(OutputRegister(instruction),
         InputRegisterAt(instruction, 0),
         Operand(InputOperandAt(instruction, 1)));
}

void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
                     Location::RequiresRegister());
  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
  Register res = OutputRegister(instr);
  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);

  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
  // This fixup should be carried out for all multiply-accumulate instructions:
  // madd, msub, smaddl, smsubl, umaddl and umsubl.
  if (instr->GetType() == Primitive::kPrimLong &&
      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
    vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
    if (prev->IsLoadOrStore()) {
      // Make sure we emit only exactly one nop.
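      // Sketch of the resulting fixup: the erratum can only be triggered by a
      // 64-bit multiply-accumulate that directly follows a memory operation,
      // so a single nop is enough to break up the sequence:
      //   ldr  x3, [sp, #8]
      //   nop                      // inserted here
      //   madd x0, x1, x2, x3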
      vixl::CodeBufferCheckScope scope(masm,
                                       vixl::kInstructionSize,
                                       vixl::CodeBufferCheckScope::kCheck,
                                       vixl::CodeBufferCheckScope::kExactSize);
      __ nop();
    }
  }

  if (instr->GetOpKind() == HInstruction::kAdd) {
    __ Madd(res, mul_left, mul_right, accumulator);
  } else {
    DCHECK(instr->GetOpKind() == HInstruction::kSub);
    __ Msub(res, mul_left, mul_right, accumulator);
  }
}

void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
  bool object_array_get_with_read_barrier =
      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction,
                                                   object_array_get_with_read_barrier ?
                                                       LocationSummary::kCallOnSlowPath :
                                                       LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (Primitive::IsFloatingPointType(instruction->GetType())) {
    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
  } else {
    // The output overlaps in the case of an object array get with
    // read barriers enabled: we do not want the move to overwrite the
    // array's location, as we need it to emit the read barrier.
    locations->SetOut(
        Location::RequiresRegister(),
        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
  Primitive::Type type = instruction->GetType();
  Register obj = InputRegisterAt(instruction, 0);
  LocationSummary* locations = instruction->GetLocations();
  Location index = locations->InAt(1);
  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
  Location out = locations->Out();

  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);
  // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
  BlockPoolsScope block_pools(masm);

  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // Object ArrayGet with Baker's read barrier case.
    Register temp = temps.AcquireW();
    // The read barrier instrumentation does not support the
    // HArm64IntermediateAddress instruction yet.
    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
    // Note that a potential implicit null check is handled in the
    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
    codegen_->GenerateArrayLoadWithBakerReadBarrier(
        instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
  } else {
    // General case.
    MemOperand source = HeapOperand(obj);
    if (index.IsConstant()) {
      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
      source = HeapOperand(obj, offset);
    } else {
      Register temp = temps.AcquireSameSizeAs(obj);
      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
        // The read barrier instrumentation does not support the
        // HArm64IntermediateAddress instruction yet.
        DCHECK(!kEmitCompilerReadBarrier);
        // We do not need to compute the intermediate address from the array: the
        // input instruction has done it already. See the comment in
        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
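        // For illustration (sketch): inside a loop the simplifier turns
        //   add x16, x_array, #data_offset   // repeated for every access
        //   ldr w_out, [x16, x_index, lsl #2]
        // into a single hoisted HArm64IntermediateAddress computing
        // `array + data_offset`, so `obj` here already holds that sum.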
        if (kIsDebugBuild) {
          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
        }
        temp = obj;
      } else {
        __ Add(temp, obj, offset);
      }
      source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
    }

    codegen_->Load(type, OutputCPURegister(instruction), source);
    codegen_->MaybeRecordImplicitNullCheck(instruction);

    if (type == Primitive::kPrimNot) {
      static_assert(
          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
      Location obj_loc = locations->InAt(0);
      if (index.IsConstant()) {
        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
      } else {
        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
      }
    }
  }
}

void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
  BlockPoolsScope block_pools(GetVIXLAssembler());
  __ Ldr(OutputRegister(instruction),
         HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
  codegen_->MaybeRecordImplicitNullCheck(instruction);
}

void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
  Primitive::Type value_type = instruction->GetComponentType();

  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
  bool object_array_set_with_read_barrier =
      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
      instruction,
      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
          LocationSummary::kCallOnSlowPath :
          LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
  if (Primitive::IsFloatingPointType(value_type)) {
    locations->SetInAt(2, Location::RequiresFpuRegister());
  } else {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
  Primitive::Type value_type = instruction->GetComponentType();
  LocationSummary* locations = instruction->GetLocations();
  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());

  Register array = InputRegisterAt(instruction, 0);
  CPURegister value = InputCPURegisterAt(instruction, 2);
  CPURegister source = value;
  Location index = locations->InAt(1);
  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
  MemOperand destination = HeapOperand(array);
  MacroAssembler* masm = GetVIXLAssembler();
  BlockPoolsScope block_pools(masm);

  if (!needs_write_barrier) {
    DCHECK(!may_need_runtime_call_for_type_check);
    if (index.IsConstant()) {
      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
      destination = HeapOperand(array, offset);
    } else {
      UseScratchRegisterScope temps(masm);
      Register temp = temps.AcquireSameSizeAs(array);
      if (instruction->GetArray()->IsArm64IntermediateAddress()) {
        // The read barrier instrumentation does not support the
        // HArm64IntermediateAddress instruction yet.
        DCHECK(!kEmitCompilerReadBarrier);
        // We do not need to compute the intermediate address from the array: the
        // input instruction has done it already. See the comment in
        // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
        if (kIsDebugBuild) {
          HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
          DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
        }
        temp = array;
      } else {
        __ Add(temp, array, offset);
      }
      destination = HeapOperand(temp,
                                XRegisterFrom(index),
                                LSL,
                                Primitive::ComponentSizeShift(value_type));
    }
    codegen_->Store(value_type, value, destination);
    codegen_->MaybeRecordImplicitNullCheck(instruction);
  } else {
    DCHECK(needs_write_barrier);
    DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
    vixl::Label done;
    SlowPathCodeARM64* slow_path = nullptr;
    {
      // We use a block to end the scratch scope before the write barrier, thus
      // freeing the temporary registers so they can be used in `MarkGCCard`.
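      // (MarkGCCard itself needs scratch registers to compute the card
      // address, roughly `card_base + (array >> kCardShift)`; the exact
      // sequence lives in CodeGeneratorARM64::MarkGCCard.)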
      UseScratchRegisterScope temps(masm);
      Register temp = temps.AcquireSameSizeAs(array);
      if (index.IsConstant()) {
        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
        destination = HeapOperand(array, offset);
      } else {
        destination = HeapOperand(temp,
                                  XRegisterFrom(index),
                                  LSL,
                                  Primitive::ComponentSizeShift(value_type));
      }

      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();

      if (may_need_runtime_call_for_type_check) {
        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
        codegen_->AddSlowPath(slow_path);
        if (instruction->GetValueCanBeNull()) {
          vixl::Label non_zero;
          __ Cbnz(Register(value), &non_zero);
          if (!index.IsConstant()) {
            __ Add(temp, array, offset);
          }
          __ Str(wzr, destination);
          codegen_->MaybeRecordImplicitNullCheck(instruction);
          __ B(&done);
          __ Bind(&non_zero);
        }

        if (kEmitCompilerReadBarrier) {
          // When read barriers are enabled, the type checking
          // instrumentation requires two read barriers:
          //
          //   __ Mov(temp2, temp);
          //   // /* HeapReference<Class> */ temp = temp->component_type_
          //   __ Ldr(temp, HeapOperand(temp, component_offset));
          //   codegen_->GenerateReadBarrierSlow(
          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
          //
          //   // /* HeapReference<Class> */ temp2 = value->klass_
          //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
          //   codegen_->GenerateReadBarrierSlow(
          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
          //
          //   __ Cmp(temp, temp2);
          //
          // However, the second read barrier may trash `temp`, as it
          // is a temporary register, and as such would not be saved
          // along with live registers before calling the runtime (nor
          // restored afterwards). So in this case, we bail out and
          // delegate the work to the array set slow path.
          //
          // TODO: Extend the register allocator to support a new
          // "(locally) live temp" location so as to avoid always
          // going into the slow path when read barriers are enabled.
          __ B(slow_path->GetEntryLabel());
        } else {
          Register temp2 = temps.AcquireSameSizeAs(array);
          // /* HeapReference<Class> */ temp = array->klass_
          __ Ldr(temp, HeapOperand(array, class_offset));
          codegen_->MaybeRecordImplicitNullCheck(instruction);
          GetAssembler()->MaybeUnpoisonHeapReference(temp);

          // /* HeapReference<Class> */ temp = temp->component_type_
          __ Ldr(temp, HeapOperand(temp, component_offset));
          // /* HeapReference<Class> */ temp2 = value->klass_
          __ Ldr(temp2, HeapOperand(Register(value), class_offset));
          // If heap poisoning is enabled, no need to unpoison `temp`
          // nor `temp2`, as we are comparing two poisoned references.
          __ Cmp(temp, temp2);

          if (instruction->StaticTypeOfArrayIsObjectArray()) {
            vixl::Label do_put;
            __ B(eq, &do_put);
            // If heap poisoning is enabled, the `temp` reference has
            // not been unpoisoned yet; unpoison it now.
            GetAssembler()->MaybeUnpoisonHeapReference(temp);

            // /* HeapReference<Class> */ temp = temp->super_class_
            __ Ldr(temp, HeapOperand(temp, super_offset));
            // If heap poisoning is enabled, no need to unpoison
            // `temp`, as we are comparing against null below.
            __ Cbnz(temp, slow_path->GetEntryLabel());
            __ Bind(&do_put);
          } else {
            __ B(ne, slow_path->GetEntryLabel());
          }
          temps.Release(temp2);
        }
      }

      if (kPoisonHeapReferences) {
        Register temp2 = temps.AcquireSameSizeAs(array);
        DCHECK(value.IsW());
        __ Mov(temp2, value.W());
        GetAssembler()->PoisonHeapReference(temp2);
        source = temp2;
      }

      if (!index.IsConstant()) {
        __ Add(temp, array, offset);
      }
      __ Str(source, destination);

      if (!may_need_runtime_call_for_type_check) {
        codegen_->MaybeRecordImplicitNullCheck(instruction);
      }
    }

    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());

    if (done.IsLinked()) {
      __ Bind(&done);
    }

    if (slow_path != nullptr) {
      __ Bind(slow_path->GetExitLabel());
    }
  }
}

void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
      ? LocationSummary::kCallOnSlowPath
      : LocationSummary::kNoCall;
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
  if (instruction->HasUses()) {
    locations->SetOut(Location::SameAsFirstInput());
  }
}

void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
  BoundsCheckSlowPathARM64* slow_path =
      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
  codegen_->AddSlowPath(slow_path);

  __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
  __ B(slow_path->GetEntryLabel(), hs);
}

void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
  locations->SetInAt(0, Location::RequiresRegister());
  if (check->HasUses()) {
    locations->SetOut(Location::SameAsFirstInput());
  }
}

void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
  // We assume the class is not null.
  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
      check->GetLoadClass(), check, check->GetDexPc(), true);
  codegen_->AddSlowPath(slow_path);
  GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
}

static bool IsFloatingPointZeroConstant(HInstruction* instruction) {
  return (instruction->IsFloatConstant() && (instruction->AsFloatConstant()->GetValue() == 0.0f))
      || (instruction->IsDoubleConstant() && (instruction->AsDoubleConstant()->GetValue() == 0.0));
}

void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
  Primitive::Type in_type = compare->InputAt(0)->GetType();
  switch (in_type) {
    case Primitive::kPrimLong: {
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1,
                         IsFloatingPointZeroConstant(compare->InputAt(1))
                             ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
                             : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    }
    default:
      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
  }
}

void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
  Primitive::Type in_type = compare->InputAt(0)->GetType();

  //  0 if: left == right
  //  1 if: left  > right
  // -1 if: left  < right
  switch (in_type) {
    case Primitive::kPrimLong: {
      Register result = OutputRegister(compare);
      Register left = InputRegisterAt(compare, 0);
      Operand right = InputOperandAt(compare, 1);

      __ Cmp(left, right);
      __ Cset(result, ne);
      __ Cneg(result, result, lt);
      break;
    }
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble: {
      Register result = OutputRegister(compare);
      FPRegister left = InputFPRegisterAt(compare, 0);
      if (compare->GetLocations()->InAt(1).IsConstant()) {
        DCHECK(IsFloatingPointZeroConstant(compare->GetLocations()->InAt(1).GetConstant()));
        // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
        __ Fcmp(left, 0.0);
      } else {
        __ Fcmp(left, InputFPRegisterAt(compare, 1));
      }
      __ Cset(result, ne);
      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
      break;
    }
    default:
      LOG(FATAL) << "Unimplemented compare type " << in_type;
  }
}

void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);

  if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetInAt(1,
                       IsFloatingPointZeroConstant(instruction->InputAt(1))
                           ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
                           : Location::RequiresFpuRegister());
  } else {
    // Integer cases.
    locations->SetInAt(0, Location::RequiresRegister());
    locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
  }

  if (!instruction->IsEmittedAtUseSite()) {
    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
  if (instruction->IsEmittedAtUseSite()) {
    return;
  }

  LocationSummary* locations = instruction->GetLocations();
  Register res = RegisterFrom(locations->Out(), instruction->GetType());
  IfCondition if_cond = instruction->GetCondition();

  if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
    FPRegister lhs = InputFPRegisterAt(instruction, 0);
    if (locations->InAt(1).IsConstant()) {
      DCHECK(IsFloatingPointZeroConstant(locations->InAt(1).GetConstant()));
      // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
      __ Fcmp(lhs, 0.0);
    } else {
      __ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
    }
    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
  } else {
    // Integer cases.
    Register lhs = InputRegisterAt(instruction, 0);
    Operand rhs = InputOperandAt(instruction, 1);
    __ Cmp(lhs, rhs);
    __ Cset(res, ARM64Condition(if_cond));
  }
}

#define FOR_EACH_CONDITION_INSTRUCTION(M) \
  M(Equal)                                \
  M(NotEqual)                             \
  M(LessThan)                             \
  M(LessThanOrEqual)                      \
  M(GreaterThan)                          \
  M(GreaterThanOrEqual)                   \
  M(Below)                                \
  M(BelowOrEqual)                         \
  M(Above)                                \
  M(AboveOrEqual)
#define DEFINE_CONDITION_VISITORS(Name)                                                   \
void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
#undef DEFINE_CONDITION_VISITORS
#undef FOR_EACH_CONDITION_INSTRUCTION

void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());
  DCHECK(imm == 1 || imm == -1);

  if (instruction->IsRem()) {
    __ Mov(out, 0);
  } else {
    if (imm == 1) {
      __ Mov(out, dividend);
    } else {
      __ Neg(out, dividend);
    }
  }
}

void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());
  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
  int ctz_imm = CTZ(abs_imm);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireSameSizeAs(out);

  if (instruction->IsDiv()) {
    __ Add(temp, dividend, abs_imm - 1);
    __ Cmp(dividend, 0);
    __ Csel(out, temp, dividend, lt);
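    // Worked example (sketch) for an int32 division by 4 (abs_imm = 4,
    // ctz_imm = 2):
    //   dividend = -7: temp = -7 + 3 = -4; csel picks temp since -7 < 0;
    //                  asr(-4, 2) = -1 == trunc(-7 / 4)
    //   dividend =  7: csel keeps 7; asr(7, 2) = 1 == trunc(7 / 4)
    // The (abs_imm - 1) bias makes the arithmetic shift round towards zero
    // for negative dividends.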
    if (imm > 0) {
      __ Asr(out, out, ctz_imm);
    } else {
      __ Neg(out, Operand(out, ASR, ctz_imm));
    }
  } else {
    int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
    __ Asr(temp, dividend, bits - 1);
    __ Lsr(temp, temp, bits - ctz_imm);
    __ Add(out, dividend, temp);
    __ And(out, out, abs_imm - 1);
    __ Sub(out, out, temp);
  }
}

void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());

  LocationSummary* locations = instruction->GetLocations();
  Location second = locations->InAt(1);
  DCHECK(second.IsConstant());

  Register out = OutputRegister(instruction);
  Register dividend = InputRegisterAt(instruction, 0);
  int64_t imm = Int64FromConstant(second.GetConstant());

  Primitive::Type type = instruction->GetResultType();
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  int64_t magic;
  int shift;
  CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);

  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp = temps.AcquireSameSizeAs(out);

  // temp = get_high(dividend * magic)
  __ Mov(temp, magic);
  if (type == Primitive::kPrimLong) {
    __ Smulh(temp, dividend, temp);
  } else {
    __ Smull(temp.X(), dividend, temp);
    __ Lsr(temp.X(), temp.X(), 32);
  }

  if (imm > 0 && magic < 0) {
    __ Add(temp, temp, dividend);
  } else if (imm < 0 && magic > 0) {
    __ Sub(temp, temp, dividend);
  }

  if (shift != 0) {
    __ Asr(temp, temp, shift);
  }

  if (instruction->IsDiv()) {
    __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
  } else {
    __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
    // TODO: Strength reduction for msub.
    Register temp_imm = temps.AcquireSameSizeAs(out);
    __ Mov(temp_imm, imm);
    __ Msub(out, temp, temp_imm, dividend);
  }
}

void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
  DCHECK(instruction->IsDiv() || instruction->IsRem());
  Primitive::Type type = instruction->GetResultType();
  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);

  LocationSummary* locations = instruction->GetLocations();
  Register out = OutputRegister(instruction);
  Location second = locations->InAt(1);

  if (second.IsConstant()) {
    int64_t imm = Int64FromConstant(second.GetConstant());

    if (imm == 0) {
      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
    } else if (imm == 1 || imm == -1) {
      DivRemOneOrMinusOne(instruction);
    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
      DivRemByPowerOfTwo(instruction);
    } else {
      DCHECK(imm <= -2 || imm >= 2);
      GenerateDivRemWithAnyConstant(instruction);
    }
  } else {
    Register dividend = InputRegisterAt(instruction, 0);
    Register divisor = InputRegisterAt(instruction, 1);
    if (instruction->IsDiv()) {
      __ Sdiv(out, dividend, divisor);
    } else {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      Register temp = temps.AcquireSameSizeAs(out);
      __ Sdiv(temp, dividend, divisor);
      __ Msub(out, temp, divisor, dividend);
    }
  }
}

void LocationsBuilderARM64::VisitDiv(HDiv* div) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
  switch (div->GetResultType()) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;

    default:
      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
  }
}

void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
  Primitive::Type type = div->GetResultType();
  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      GenerateDivRemIntegral(div);
      break;

    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
      break;

    default:
      LOG(FATAL) << "Unexpected div type " << type;
  }
}

void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
      ? LocationSummary::kCallOnSlowPath
      : LocationSummary::kNoCall;
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
  if (instruction->HasUses()) {
    locations->SetOut(Location::SameAsFirstInput());
  }
}

void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
  codegen_->AddSlowPath(slow_path);
  Location value = instruction->GetLocations()->InAt(0);

  Primitive::Type type = instruction->GetType();

  if ((type == Primitive::kPrimBoolean) || !Primitive::IsIntegralType(type)) {
    LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
    return;
  }

  if (value.IsConstant()) {
    int64_t divisor = Int64ConstantFrom(value);
    if (divisor == 0) {
      __ B(slow_path->GetEntryLabel());
    } else {
      // A division by a non-zero constant is valid. We don't need to perform
      // any check, so simply fall through.
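      // (When the divisor is not a constant, the else branch below instead
      // reduces the whole check to a single cbz on the divisor register.)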
    }
  } else {
    __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
  }
}

void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorARM64::VisitDoubleConstant(
    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void LocationsBuilderARM64::VisitExit(HExit* exit) {
  exit->SetLocations(nullptr);
}

void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}

void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
  locations->SetOut(Location::ConstantLocation(constant));
}

void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
  // Will be generated at use site.
}

void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
  DCHECK(!successor->IsExitBlock());
  HBasicBlock* block = got->GetBlock();
  HInstruction* previous = got->GetPrevious();
  HLoopInformation* info = block->GetLoopInformation();

  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
    return;
  }
  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
  }
  if (!codegen_->GoesToNextBlock(block, successor)) {
    __ B(codegen_->GetLabelOf(successor));
  }
}

void LocationsBuilderARM64::VisitGoto(HGoto* got) {
  got->SetLocations(nullptr);
}

void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
  HandleGoto(got, got->GetSuccessor());
}

void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
  try_boundary->SetLocations(nullptr);
}

void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
  if (!successor->IsExitBlock()) {
    HandleGoto(try_boundary, successor);
  }
}

void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
                                                          size_t condition_input_index,
                                                          vixl::Label* true_target,
                                                          vixl::Label* false_target) {
  // FP branching requires both targets to be explicit. If either of the targets
  // is nullptr (fallthrough) use and bind `fallthrough_target` instead.
  vixl::Label fallthrough_target;
  HInstruction* cond = instruction->InputAt(condition_input_index);

  if (true_target == nullptr && false_target == nullptr) {
    // Nothing to do. The code always falls through.
    return;
  } else if (cond->IsIntConstant()) {
    // Constant condition, statically compared against 1.
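    // e.g. `if (true)` reaches this point as HIntConstant(1): the taken branch
    // becomes an unconditional jump (or a plain fall-through) and no condition
    // flags are ever set.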
    if (cond->AsIntConstant()->IsOne()) {
      if (true_target != nullptr) {
        __ B(true_target);
      }
    } else {
      DCHECK(cond->AsIntConstant()->IsZero());
      if (false_target != nullptr) {
        __ B(false_target);
      }
    }
    return;
  }

  // The following code generates these patterns:
  //  (1) true_target == nullptr && false_target != nullptr
  //        - opposite condition true => branch to false_target
  //  (2) true_target != nullptr && false_target == nullptr
  //        - condition true => branch to true_target
  //  (3) true_target != nullptr && false_target != nullptr
  //        - condition true => branch to true_target
  //        - branch to false_target
  if (IsBooleanValueOrMaterializedCondition(cond)) {
    // The condition instruction has been materialized, compare the output to 0.
    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
    DCHECK(cond_val.IsRegister());
    if (true_target == nullptr) {
      __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
    } else {
      __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
    }
  } else {
    // The condition instruction has not been materialized, use its inputs as
    // the comparison and its condition as the branch condition.
    HCondition* condition = cond->AsCondition();

    Primitive::Type type = condition->InputAt(0)->GetType();
    if (Primitive::IsFloatingPointType(type)) {
      FPRegister lhs = InputFPRegisterAt(condition, 0);
      if (condition->GetLocations()->InAt(1).IsConstant()) {
        DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant()));
        // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
        __ Fcmp(lhs, 0.0);
      } else {
        __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
      }
      if (true_target == nullptr) {
        IfCondition opposite_condition = condition->GetOppositeCondition();
        __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
      } else {
        __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
      }
    } else {
      // Integer cases.
      Register lhs = InputRegisterAt(condition, 0);
      Operand rhs = InputOperandAt(condition, 1);

      Condition arm64_cond;
      vixl::Label* non_fallthrough_target;
      if (true_target == nullptr) {
        arm64_cond = ARM64Condition(condition->GetOppositeCondition());
        non_fallthrough_target = false_target;
      } else {
        arm64_cond = ARM64Condition(condition->GetCondition());
        non_fallthrough_target = true_target;
      }

      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
          rhs.IsImmediate() && (rhs.immediate() == 0)) {
        switch (arm64_cond) {
          case eq:
            __ Cbz(lhs, non_fallthrough_target);
            break;
          case ne:
            __ Cbnz(lhs, non_fallthrough_target);
            break;
          case lt:
            // Test the sign bit and branch accordingly.
            __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
            break;
          case ge:
            // Test the sign bit and branch accordingly.
            __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
            break;
          default:
            // Without the `static_cast` the compiler throws an error for
            // `-Werror=sign-promo`.
2953 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); 2954 } 2955 } else { 2956 __ Cmp(lhs, rhs); 2957 __ B(arm64_cond, non_fallthrough_target); 2958 } 2959 } 2960 } 2961 2962 // If neither branch falls through (case 3), the conditional branch to `true_target` 2963 // was already emitted (case 2) and we need to emit a jump to `false_target`. 2964 if (true_target != nullptr && false_target != nullptr) { 2965 __ B(false_target); 2966 } 2967 2968 if (fallthrough_target.IsLinked()) { 2969 __ Bind(&fallthrough_target); 2970 } 2971} 2972 2973void LocationsBuilderARM64::VisitIf(HIf* if_instr) { 2974 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); 2975 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 2976 locations->SetInAt(0, Location::RequiresRegister()); 2977 } 2978} 2979 2980void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { 2981 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 2982 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 2983 vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 2984 nullptr : codegen_->GetLabelOf(true_successor); 2985 vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 2986 nullptr : codegen_->GetLabelOf(false_successor); 2987 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); 2988} 2989 2990void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 2991 LocationSummary* locations = new (GetGraph()->GetArena()) 2992 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 2993 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 2994 locations->SetInAt(0, Location::RequiresRegister()); 2995 } 2996} 2997 2998void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 2999 SlowPathCodeARM64* slow_path = 3000 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); 3001 GenerateTestAndBranch(deoptimize, 3002 /* condition_input_index */ 0, 3003 slow_path->GetEntryLabel(), 3004 /* false_target */ nullptr); 3005} 3006 3007void LocationsBuilderARM64::VisitSelect(HSelect* select) { 3008 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); 3009 if (Primitive::IsFloatingPointType(select->GetType())) { 3010 locations->SetInAt(0, Location::RequiresFpuRegister()); 3011 locations->SetInAt(1, Location::RequiresFpuRegister()); 3012 } else { 3013 locations->SetInAt(0, Location::RequiresRegister()); 3014 locations->SetInAt(1, Location::RequiresRegister()); 3015 } 3016 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 3017 locations->SetInAt(2, Location::RequiresRegister()); 3018 } 3019 locations->SetOut(Location::SameAsFirstInput()); 3020} 3021 3022void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { 3023 LocationSummary* locations = select->GetLocations(); 3024 vixl::Label false_target; 3025 GenerateTestAndBranch(select, 3026 /* condition_input_index */ 2, 3027 /* true_target */ nullptr, 3028 &false_target); 3029 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); 3030 __ Bind(&false_target); 3031} 3032 3033void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3034 new (GetGraph()->GetArena()) LocationSummary(info); 3035} 3036 3037void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3038 if (codegen_->HasStackMapAtCurrentPc()) { 3039 // Ensure that 
we do not collide with the stack map of the previous instruction. 3040 __ Nop(); 3041 } 3042 codegen_->RecordPcInfo(info, info->GetDexPc()); 3043} 3044 3045void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3046 HandleFieldGet(instruction); 3047} 3048 3049void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3050 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3051} 3052 3053void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3054 HandleFieldSet(instruction); 3055} 3056 3057void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3058 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 3059} 3060 3061static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { 3062 return kEmitCompilerReadBarrier && 3063 (kUseBakerReadBarrier || 3064 type_check_kind == TypeCheckKind::kAbstractClassCheck || 3065 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 3066 type_check_kind == TypeCheckKind::kArrayObjectCheck); 3067} 3068 3069void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { 3070 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 3071 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3072 switch (type_check_kind) { 3073 case TypeCheckKind::kExactCheck: 3074 case TypeCheckKind::kAbstractClassCheck: 3075 case TypeCheckKind::kClassHierarchyCheck: 3076 case TypeCheckKind::kArrayObjectCheck: 3077 call_kind = 3078 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 3079 break; 3080 case TypeCheckKind::kArrayCheck: 3081 case TypeCheckKind::kUnresolvedCheck: 3082 case TypeCheckKind::kInterfaceCheck: 3083 call_kind = LocationSummary::kCallOnSlowPath; 3084 break; 3085 } 3086 3087 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); 3088 locations->SetInAt(0, Location::RequiresRegister()); 3089 locations->SetInAt(1, Location::RequiresRegister()); 3090 // The "out" register is used as a temporary, so it overlaps with the inputs. 3091 // Note that TypeCheckSlowPathARM64 uses this register too. 3092 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 3093 // When read barriers are enabled, we need a temporary register for 3094 // some cases. 3095 if (TypeCheckNeedsATemporary(type_check_kind)) { 3096 locations->AddTemp(Location::RequiresRegister()); 3097 } 3098} 3099 3100void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { 3101 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3102 LocationSummary* locations = instruction->GetLocations(); 3103 Location obj_loc = locations->InAt(0); 3104 Register obj = InputRegisterAt(instruction, 0); 3105 Register cls = InputRegisterAt(instruction, 1); 3106 Location out_loc = locations->Out(); 3107 Register out = OutputRegister(instruction); 3108 Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? 
3109 locations->GetTemp(0) : 3110 Location::NoLocation(); 3111 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3112 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3113 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3114 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3115 3116 vixl::Label done, zero; 3117 SlowPathCodeARM64* slow_path = nullptr; 3118 3119 // Return 0 if `obj` is null. 3120 // Avoid null check if we know `obj` is not null. 3121 if (instruction->MustDoNullCheck()) { 3122 __ Cbz(obj, &zero); 3123 } 3124 3125 // /* HeapReference<Class> */ out = obj->klass_ 3126 GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); 3127 3128 switch (type_check_kind) { 3129 case TypeCheckKind::kExactCheck: { 3130 __ Cmp(out, cls); 3131 __ Cset(out, eq); 3132 if (zero.IsLinked()) { 3133 __ B(&done); 3134 } 3135 break; 3136 } 3137 3138 case TypeCheckKind::kAbstractClassCheck: { 3139 // If the class is abstract, we eagerly fetch the super class of the 3140 // object to avoid doing a comparison we know will fail. 3141 vixl::Label loop, success; 3142 __ Bind(&loop); 3143 // /* HeapReference<Class> */ out = out->super_class_ 3144 GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); 3145 // If `out` is null, we use it for the result, and jump to `done`. 3146 __ Cbz(out, &done); 3147 __ Cmp(out, cls); 3148 __ B(ne, &loop); 3149 __ Mov(out, 1); 3150 if (zero.IsLinked()) { 3151 __ B(&done); 3152 } 3153 break; 3154 } 3155 3156 case TypeCheckKind::kClassHierarchyCheck: { 3157 // Walk over the class hierarchy to find a match. 3158 vixl::Label loop, success; 3159 __ Bind(&loop); 3160 __ Cmp(out, cls); 3161 __ B(eq, &success); 3162 // /* HeapReference<Class> */ out = out->super_class_ 3163 GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); 3164 __ Cbnz(out, &loop); 3165 // If `out` is null, we use it for the result, and jump to `done`. 3166 __ B(&done); 3167 __ Bind(&success); 3168 __ Mov(out, 1); 3169 if (zero.IsLinked()) { 3170 __ B(&done); 3171 } 3172 break; 3173 } 3174 3175 case TypeCheckKind::kArrayObjectCheck: { 3176 // Do an exact check. 3177 vixl::Label exact_check; 3178 __ Cmp(out, cls); 3179 __ B(eq, &exact_check); 3180 // Otherwise, we need to check that the object's class is a non-primitive array. 3181 // /* HeapReference<Class> */ out = out->component_type_ 3182 GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc); 3183 // If `out` is null, we use it for the result, and jump to `done`. 
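      // The loop emitted below is, in outline:
      //   loop: out = out->super_class_
      //         if (out == null) goto done   // `out` is 0, i.e. not an instance
      //         if (out != cls) goto loop
      //         out = 1                      // found a matching superclass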
3184 __ Cbz(out, &done); 3185 __ Ldrh(out, HeapOperand(out, primitive_offset)); 3186 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 3187 __ Cbnz(out, &zero); 3188 __ Bind(&exact_check); 3189 __ Mov(out, 1); 3190 __ B(&done); 3191 break; 3192 } 3193 3194 case TypeCheckKind::kArrayCheck: { 3195 __ Cmp(out, cls); 3196 DCHECK(locations->OnlyCallsOnSlowPath()); 3197 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, 3198 /* is_fatal */ false); 3199 codegen_->AddSlowPath(slow_path); 3200 __ B(ne, slow_path->GetEntryLabel()); 3201 __ Mov(out, 1); 3202 if (zero.IsLinked()) { 3203 __ B(&done); 3204 } 3205 break; 3206 } 3207 3208 case TypeCheckKind::kUnresolvedCheck: 3209 case TypeCheckKind::kInterfaceCheck: { 3210 // Note that we indeed only call on slow path, but we always go 3211 // into the slow path for the unresolved and interface check 3212 // cases. 3213 // 3214 // We cannot directly call the InstanceofNonTrivial runtime 3215 // entry point without resorting to a type checking slow path 3216 // here (i.e. by calling InvokeRuntime directly), as it would 3217 // require to assign fixed registers for the inputs of this 3218 // HInstanceOf instruction (following the runtime calling 3219 // convention), which might be cluttered by the potential first 3220 // read barrier emission at the beginning of this method. 3221 // 3222 // TODO: Introduce a new runtime entry point taking the object 3223 // to test (instead of its class) as argument, and let it deal 3224 // with the read barrier issues. This will let us refactor this 3225 // case of the `switch` code as it was previously (with a direct 3226 // call to the runtime not using a type checking slow path). 3227 // This should also be beneficial for the other cases above. 3228 DCHECK(locations->OnlyCallsOnSlowPath()); 3229 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, 3230 /* is_fatal */ false); 3231 codegen_->AddSlowPath(slow_path); 3232 __ B(slow_path->GetEntryLabel()); 3233 if (zero.IsLinked()) { 3234 __ B(&done); 3235 } 3236 break; 3237 } 3238 } 3239 3240 if (zero.IsLinked()) { 3241 __ Bind(&zero); 3242 __ Mov(out, 0); 3243 } 3244 3245 if (done.IsLinked()) { 3246 __ Bind(&done); 3247 } 3248 3249 if (slow_path != nullptr) { 3250 __ Bind(slow_path->GetExitLabel()); 3251 } 3252} 3253 3254void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { 3255 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 3256 bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); 3257 3258 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3259 switch (type_check_kind) { 3260 case TypeCheckKind::kExactCheck: 3261 case TypeCheckKind::kAbstractClassCheck: 3262 case TypeCheckKind::kClassHierarchyCheck: 3263 case TypeCheckKind::kArrayObjectCheck: 3264 call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? 3265 LocationSummary::kCallOnSlowPath : 3266 LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 3267 break; 3268 case TypeCheckKind::kArrayCheck: 3269 case TypeCheckKind::kUnresolvedCheck: 3270 case TypeCheckKind::kInterfaceCheck: 3271 call_kind = LocationSummary::kCallOnSlowPath; 3272 break; 3273 } 3274 3275 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); 3276 locations->SetInAt(0, Location::RequiresRegister()); 3277 locations->SetInAt(1, Location::RequiresRegister()); 3278 // Note that TypeCheckSlowPathARM64 uses this "temp" register too. 
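  // For most check kinds the generated code overwrites this temp while walking
  // up the class hierarchy, so it cannot simply reuse one of the input registers.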
3279 locations->AddTemp(Location::RequiresRegister()); 3280 // When read barriers are enabled, we need an additional temporary 3281 // register for some cases. 3282 if (TypeCheckNeedsATemporary(type_check_kind)) { 3283 locations->AddTemp(Location::RequiresRegister()); 3284 } 3285} 3286 3287void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { 3288 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3289 LocationSummary* locations = instruction->GetLocations(); 3290 Location obj_loc = locations->InAt(0); 3291 Register obj = InputRegisterAt(instruction, 0); 3292 Register cls = InputRegisterAt(instruction, 1); 3293 Location temp_loc = locations->GetTemp(0); 3294 Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? 3295 locations->GetTemp(1) : 3296 Location::NoLocation(); 3297 Register temp = WRegisterFrom(temp_loc); 3298 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3299 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3300 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3301 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3302 3303 bool is_type_check_slow_path_fatal = 3304 (type_check_kind == TypeCheckKind::kExactCheck || 3305 type_check_kind == TypeCheckKind::kAbstractClassCheck || 3306 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 3307 type_check_kind == TypeCheckKind::kArrayObjectCheck) && 3308 !instruction->CanThrowIntoCatchBlock(); 3309 SlowPathCodeARM64* type_check_slow_path = 3310 new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, 3311 is_type_check_slow_path_fatal); 3312 codegen_->AddSlowPath(type_check_slow_path); 3313 3314 vixl::Label done; 3315 // Avoid null check if we know obj is not null. 3316 if (instruction->MustDoNullCheck()) { 3317 __ Cbz(obj, &done); 3318 } 3319 3320 // /* HeapReference<Class> */ temp = obj->klass_ 3321 GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); 3322 3323 switch (type_check_kind) { 3324 case TypeCheckKind::kExactCheck: 3325 case TypeCheckKind::kArrayCheck: { 3326 __ Cmp(temp, cls); 3327 // Jump to slow path for throwing the exception or doing a 3328 // more involved array check. 3329 __ B(ne, type_check_slow_path->GetEntryLabel()); 3330 break; 3331 } 3332 3333 case TypeCheckKind::kAbstractClassCheck: { 3334 // If the class is abstract, we eagerly fetch the super class of the 3335 // object to avoid doing a comparison we know will fail. 3336 vixl::Label loop, compare_classes; 3337 __ Bind(&loop); 3338 // /* HeapReference<Class> */ temp = temp->super_class_ 3339 GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); 3340 3341 // If the class reference currently in `temp` is not null, jump 3342 // to the `compare_classes` label to compare it with the checked 3343 // class. 3344 __ Cbnz(temp, &compare_classes); 3345 // Otherwise, jump to the slow path to throw the exception. 3346 // 3347 // But before, move back the object's class into `temp` before 3348 // going into the slow path, as it has been overwritten in the 3349 // meantime. 
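      // (As noted in the locations builder, TypeCheckSlowPathARM64 also reads
      // this register, so it must hold the original class on entry.)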
3350 // /* HeapReference<Class> */ temp = obj->klass_
3351 GenerateReferenceLoadTwoRegisters(
3352 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
3353 __ B(type_check_slow_path->GetEntryLabel());
3354
3355 __ Bind(&compare_classes);
3356 __ Cmp(temp, cls);
3357 __ B(ne, &loop);
3358 break;
3359 }
3360
3361 case TypeCheckKind::kClassHierarchyCheck: {
3362 // Walk over the class hierarchy to find a match.
3363 vixl::Label loop;
3364 __ Bind(&loop);
3365 __ Cmp(temp, cls);
3366 __ B(eq, &done);
3367
3368 // /* HeapReference<Class> */ temp = temp->super_class_
3369 GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
3370
3371 // If the class reference currently in `temp` is not null, jump
3372 // back to the beginning of the loop.
3373 __ Cbnz(temp, &loop);
3374 // Otherwise, jump to the slow path to throw the exception.
3375 //
3376 // But before, move back the object's class into `temp` before
3377 // going into the slow path, as it has been overwritten in the
3378 // meantime.
3379 // /* HeapReference<Class> */ temp = obj->klass_
3380 GenerateReferenceLoadTwoRegisters(
3381 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
3382 __ B(type_check_slow_path->GetEntryLabel());
3383 break;
3384 }
3385
3386 case TypeCheckKind::kArrayObjectCheck: {
3387 // Do an exact check.
3388 vixl::Label check_non_primitive_component_type;
3389 __ Cmp(temp, cls);
3390 __ B(eq, &done);
3391
3392 // Otherwise, we need to check that the object's class is a non-primitive array.
3393 // /* HeapReference<Class> */ temp = temp->component_type_
3394 GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
3395
3396 // If the component type is not null (i.e. the object is indeed
3397 // an array), jump to label `check_non_primitive_component_type`
3398 // to further check that this component type is not a primitive
3399 // type.
3400 __ Cbnz(temp, &check_non_primitive_component_type);
3401 // Otherwise, jump to the slow path to throw the exception.
3402 //
3403 // But before, move back the object's class into `temp` before
3404 // going into the slow path, as it has been overwritten in the
3405 // meantime.
3406 // /* HeapReference<Class> */ temp = obj->klass_
3407 GenerateReferenceLoadTwoRegisters(
3408 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
3409 __ B(type_check_slow_path->GetEntryLabel());
3410
3411 __ Bind(&check_non_primitive_component_type);
3412 __ Ldrh(temp, HeapOperand(temp, primitive_offset));
3413 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3414 __ Cbz(temp, &done);
3415 // Same comment as above regarding `temp` and the slow path.
3416 // /* HeapReference<Class> */ temp = obj->klass_
3417 GenerateReferenceLoadTwoRegisters(
3418 instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
3419 __ B(type_check_slow_path->GetEntryLabel());
3420 break;
3421 }
3422
3423 case TypeCheckKind::kUnresolvedCheck:
3424 case TypeCheckKind::kInterfaceCheck:
3425 // We always go into the type check slow path for the unresolved
3426 // and interface check cases.
3427 //
3428 // We cannot directly call the CheckCast runtime entry point
3429 // without resorting to a type checking slow path here (i.e. by
3430 // calling InvokeRuntime directly), as it would require assigning
3431 // fixed registers for the inputs of this HCheckCast
3432 // instruction (following the runtime calling convention), which
3433 // might be cluttered by the potential first read barrier
3434 // emission at the beginning of this method.
3435 //
3436 // TODO: Introduce a new runtime entry point taking the object
3437 // to test (instead of its class) as argument, and let it deal
3438 // with the read barrier issues. This will let us refactor this
3439 // case of the `switch` code as it was previously (with a direct
3440 // call to the runtime not using a type checking slow path).
3441 // This should also be beneficial for the other cases above.
3442 __ B(type_check_slow_path->GetEntryLabel());
3443 break;
3444 }
3445 __ Bind(&done);
3446
3447 __ Bind(type_check_slow_path->GetExitLabel());
3448}
3449
3450void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
3451 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
3452 locations->SetOut(Location::ConstantLocation(constant));
3453}
3454
3455void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
3456 // Will be generated at use site.
3457}
3458
3459void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
3460 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
3461 locations->SetOut(Location::ConstantLocation(constant));
3462}
3463
3464void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
3465 // Will be generated at use site.
3466}
3467
3468void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3469 // The trampoline uses the same calling convention as dex calling conventions,
3470 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
3471 // the method_idx.
3472 HandleInvoke(invoke);
3473}
3474
3475void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3476 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3477}
3478
3479void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
3480 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
3481 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3482}
3483
3484void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
3485 HandleInvoke(invoke);
3486}
3487
3488void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
3489 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3490 LocationSummary* locations = invoke->GetLocations();
3491 Register temp = XRegisterFrom(locations->GetTemp(0));
3492 uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
3493 invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
3494 Location receiver = locations->InAt(0);
3495 Offset class_offset = mirror::Object::ClassOffset();
3496 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
3497
3498 // The register ip1 is required to be used for the hidden argument in
3499 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
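 // (ip1 is x17 on ARM64; excluding it from the scratch pool below keeps the
 // macro-assembler from clobbering the hidden argument before the call.)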
3500 MacroAssembler* masm = GetVIXLAssembler(); 3501 UseScratchRegisterScope scratch_scope(masm); 3502 BlockPoolsScope block_pools(masm); 3503 scratch_scope.Exclude(ip1); 3504 __ Mov(ip1, invoke->GetDexMethodIndex()); 3505 3506 if (receiver.IsStackSlot()) { 3507 __ Ldr(temp.W(), StackOperandFrom(receiver)); 3508 // /* HeapReference<Class> */ temp = temp->klass_ 3509 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); 3510 } else { 3511 // /* HeapReference<Class> */ temp = receiver->klass_ 3512 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); 3513 } 3514 codegen_->MaybeRecordImplicitNullCheck(invoke); 3515 // Instead of simply (possibly) unpoisoning `temp` here, we should 3516 // emit a read barrier for the previous class reference load. 3517 // However this is not required in practice, as this is an 3518 // intermediate/temporary reference and because the current 3519 // concurrent copying collector keeps the from-space memory 3520 // intact/accessible until the end of the marking phase (the 3521 // concurrent copying collector may not in the future). 3522 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 3523 // temp = temp->GetImtEntryAt(method_offset); 3524 __ Ldr(temp, MemOperand(temp, method_offset)); 3525 // lr = temp->GetEntryPoint(); 3526 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value())); 3527 // lr(); 3528 __ Blr(lr); 3529 DCHECK(!codegen_->IsLeafMethod()); 3530 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 3531} 3532 3533void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 3534 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); 3535 if (intrinsic.TryDispatch(invoke)) { 3536 return; 3537 } 3538 3539 HandleInvoke(invoke); 3540} 3541 3542void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 3543 // Explicit clinit checks triggered by static invokes must have been pruned by 3544 // art::PrepareForRegisterAllocation. 3545 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 3546 3547 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); 3548 if (intrinsic.TryDispatch(invoke)) { 3549 return; 3550 } 3551 3552 HandleInvoke(invoke); 3553} 3554 3555static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) { 3556 if (invoke->GetLocations()->Intrinsified()) { 3557 IntrinsicCodeGeneratorARM64 intrinsic(codegen); 3558 intrinsic.Dispatch(invoke); 3559 return true; 3560 } 3561 return false; 3562} 3563 3564HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( 3565 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 3566 MethodReference target_method ATTRIBUTE_UNUSED) { 3567 // On ARM64 we support all dispatch types. 3568 return desired_dispatch_info; 3569} 3570 3571void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { 3572 // For better instruction scheduling we load the direct code pointer before the method pointer. 3573 bool direct_code_loaded = false; 3574 switch (invoke->GetCodePtrLocation()) { 3575 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: 3576 // LR = code address from literal pool with link-time patch. 
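      // (The literal is created holding 0 and only receives its real value at
      // link time; see DeduplicateMethodCodeLiteral and EmitLinkerPatches below.)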
3577 __ Ldr(lr, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); 3578 direct_code_loaded = true; 3579 break; 3580 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: 3581 // LR = invoke->GetDirectCodePtr(); 3582 __ Ldr(lr, DeduplicateUint64Literal(invoke->GetDirectCodePtr())); 3583 direct_code_loaded = true; 3584 break; 3585 default: 3586 break; 3587 } 3588 3589 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. 3590 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 3591 switch (invoke->GetMethodLoadKind()) { 3592 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: 3593 // temp = thread->string_init_entrypoint 3594 __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); 3595 break; 3596 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: 3597 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 3598 break; 3599 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: 3600 // Load method address from literal pool. 3601 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); 3602 break; 3603 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: 3604 // Load method address from literal pool with a link-time patch. 3605 __ Ldr(XRegisterFrom(temp), 3606 DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); 3607 break; 3608 case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { 3609 // Add ADRP with its PC-relative DexCache access patch. 3610 pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, 3611 invoke->GetDexCacheArrayOffset()); 3612 vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label; 3613 { 3614 vixl::SingleEmissionCheckScope guard(GetVIXLAssembler()); 3615 __ Bind(pc_insn_label); 3616 __ adrp(XRegisterFrom(temp), 0); 3617 } 3618 pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label; 3619 // Add LDR with its PC-relative DexCache access patch. 
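      // Together with the ADRP above, this forms the usual ARM64 two-instruction
      // PC-relative access: ADRP materializes the 4KiB-aligned page address and
      // the LDR adds the offset within the page. Both are emitted with
      // placeholder immediates (0) that the linker rewrites via the recorded
      // labels.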
3620 pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
3621 invoke->GetDexCacheArrayOffset());
3622 {
3623 vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
3624 __ Bind(&pc_relative_dex_cache_patches_.back().label);
3625 __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), 0));
3626 pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
3627 }
3628 break;
3629 }
3630 case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
3631 Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
3632 Register reg = XRegisterFrom(temp);
3633 Register method_reg;
3634 if (current_method.IsRegister()) {
3635 method_reg = XRegisterFrom(current_method);
3636 } else {
3637 DCHECK(invoke->GetLocations()->Intrinsified());
3638 DCHECK(!current_method.IsValid());
3639 method_reg = reg;
3640 __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
3641 }
3642
3643 // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
3644 __ Ldr(reg.X(),
3645 MemOperand(method_reg.X(),
3646 ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
3647 // temp = temp[index_in_cache];
3648 uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
3649 __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
3650 break;
3651 }
3652 }
3653
3654 switch (invoke->GetCodePtrLocation()) {
3655 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
3656 __ Bl(&frame_entry_label_);
3657 break;
3658 case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
3659 relative_call_patches_.emplace_back(invoke->GetTargetMethod());
3660 vixl::Label* label = &relative_call_patches_.back().label;
3661 vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
3662 __ Bind(label);
3663 __ bl(0); // Branch and link to itself. This will be overridden at link time.
3664 break;
3665 }
3666 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
3667 case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
3668 // LR prepared above for better instruction scheduling.
3669 DCHECK(direct_code_loaded);
3670 // lr()
3671 __ Blr(lr);
3672 break;
3673 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
3674 // LR = callee_method->entry_point_from_quick_compiled_code_;
3675 __ Ldr(lr, MemOperand(
3676 XRegisterFrom(callee_method),
3677 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
3678 // lr()
3679 __ Blr(lr);
3680 break;
3681 }
3682
3683 DCHECK(!IsLeafMethod());
3684}
3685
3686void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
3687 // Use the calling convention instead of the location of the receiver, as
3688 // intrinsics may have put the receiver in a different register. In the intrinsics
3689 // slow path, the arguments have been moved to the right place, so here we are
3690 // guaranteed that the receiver is the first register of the calling convention.
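 // In outline, the dispatch emitted below is:
 //   temp = receiver->klass_                   (32-bit heap reference)
 //   temp = temp->GetMethodAt(method_offset)   (ArtMethod* from the embedded vtable)
 //   lr = temp->GetEntryPoint(); blr lr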
3691 InvokeDexCallingConvention calling_convention;
3692 Register receiver = calling_convention.GetRegisterAt(0);
3693 Register temp = XRegisterFrom(temp_in);
3694 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3695 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
3696 Offset class_offset = mirror::Object::ClassOffset();
3697 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
3698
3699 BlockPoolsScope block_pools(GetVIXLAssembler());
3700
3701 DCHECK(receiver.IsRegister());
3702 // /* HeapReference<Class> */ temp = receiver->klass_
3703 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
3704 MaybeRecordImplicitNullCheck(invoke);
3705 // Instead of simply (possibly) unpoisoning `temp` here, we should
3706 // emit a read barrier for the previous class reference load.
3707 // However this is not required in practice, as this is an intermediate/temporary reference and because the current
3708 // concurrent copying collector keeps the from-space memory
3709 // intact/accessible until the end of the marking phase (the
3710 // concurrent copying collector may not in the future).
3711 GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
3712 // temp = temp->GetMethodAt(method_offset);
3713 __ Ldr(temp, MemOperand(temp, method_offset));
3714 // lr = temp->GetEntryPoint();
3715 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
3716 // lr();
3717 __ Blr(lr);
3718}
3719
3720void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
3721 DCHECK(linker_patches->empty());
3722 size_t size =
3723 method_patches_.size() +
3724 call_patches_.size() +
3725 relative_call_patches_.size() +
3726 pc_relative_dex_cache_patches_.size();
3727 linker_patches->reserve(size);
3728 for (const auto& entry : method_patches_) {
3729 const MethodReference& target_method = entry.first;
3730 vixl::Literal<uint64_t>* literal = entry.second;
3731 linker_patches->push_back(LinkerPatch::MethodPatch(literal->offset(),
3732 target_method.dex_file,
3733 target_method.dex_method_index));
3734 }
3735 for (const auto& entry : call_patches_) {
3736 const MethodReference& target_method = entry.first;
3737 vixl::Literal<uint64_t>* literal = entry.second;
3738 linker_patches->push_back(LinkerPatch::CodePatch(literal->offset(),
3739 target_method.dex_file,
3740 target_method.dex_method_index));
3741 }
3742 for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
3743 linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(),
3744 info.target_method.dex_file,
3745 info.target_method.dex_method_index));
3746 }
3747 for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
3748 linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
3749 &info.target_dex_file,
3750 info.pc_insn_label->location(),
3751 info.element_offset));
3752 }
3753}
3754
3755vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
3756 // Look up the literal for value.
3757 auto lb = uint64_literals_.lower_bound(value);
3758 if (lb != uint64_literals_.end() && !uint64_literals_.key_comp()(value, lb->first)) {
3759 return lb->second;
3760 }
3761 // We don't have a literal for this value, insert a new one.
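 // (The map is ordered, so the lower_bound computed above doubles as the
 // insertion hint; each distinct value thus gets a single literal pool entry.)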
3762 vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(value); 3763 uint64_literals_.PutBefore(lb, value, literal); 3764 return literal; 3765} 3766 3767vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral( 3768 MethodReference target_method, 3769 MethodToLiteralMap* map) { 3770 // Look up the literal for target_method. 3771 auto lb = map->lower_bound(target_method); 3772 if (lb != map->end() && !map->key_comp()(target_method, lb->first)) { 3773 return lb->second; 3774 } 3775 // We don't have a literal for this method yet, insert a new one. 3776 vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(0u); 3777 map->PutBefore(lb, target_method, literal); 3778 return literal; 3779} 3780 3781vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral( 3782 MethodReference target_method) { 3783 return DeduplicateMethodLiteral(target_method, &method_patches_); 3784} 3785 3786vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral( 3787 MethodReference target_method) { 3788 return DeduplicateMethodLiteral(target_method, &call_patches_); 3789} 3790 3791 3792void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 3793 // Explicit clinit checks triggered by static invokes must have been pruned by 3794 // art::PrepareForRegisterAllocation. 3795 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 3796 3797 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 3798 return; 3799 } 3800 3801 BlockPoolsScope block_pools(GetVIXLAssembler()); 3802 LocationSummary* locations = invoke->GetLocations(); 3803 codegen_->GenerateStaticOrDirectCall( 3804 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 3805 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 3806} 3807 3808void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 3809 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 3810 return; 3811 } 3812 3813 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 3814 DCHECK(!codegen_->IsLeafMethod()); 3815 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 3816} 3817 3818void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { 3819 InvokeRuntimeCallingConvention calling_convention; 3820 CodeGenerator::CreateLoadClassLocationSummary( 3821 cls, 3822 LocationFrom(calling_convention.GetRegisterAt(0)), 3823 LocationFrom(vixl::x0), 3824 /* code_generator_supports_read_barrier */ true); 3825} 3826 3827void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { 3828 if (cls->NeedsAccessCheck()) { 3829 codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex()); 3830 codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), 3831 cls, 3832 cls->GetDexPc(), 3833 nullptr); 3834 CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); 3835 return; 3836 } 3837 3838 Location out_loc = cls->GetLocations()->Out(); 3839 Register out = OutputRegister(cls); 3840 Register current_method = InputRegisterAt(cls, 0); 3841 if (cls->IsReferrersClass()) { 3842 DCHECK(!cls->CanCallRuntime()); 3843 DCHECK(!cls->MustGenerateClinitCheck()); 3844 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 3845 GenerateGcRootFieldLoad( 3846 cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); 3847 } else { 3848 MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); 3849 // /* 
GcRoot<mirror::Class>[] */ out = 3850 // current_method.ptr_sized_fields_->dex_cache_resolved_types_ 3851 __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); 3852 // /* GcRoot<mirror::Class> */ out = out[type_index] 3853 GenerateGcRootFieldLoad( 3854 cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); 3855 3856 if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { 3857 DCHECK(cls->CanCallRuntime()); 3858 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( 3859 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); 3860 codegen_->AddSlowPath(slow_path); 3861 if (!cls->IsInDexCache()) { 3862 __ Cbz(out, slow_path->GetEntryLabel()); 3863 } 3864 if (cls->MustGenerateClinitCheck()) { 3865 GenerateClassInitializationCheck(slow_path, out); 3866 } else { 3867 __ Bind(slow_path->GetExitLabel()); 3868 } 3869 } 3870 } 3871} 3872 3873static MemOperand GetExceptionTlsAddress() { 3874 return MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value()); 3875} 3876 3877void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { 3878 LocationSummary* locations = 3879 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); 3880 locations->SetOut(Location::RequiresRegister()); 3881} 3882 3883void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) { 3884 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress()); 3885} 3886 3887void LocationsBuilderARM64::VisitClearException(HClearException* clear) { 3888 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); 3889} 3890 3891void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 3892 __ Str(wzr, GetExceptionTlsAddress()); 3893} 3894 3895void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) { 3896 load->SetLocations(nullptr); 3897} 3898 3899void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) { 3900 // Nothing to do, this is driven by the code generator. 3901} 3902 3903void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { 3904 LocationSummary::CallKind call_kind = (!load->IsInDexCache() || kEmitCompilerReadBarrier) 3905 ? 
LocationSummary::kCallOnSlowPath 3906 : LocationSummary::kNoCall; 3907 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); 3908 locations->SetInAt(0, Location::RequiresRegister()); 3909 locations->SetOut(Location::RequiresRegister()); 3910} 3911 3912void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { 3913 Location out_loc = load->GetLocations()->Out(); 3914 Register out = OutputRegister(load); 3915 Register current_method = InputRegisterAt(load, 0); 3916 3917 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 3918 GenerateGcRootFieldLoad( 3919 load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); 3920 // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ 3921 __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); 3922 // /* GcRoot<mirror::String> */ out = out[string_index] 3923 GenerateGcRootFieldLoad( 3924 load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); 3925 3926 if (!load->IsInDexCache()) { 3927 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); 3928 codegen_->AddSlowPath(slow_path); 3929 __ Cbz(out, slow_path->GetEntryLabel()); 3930 __ Bind(slow_path->GetExitLabel()); 3931 } 3932} 3933 3934void LocationsBuilderARM64::VisitLocal(HLocal* local) { 3935 local->SetLocations(nullptr); 3936} 3937 3938void InstructionCodeGeneratorARM64::VisitLocal(HLocal* local) { 3939 DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock()); 3940} 3941 3942void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { 3943 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); 3944 locations->SetOut(Location::ConstantLocation(constant)); 3945} 3946 3947void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 3948 // Will be generated at use site. 3949} 3950 3951void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 3952 LocationSummary* locations = 3953 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); 3954 InvokeRuntimeCallingConvention calling_convention; 3955 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 3956} 3957 3958void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 3959 codegen_->InvokeRuntime(instruction->IsEnter() 3960 ? 
QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject), 3961 instruction, 3962 instruction->GetDexPc(), 3963 nullptr); 3964 if (instruction->IsEnter()) { 3965 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 3966 } else { 3967 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 3968 } 3969} 3970 3971void LocationsBuilderARM64::VisitMul(HMul* mul) { 3972 LocationSummary* locations = 3973 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); 3974 switch (mul->GetResultType()) { 3975 case Primitive::kPrimInt: 3976 case Primitive::kPrimLong: 3977 locations->SetInAt(0, Location::RequiresRegister()); 3978 locations->SetInAt(1, Location::RequiresRegister()); 3979 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3980 break; 3981 3982 case Primitive::kPrimFloat: 3983 case Primitive::kPrimDouble: 3984 locations->SetInAt(0, Location::RequiresFpuRegister()); 3985 locations->SetInAt(1, Location::RequiresFpuRegister()); 3986 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3987 break; 3988 3989 default: 3990 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3991 } 3992} 3993 3994void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { 3995 switch (mul->GetResultType()) { 3996 case Primitive::kPrimInt: 3997 case Primitive::kPrimLong: 3998 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); 3999 break; 4000 4001 case Primitive::kPrimFloat: 4002 case Primitive::kPrimDouble: 4003 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1)); 4004 break; 4005 4006 default: 4007 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 4008 } 4009} 4010 4011void LocationsBuilderARM64::VisitNeg(HNeg* neg) { 4012 LocationSummary* locations = 4013 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); 4014 switch (neg->GetResultType()) { 4015 case Primitive::kPrimInt: 4016 case Primitive::kPrimLong: 4017 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg)); 4018 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4019 break; 4020 4021 case Primitive::kPrimFloat: 4022 case Primitive::kPrimDouble: 4023 locations->SetInAt(0, Location::RequiresFpuRegister()); 4024 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 4025 break; 4026 4027 default: 4028 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 4029 } 4030} 4031 4032void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { 4033 switch (neg->GetResultType()) { 4034 case Primitive::kPrimInt: 4035 case Primitive::kPrimLong: 4036 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0)); 4037 break; 4038 4039 case Primitive::kPrimFloat: 4040 case Primitive::kPrimDouble: 4041 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0)); 4042 break; 4043 4044 default: 4045 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 4046 } 4047} 4048 4049void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { 4050 LocationSummary* locations = 4051 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); 4052 InvokeRuntimeCallingConvention calling_convention; 4053 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); 4054 locations->SetOut(LocationFrom(x0)); 4055 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); 4056 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); 4057} 4058 
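// Note: the allocation itself is delegated to the runtime; only the type index
// is materialized here (in w0, as the DCHECK below asserts), the remaining
// arguments having been placed in their calling-convention registers by the
// locations builder above.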
4059void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
4060 LocationSummary* locations = instruction->GetLocations();
4061 InvokeRuntimeCallingConvention calling_convention;
4062 Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
4063 DCHECK(type_index.Is(w0));
4064 __ Mov(type_index, instruction->GetTypeIndex());
4065 // Note: if heap poisoning is enabled, the entry point takes care
4066 // of poisoning the reference.
4067 codegen_->InvokeRuntime(instruction->GetEntrypoint(),
4068 instruction,
4069 instruction->GetDexPc(),
4070 nullptr);
4071 CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
4072}
4073
4074void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
4075 LocationSummary* locations =
4076 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
4077 InvokeRuntimeCallingConvention calling_convention;
4078 if (instruction->IsStringAlloc()) {
4079 locations->AddTemp(LocationFrom(kArtMethodRegister));
4080 } else {
4081 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
4082 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
4083 }
4084 locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
4085}
4086
4087void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
4088 // Note: if heap poisoning is enabled, the entry point takes care
4089 // of poisoning the reference.
4090 if (instruction->IsStringAlloc()) {
4091 // String is allocated through StringFactory. Call NewEmptyString entry point.
4092 Location temp = instruction->GetLocations()->GetTemp(0);
4093 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
4094 __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
4095 __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
4096 __ Blr(lr);
4097 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4098 } else {
4099 codegen_->InvokeRuntime(instruction->GetEntrypoint(),
4100 instruction,
4101 instruction->GetDexPc(),
4102 nullptr);
4103 CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
4104 }
4105}
4106
4107void LocationsBuilderARM64::VisitNot(HNot* instruction) {
4108 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
4109 locations->SetInAt(0, Location::RequiresRegister());
4110 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4111}
4112
4113void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
4114 switch (instruction->GetResultType()) {
4115 case Primitive::kPrimInt:
4116 case Primitive::kPrimLong:
4117 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
4118 break;
4119
4120 default:
4121 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
4122 }
4123}
4124
4125void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
4126 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
4127 locations->SetInAt(0, Location::RequiresRegister());
4128 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4129}
4130
4131void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
4132 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::Operand(1));
4133}
4134
4135void
LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { 4136 LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() 4137 ? LocationSummary::kCallOnSlowPath 4138 : LocationSummary::kNoCall; 4139 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); 4140 locations->SetInAt(0, Location::RequiresRegister()); 4141 if (instruction->HasUses()) { 4142 locations->SetOut(Location::SameAsFirstInput()); 4143 } 4144} 4145 4146void InstructionCodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { 4147 if (codegen_->CanMoveNullCheckToUser(instruction)) { 4148 return; 4149 } 4150 4151 BlockPoolsScope block_pools(GetVIXLAssembler()); 4152 Location obj = instruction->GetLocations()->InAt(0); 4153 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); 4154 codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); 4155} 4156 4157void InstructionCodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { 4158 SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction); 4159 codegen_->AddSlowPath(slow_path); 4160 4161 LocationSummary* locations = instruction->GetLocations(); 4162 Location obj = locations->InAt(0); 4163 4164 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); 4165} 4166 4167void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { 4168 if (codegen_->IsImplicitNullCheckAllowed(instruction)) { 4169 GenerateImplicitNullCheck(instruction); 4170 } else { 4171 GenerateExplicitNullCheck(instruction); 4172 } 4173} 4174 4175void LocationsBuilderARM64::VisitOr(HOr* instruction) { 4176 HandleBinaryOp(instruction); 4177} 4178 4179void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { 4180 HandleBinaryOp(instruction); 4181} 4182 4183void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 4184 LOG(FATAL) << "Unreachable"; 4185} 4186 4187void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { 4188 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 4189} 4190 4191void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { 4192 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 4193 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 4194 if (location.IsStackSlot()) { 4195 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4196 } else if (location.IsDoubleStackSlot()) { 4197 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4198 } 4199 locations->SetOut(location); 4200} 4201 4202void InstructionCodeGeneratorARM64::VisitParameterValue( 4203 HParameterValue* instruction ATTRIBUTE_UNUSED) { 4204 // Nothing to do, the parameter is already at its location. 4205} 4206 4207void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { 4208 LocationSummary* locations = 4209 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); 4210 locations->SetOut(LocationFrom(kArtMethodRegister)); 4211} 4212 4213void InstructionCodeGeneratorARM64::VisitCurrentMethod( 4214 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 4215 // Nothing to do, the method is already at its location. 
4216} 4217 4218void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { 4219 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 4220 for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { 4221 locations->SetInAt(i, Location::Any()); 4222 } 4223 locations->SetOut(Location::Any()); 4224} 4225 4226void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 4227 LOG(FATAL) << "Unreachable"; 4228} 4229 4230void LocationsBuilderARM64::VisitRem(HRem* rem) { 4231 Primitive::Type type = rem->GetResultType(); 4232 LocationSummary::CallKind call_kind = 4233 Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall; 4234 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); 4235 4236 switch (type) { 4237 case Primitive::kPrimInt: 4238 case Primitive::kPrimLong: 4239 locations->SetInAt(0, Location::RequiresRegister()); 4240 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 4241 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 4242 break; 4243 4244 case Primitive::kPrimFloat: 4245 case Primitive::kPrimDouble: { 4246 InvokeRuntimeCallingConvention calling_convention; 4247 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 4248 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 4249 locations->SetOut(calling_convention.GetReturnLocation(type)); 4250 4251 break; 4252 } 4253 4254 default: 4255 LOG(FATAL) << "Unexpected rem type " << type; 4256 } 4257} 4258 4259void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { 4260 Primitive::Type type = rem->GetResultType(); 4261 4262 switch (type) { 4263 case Primitive::kPrimInt: 4264 case Primitive::kPrimLong: { 4265 GenerateDivRemIntegral(rem); 4266 break; 4267 } 4268 4269 case Primitive::kPrimFloat: 4270 case Primitive::kPrimDouble: { 4271 int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) 4272 : QUICK_ENTRY_POINT(pFmod); 4273 codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); 4274 if (type == Primitive::kPrimFloat) { 4275 CheckEntrypointTypes<kQuickFmodf, float, float, float>(); 4276 } else { 4277 CheckEntrypointTypes<kQuickFmod, double, double, double>(); 4278 } 4279 break; 4280 } 4281 4282 default: 4283 LOG(FATAL) << "Unexpected rem type " << type; 4284 UNREACHABLE(); 4285 } 4286} 4287 4288void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 4289 memory_barrier->SetLocations(nullptr); 4290} 4291 4292void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 4293 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 4294} 4295 4296void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { 4297 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); 4298 Primitive::Type return_type = instruction->InputAt(0)->GetType(); 4299 locations->SetInAt(0, ARM64ReturnLocation(return_type)); 4300} 4301 4302void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) { 4303 codegen_->GenerateFrameExit(); 4304} 4305 4306void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { 4307 instruction->SetLocations(nullptr); 4308} 4309 4310void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { 4311 codegen_->GenerateFrameExit(); 4312} 4313 4314void LocationsBuilderARM64::VisitRor(HRor* ror) { 4315 HandleBinaryOp(ror); 4316} 4317 4318void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) { 4319 HandleBinaryOp(ror); 4320} 4321 4322void LocationsBuilderARM64::VisitShl(HShl* shl) { 4323 HandleShift(shl); 4324} 4325 4326void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) { 4327 HandleShift(shl); 4328} 4329 4330void LocationsBuilderARM64::VisitShr(HShr* shr) { 4331 HandleShift(shr); 4332} 4333 4334void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) { 4335 HandleShift(shr); 4336} 4337 4338void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) { 4339 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store); 4340 Primitive::Type field_type = store->InputAt(1)->GetType(); 4341 switch (field_type) { 4342 case Primitive::kPrimNot: 4343 case Primitive::kPrimBoolean: 4344 case Primitive::kPrimByte: 4345 case Primitive::kPrimChar: 4346 case Primitive::kPrimShort: 4347 case Primitive::kPrimInt: 4348 case Primitive::kPrimFloat: 4349 locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal()))); 4350 break; 4351 4352 case Primitive::kPrimLong: 4353 case Primitive::kPrimDouble: 4354 locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal()))); 4355 break; 4356 4357 default: 4358 LOG(FATAL) << "Unimplemented local type " << field_type; 4359 UNREACHABLE(); 4360 } 4361} 4362 4363void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) { 4364} 4365 4366void LocationsBuilderARM64::VisitSub(HSub* instruction) { 4367 HandleBinaryOp(instruction); 4368} 4369 4370void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { 4371 HandleBinaryOp(instruction); 4372} 4373 4374void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4375 HandleFieldGet(instruction); 4376} 4377 4378void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4379 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4380} 4381 
void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
  HandleFieldSet(instruction);
}

void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}

void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
    HUnresolvedInstanceFieldGet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
    HUnresolvedInstanceFieldGet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
    HUnresolvedInstanceFieldSet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
    HUnresolvedInstanceFieldSet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
    HUnresolvedStaticFieldGet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
    HUnresolvedStaticFieldGet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
    HUnresolvedStaticFieldSet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->CreateUnresolvedFieldLocationSummary(
      instruction, instruction->GetFieldType(), calling_convention);
}

void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
    HUnresolvedStaticFieldSet* instruction) {
  FieldAccessCallingConventionARM64 calling_convention;
  codegen_->GenerateUnresolvedFieldAccess(instruction,
                                          instruction->GetFieldType(),
                                          instruction->GetFieldIndex(),
                                          instruction->GetDexPc(),
                                          calling_convention);
}

void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
}

void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
  HBasicBlock* block = instruction->GetBlock();
  if (block->GetLoopInformation() != nullptr) {
    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
    // The back edge will generate the suspend check.
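    // (The corresponding code is emitted when the back edge's goto is
    // handled, so the check runs once per loop iteration; see HandleGoto
    // and GenerateSuspendCheck in this file.)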
    return;
  }
  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
    // The goto will generate the suspend check.
    return;
  }
  GenerateSuspendCheck(instruction, nullptr);
}

void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) {
  temp->SetLocations(nullptr);
}

void InstructionCodeGeneratorARM64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
  // Nothing to do, this is driven by the code generator.
}

void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
}

void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
  codegen_->InvokeRuntime(
      QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}

void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
  Primitive::Type input_type = conversion->GetInputType();
  Primitive::Type result_type = conversion->GetResultType();
  DCHECK_NE(input_type, result_type);
  if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
      (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
    LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
  }

  if (Primitive::IsFloatingPointType(input_type)) {
    locations->SetInAt(0, Location::RequiresFpuRegister());
  } else {
    locations->SetInAt(0, Location::RequiresRegister());
  }

  if (Primitive::IsFloatingPointType(result_type)) {
    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
  } else {
    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
  }
}

void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
  Primitive::Type result_type = conversion->GetResultType();
  Primitive::Type input_type = conversion->GetInputType();

  DCHECK_NE(input_type, result_type);

  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
    int result_size = Primitive::ComponentSize(result_type);
    int input_size = Primitive::ComponentSize(input_type);
    int min_size = std::min(result_size, input_size);
    Register output = OutputRegister(conversion);
    Register source = InputRegisterAt(conversion, 0);
    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
      // 'int' values are used directly as W registers, discarding the top
      // bits, so we don't need to sign-extend and can just perform a move.
      // We do not pass the `kDiscardForSameWReg` argument so as to force
      // clearing the top 32 bits of the target register. We could
      // theoretically leave those bits unchanged, but we would then have to
      // make sure that no code uses a 32-bit input value as a 64-bit value
      // assuming that the top 32 bits are zero.
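      // For example, a long-to-int conversion then reduces to a single
      // 32-bit register move, e.g. `mov w0, w1` (registers chosen here for
      // illustration only): on ARM64, writing a W register implicitly
      // zeroes the upper 32 bits of the corresponding X register.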
      __ Mov(output.W(), source.W());
    } else if (result_type == Primitive::kPrimChar ||
               (input_type == Primitive::kPrimChar && input_size < result_size)) {
      __ Ubfx(output,
              output.IsX() ? source.X() : source.W(),
              0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
    } else {
      __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
    }
  } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
    __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
  } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
    __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
  } else if (Primitive::IsFloatingPointType(result_type) &&
             Primitive::IsFloatingPointType(input_type)) {
    __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
  } else {
    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
               << " to " << result_type;
  }
}

void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
  HandleShift(ushr);
}

void LocationsBuilderARM64::VisitXor(HXor* instruction) {
  HandleBinaryOp(instruction);
}

void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
  HandleBinaryOp(instruction);
}

void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  Register value_reg = InputRegisterAt(switch_instr, 0);
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  // As a rough estimate, allow up to 16 instructions of generated code per
  // HIR instruction on average.
  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
  // ADR has a limited range (+/- 1 MB), so we set a threshold on the number
  // of HIRs in the graph to make sure we don't emit a jump table whose
  // targets may end up out of range.
  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
  // ranges and emit the tables only as required.
  static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;

  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
      // The current instruction id is an upper bound of the number of HIRs in the graph.
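      // (With the constants above, this threshold works out to roughly
      // 1 MB / (16 * 4 B) = 16384 HIR instructions.)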
      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
    // Create a series of compare/jumps.
    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
    Register temp = temps.AcquireW();
    __ Subs(temp, value_reg, Operand(lower_bound));

    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    // Jump to successors[0] if value == lower_bound.
    __ B(eq, codegen_->GetLabelOf(successors[0]));
    int32_t last_index = 0;
    for (; num_entries - last_index > 2; last_index += 2) {
      __ Subs(temp, temp, Operand(2));
      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
    }
    if (num_entries - last_index == 2) {
      // Handle the last missing case_value.
      __ Cmp(temp, Operand(1));
      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ B(codegen_->GetLabelOf(default_block));
    }
  } else {
    JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr);
    codegen_->AddJumpTable(jump_table);

    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());

    // The instructions below should use at most one blocked register. Since
    // there are two blocked registers available, we are free to block one of
    // them here.
    Register temp_w = temps.AcquireW();
    Register index;
    // Remove the bias.
    if (lower_bound != 0) {
      index = temp_w;
      __ Sub(index, value_reg, Operand(lower_bound));
    } else {
      index = value_reg;
    }

    // Jump to the default block if the index is out of range.
    __ Cmp(index, Operand(num_entries));
    __ B(hs, codegen_->GetLabelOf(default_block));

    // In the current VIXL implementation, encoding the immediate value for
    // Adr does not require any blocked registers, so we are free to use both
    // VIXL blocked registers here to reduce register pressure.
    Register table_base = temps.AcquireX();
    // Load the jump offset from the table.
    __ Adr(table_base, jump_table->GetTableStartLabel());
    Register jump_offset = temp_w;
    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));

    // Jump to the target block by branching to table_base (PC-relative) + offset.
    Register target_address = table_base;
    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
    __ Br(target_address);
  }
}

void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                                                     Location out,
                                                                     uint32_t offset,
                                                                     Location maybe_temp) {
  Primitive::Type type = Primitive::kPrimNot;
  Register out_reg = RegisterFrom(out, type);
  if (kEmitCompilerReadBarrier) {
    Register temp_reg = RegisterFrom(maybe_temp, type);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      out_reg,
                                                      offset,
                                                      temp_reg,
                                                      /* needs_null_check */ false,
                                                      /* use_load_acquire */ false);
    } else {
      // Load with slow path based read barrier.
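      // A rough sketch of what is emitted below (register names are
      // illustrative only):
      //   mov wTemp, wOut             // Preserve the base reference.
      //   ldr wOut, [xOut, #offset]   // The reference load itself.
      //   b   <read barrier slow path>  // Updates wOut, then returns here.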
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      __ Mov(temp_reg, out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ Ldr(out_reg, HeapOperand(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ Ldr(out_reg, HeapOperand(out_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                                                      Location out,
                                                                      Location obj,
                                                                      uint32_t offset,
                                                                      Location maybe_temp) {
  Primitive::Type type = Primitive::kPrimNot;
  Register out_reg = RegisterFrom(out, type);
  Register obj_reg = RegisterFrom(obj, type);
  if (kEmitCompilerReadBarrier) {
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      Register temp_reg = RegisterFrom(maybe_temp, type);
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
                                                      out,
                                                      obj_reg,
                                                      offset,
                                                      temp_reg,
                                                      /* needs_null_check */ false,
                                                      /* use_load_acquire */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
                                                            Location root,
                                                            vixl::Register obj,
                                                            uint32_t offset) {
  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
  if (kEmitCompilerReadBarrier) {
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barrier is used:
      //
      //   root = obj.field;
      //   if (Thread::Current()->GetIsGcMarking()) {
      //     root = ReadBarrier::Mark(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
      __ Ldr(root_reg, MemOperand(obj, offset));
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path used to mark the GC root `root`.
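      // (The slow path is entered only while the GC is concurrently
      // marking, as tested by the Thread::IsGcMarkingOffset<>() load and
      // the Cbnz below.)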
      SlowPathCodeARM64* slow_path =
          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
      codegen_->AddSlowPath(slow_path);

      MacroAssembler* masm = GetVIXLAssembler();
      UseScratchRegisterScope temps(masm);
      Register temp = temps.AcquireW();
      // temp = Thread::Current()->GetIsGcMarking()
      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
      __ Cbnz(temp, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = obj + offset
      __ Add(root_reg.X(), obj.X(), offset);
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
    __ Ldr(root_reg, MemOperand(obj, offset));
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}

void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               vixl::Register obj,
                                                               uint32_t offset,
                                                               Register temp,
                                                               bool needs_null_check,
                                                               bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Location no_index = Location::NoLocation();
  GenerateReferenceLoadWithBakerReadBarrier(
      instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
}

void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                               Location ref,
                                                               vixl::Register obj,
                                                               uint32_t data_offset,
                                                               Location index,
                                                               Register temp,
                                                               bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // Array cells are never volatile variables, therefore array loads
  // never use Load-Acquire instructions on ARM64.
  const bool use_load_acquire = false;

  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  GenerateReferenceLoadWithBakerReadBarrier(
      instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
}

void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                   Location ref,
                                                                   vixl::Register obj,
                                                                   uint32_t offset,
                                                                   Location index,
                                                                   Register temp,
                                                                   bool needs_null_check,
                                                                   bool use_load_acquire) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);
  // If `index` is a valid location, then we are emitting an array
  // load, so we shouldn't be using a Load Acquire instruction.
  // In other words: `index.IsValid()` => `!use_load_acquire`.
  DCHECK(!index.IsValid() || !use_load_acquire);

  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
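  // On ARM64 that ordering is enforced without a memory barrier: an
  // artificial address dependency on the loaded lock word is created
  // instead (see the Bic/Add sequence further down).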
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as it performs additional checks that we do
  // not do here for performance reasons.

  Primitive::Type type = Primitive::kPrimNot;
  Register ref_reg = RegisterFrom(ref, type);
  DCHECK(obj.IsW());
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // /* int32_t */ monitor = obj->monitor_
  __ Ldr(temp, HeapOperand(obj, monitor_offset));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }
  // /* LockWord */ lock_word = LockWord(monitor)
  static_assert(sizeof(LockWord) == sizeof(int32_t),
                "art::LockWord and int32_t have different sizes.");
  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
  __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
  __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
  static_assert(
      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");

  // Introduce a dependency on the high bits of rb_state, which shall
  // be all zeroes, to prevent load-load reordering, and without using
  // a memory barrier (which would be more expensive).
  // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
  Register temp2 = temps.AcquireW();
  __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
  // obj is unchanged by this operation, but its value now depends on
  // temp2, which depends on temp.
  __ Add(obj, obj, Operand(temp2));
  temps.Release(temp2);

  // The actual reference load.
  if (index.IsValid()) {
    static_assert(
        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
    temp2 = temps.AcquireW();
    // /* HeapReference<Object> */ ref =
    //     *(obj + offset + index * sizeof(HeapReference<Object>))
    MemOperand source = HeapOperand(obj);
    if (index.IsConstant()) {
      uint32_t computed_offset =
          offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type));
      source = HeapOperand(obj, computed_offset);
    } else {
      __ Add(temp2, obj, offset);
      source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
    }
    Load(type, ref_reg, source);
    temps.Release(temp2);
  } else {
    // /* HeapReference<Object> */ ref = *(obj + offset)
    MemOperand field = HeapOperand(obj, offset);
    if (use_load_acquire) {
      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
    } else {
      Load(type, ref_reg, field);
    }
  }

  // Object* ref = ref_addr->AsMirrorPtr()
  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);

  // Slow path used to mark the object `ref` when it is gray.
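  // (Only objects whose lock word encodes the gray state divert to this
  // slow path; all other objects fall straight through, so the fast-path
  // cost is the monitor_ load and the dependency chain above.)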
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
  AddSlowPath(slow_path);

  // if (rb_state == ReadBarrier::gray_ptr_)
  //   ref = ReadBarrier::Mark(ref);
  __ Cmp(temp, ReadBarrier::gray_ptr_);
  __ B(eq, slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                 Location out,
                                                 Location ref,
                                                 Location obj,
                                                 uint32_t offset,
                                                 Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                      Location out,
                                                      Location ref,
                                                      Location obj,
                                                      uint32_t offset,
                                                      Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
  }
}

void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                        Location out,
                                                        Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCodeARM64* slow_path =
      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
  AddSlowPath(slow_path);

  __ B(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

#undef __
#undef QUICK_ENTRY_POINT

}  // namespace arm64
}  // namespace art