1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "code_generator_arm64.h"
18
19#include "arch/arm64/asm_support_arm64.h"
20#include "arch/arm64/instruction_set_features_arm64.h"
21#include "art_method.h"
22#include "code_generator_utils.h"
23#include "compiled_method.h"
24#include "entrypoints/quick/quick_entrypoints.h"
25#include "entrypoints/quick/quick_entrypoints_enum.h"
26#include "gc/accounting/card_table.h"
27#include "intrinsics.h"
28#include "intrinsics_arm64.h"
29#include "linker/arm64/relative_patcher_arm64.h"
30#include "mirror/array-inl.h"
31#include "mirror/class-inl.h"
32#include "offsets.h"
33#include "thread.h"
34#include "utils/arm64/assembler_arm64.h"
35#include "utils/assembler.h"
36#include "utils/stack_checks.h"
37
38using namespace vixl::aarch64;  // NOLINT(build/namespaces)
39using vixl::ExactAssemblyScope;
40using vixl::CodeBufferCheckScope;
41using vixl::EmissionCheckScope;
42
43#ifdef __
44#error "ARM64 Codegen VIXL macro-assembler macro already defined."
45#endif
46
47namespace art {
48
49template<class MirrorType>
50class GcRoot;
51
52namespace arm64 {
53
54using helpers::ARM64EncodableConstantOrRegister;
55using helpers::ArtVixlRegCodeCoherentForRegSet;
56using helpers::CPURegisterFrom;
57using helpers::DRegisterFrom;
58using helpers::FPRegisterFrom;
59using helpers::HeapOperand;
60using helpers::HeapOperandFrom;
61using helpers::InputCPURegisterAt;
62using helpers::InputCPURegisterOrZeroRegAt;
63using helpers::InputFPRegisterAt;
64using helpers::InputOperandAt;
65using helpers::InputRegisterAt;
66using helpers::Int64ConstantFrom;
67using helpers::IsConstantZeroBitPattern;
68using helpers::LocationFrom;
69using helpers::OperandFromMemOperand;
70using helpers::OutputCPURegister;
71using helpers::OutputFPRegister;
72using helpers::OutputRegister;
73using helpers::QRegisterFrom;
74using helpers::RegisterFrom;
75using helpers::StackOperandFrom;
76using helpers::VIXLRegCodeFromART;
77using helpers::WRegisterFrom;
78using helpers::XRegisterFrom;
79
80static constexpr int kCurrentMethodStackOffset = 0;
81// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
82// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
83// generates less code/data with a small num_entries.
84static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
85
86// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
87// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
88// For the Baker read barrier implementation using link-generated thunks we need to split
89// the offset explicitly.
90constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
91
92// Flags controlling the use of link-time generated thunks for Baker read barriers.
93constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
94constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
95
96// Some instructions have special requirements for a temporary, for example
97// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
98// temp that's not an R0 (to avoid an extra move) and Baker read barrier field
99// loads with large offsets need a fixed register to limit the number of link-time
100// thunks we generate. For these and similar cases, we want to reserve a specific
101// register that's neither callee-save nor an argument register. We choose x15.
102inline Location FixedTempLocation() {
103  return Location::RegisterLocation(x15.GetCode());
104}
105
106inline Condition ARM64Condition(IfCondition cond) {
107  switch (cond) {
108    case kCondEQ: return eq;
109    case kCondNE: return ne;
110    case kCondLT: return lt;
111    case kCondLE: return le;
112    case kCondGT: return gt;
113    case kCondGE: return ge;
114    case kCondB:  return lo;
115    case kCondBE: return ls;
116    case kCondA:  return hi;
117    case kCondAE: return hs;
118  }
119  LOG(FATAL) << "Unreachable";
120  UNREACHABLE();
121}
122
123inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
124  // The ARM64 condition codes can express all the necessary branches, see the
125  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
126  // There is no dex instruction or HIR that would need the missing conditions
127  // "equal or unordered" or "not equal".
128  switch (cond) {
129    case kCondEQ: return eq;
130    case kCondNE: return ne /* unordered */;
131    case kCondLT: return gt_bias ? cc : lt /* unordered */;
132    case kCondLE: return gt_bias ? ls : le /* unordered */;
133    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
134    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
135    default:
136      LOG(FATAL) << "UNREACHABLE";
137      UNREACHABLE();
138  }
139}
140
141Location ARM64ReturnLocation(Primitive::Type return_type) {
142  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
143  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
144  // but we use the exact registers for clarity.
145  if (return_type == Primitive::kPrimFloat) {
146    return LocationFrom(s0);
147  } else if (return_type == Primitive::kPrimDouble) {
148    return LocationFrom(d0);
149  } else if (return_type == Primitive::kPrimLong) {
150    return LocationFrom(x0);
151  } else if (return_type == Primitive::kPrimVoid) {
152    return Location::NoLocation();
153  } else {
154    return LocationFrom(w0);
155  }
156}
157
158Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
159  return ARM64ReturnLocation(return_type);
160}
161
162// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
163#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
164#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
165
166// Calculate memory accessing operand for save/restore live registers.
167static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
168                                           LocationSummary* locations,
169                                           int64_t spill_offset,
170                                           bool is_save) {
171  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
172  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
173  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
174                                         codegen->GetNumberOfCoreRegisters(),
175                                         fp_spills,
176                                         codegen->GetNumberOfFloatingPointRegisters()));
177
178  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
179  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
180  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
181
182  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
183  UseScratchRegisterScope temps(masm);
184
185  Register base = masm->StackPointer();
186  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
187  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
188  int64_t reg_size = kXRegSizeInBytes;
189  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
190  uint32_t ls_access_size = WhichPowerOf2(reg_size);
191  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
192      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
193    // If the offset does not fit in the instruction's immediate field, use an alternate register
194    // to compute the base address(float point registers spill base address).
195    Register new_base = temps.AcquireSameSizeAs(base);
196    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
197    base = new_base;
198    spill_offset = -core_spill_size;
199    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
200    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
201    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
202  }
203
204  if (is_save) {
205    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
206    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
207  } else {
208    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
209    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
210  }
211}
212
213void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
214  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
215  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
216  for (uint32_t i : LowToHighBits(core_spills)) {
217    // If the register holds an object, update the stack mask.
218    if (locations->RegisterContainsObject(i)) {
219      locations->SetStackBit(stack_offset / kVRegSize);
220    }
221    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
222    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
223    saved_core_stack_offsets_[i] = stack_offset;
224    stack_offset += kXRegSizeInBytes;
225  }
226
227  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
228  for (uint32_t i : LowToHighBits(fp_spills)) {
229    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
230    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
231    saved_fpu_stack_offsets_[i] = stack_offset;
232    stack_offset += kDRegSizeInBytes;
233  }
234
235  SaveRestoreLiveRegistersHelper(codegen,
236                                 locations,
237                                 codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
238}
239
240void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
241  SaveRestoreLiveRegistersHelper(codegen,
242                                 locations,
243                                 codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
244}
245
246class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
247 public:
248  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
249
250  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
251    LocationSummary* locations = instruction_->GetLocations();
252    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
253
254    __ Bind(GetEntryLabel());
255    if (instruction_->CanThrowIntoCatchBlock()) {
256      // Live registers will be restored in the catch block if caught.
257      SaveLiveRegisters(codegen, instruction_->GetLocations());
258    }
259    // We're moving two locations to locations that could overlap, so we need a parallel
260    // move resolver.
261    InvokeRuntimeCallingConvention calling_convention;
262    codegen->EmitParallelMoves(
263        locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt,
264        locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt);
265    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
266        ? kQuickThrowStringBounds
267        : kQuickThrowArrayBounds;
268    arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
269    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
270    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
271  }
272
273  bool IsFatal() const OVERRIDE { return true; }
274
275  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
276
277 private:
278  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
279};
280
281class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
282 public:
283  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
284
285  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
286    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
287    __ Bind(GetEntryLabel());
288    arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
289    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
290  }
291
292  bool IsFatal() const OVERRIDE { return true; }
293
294  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
295
296 private:
297  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
298};
299
300class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
301 public:
302  LoadClassSlowPathARM64(HLoadClass* cls,
303                         HInstruction* at,
304                         uint32_t dex_pc,
305                         bool do_clinit,
306                         vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(),
307                         vixl::aarch64::Label* bss_entry_adrp_label = nullptr)
308      : SlowPathCodeARM64(at),
309        cls_(cls),
310        dex_pc_(dex_pc),
311        do_clinit_(do_clinit),
312        bss_entry_temp_(bss_entry_temp),
313        bss_entry_adrp_label_(bss_entry_adrp_label) {
314    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
315  }
316
317  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
318    LocationSummary* locations = instruction_->GetLocations();
319    Location out = locations->Out();
320    constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
321    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
322
323    InvokeRuntimeCallingConvention calling_convention;
324    // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
325    // register, make sure it's not clobbered by the call or by saving/restoring registers.
326    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
327    bool is_load_class_bss_entry =
328        (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
329    if (is_load_class_bss_entry) {
330      DCHECK(bss_entry_temp_.IsValid());
331      DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
332      DCHECK(
333          !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
334    }
335
336    __ Bind(GetEntryLabel());
337    SaveLiveRegisters(codegen, locations);
338
339    dex::TypeIndex type_index = cls_->GetTypeIndex();
340    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
341    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
342                                                : kQuickInitializeType;
343    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
344    if (do_clinit_) {
345      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
346    } else {
347      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
348    }
349
350    // Move the class to the desired location.
351    if (out.IsValid()) {
352      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
353      Primitive::Type type = instruction_->GetType();
354      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
355    }
356    RestoreLiveRegisters(codegen, locations);
357    // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
358    if (is_load_class_bss_entry) {
359      DCHECK(out.IsValid());
360      const DexFile& dex_file = cls_->GetDexFile();
361      if (call_saves_everything_except_r0_ip0) {
362        // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything.
363      } else {
364        // For non-Baker read barrier, we need to re-calculate the address of the class entry page.
365        bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index);
366        arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_);
367      }
368      vixl::aarch64::Label* strp_label =
369          arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_);
370      {
371        SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
372        __ Bind(strp_label);
373        __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
374               MemOperand(bss_entry_temp_, /* offset placeholder */ 0));
375      }
376    }
377    __ B(GetExitLabel());
378  }
379
380  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
381
382 private:
383  // The class this slow path will load.
384  HLoadClass* const cls_;
385
386  // The dex PC of `at_`.
387  const uint32_t dex_pc_;
388
389  // Whether to initialize the class.
390  const bool do_clinit_;
391
392  // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded.
393  vixl::aarch64::Register bss_entry_temp_;
394  vixl::aarch64::Label* bss_entry_adrp_label_;
395
396  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
397};
398
399class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
400 public:
401  LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
402      : SlowPathCodeARM64(instruction),
403        temp_(temp),
404        adrp_label_(adrp_label) {}
405
406  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407    LocationSummary* locations = instruction_->GetLocations();
408    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
409    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
410
411    InvokeRuntimeCallingConvention calling_convention;
412    // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
413    DCHECK(temp_.IsValid());
414    DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
415    DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));
416
417    __ Bind(GetEntryLabel());
418    SaveLiveRegisters(codegen, locations);
419
420    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
421    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
422    arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
423    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
424    Primitive::Type type = instruction_->GetType();
425    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
426
427    RestoreLiveRegisters(codegen, locations);
428
429    // Store the resolved String to the BSS entry.
430    const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
431    if (!kUseReadBarrier || kUseBakerReadBarrier) {
432      // The string entry page address was preserved in temp_ thanks to kSaveEverything.
433    } else {
434      // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
435      adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
436      arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
437    }
438    vixl::aarch64::Label* strp_label =
439        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
440    {
441      SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
442      __ Bind(strp_label);
443      __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
444             MemOperand(temp_, /* offset placeholder */ 0));
445    }
446
447    __ B(GetExitLabel());
448  }
449
450  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
451
452 private:
453  const Register temp_;
454  vixl::aarch64::Label* adrp_label_;
455
456  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
457};
458
459class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
460 public:
461  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
462
463  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
464    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
465    __ Bind(GetEntryLabel());
466    if (instruction_->CanThrowIntoCatchBlock()) {
467      // Live registers will be restored in the catch block if caught.
468      SaveLiveRegisters(codegen, instruction_->GetLocations());
469    }
470    arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
471                                 instruction_,
472                                 instruction_->GetDexPc(),
473                                 this);
474    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
475  }
476
477  bool IsFatal() const OVERRIDE { return true; }
478
479  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
480
481 private:
482  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
483};
484
485class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
486 public:
487  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
488      : SlowPathCodeARM64(instruction), successor_(successor) {}
489
490  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
491    LocationSummary* locations = instruction_->GetLocations();
492    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
493    __ Bind(GetEntryLabel());
494    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
495    arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
496    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
497    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
498    if (successor_ == nullptr) {
499      __ B(GetReturnLabel());
500    } else {
501      __ B(arm64_codegen->GetLabelOf(successor_));
502    }
503  }
504
505  vixl::aarch64::Label* GetReturnLabel() {
506    DCHECK(successor_ == nullptr);
507    return &return_label_;
508  }
509
510  HBasicBlock* GetSuccessor() const {
511    return successor_;
512  }
513
514  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
515
516 private:
517  // If not null, the block to branch to after the suspend check.
518  HBasicBlock* const successor_;
519
520  // If `successor_` is null, the label to branch to after the suspend check.
521  vixl::aarch64::Label return_label_;
522
523  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
524};
525
526class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
527 public:
528  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
529      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
530
531  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
532    LocationSummary* locations = instruction_->GetLocations();
533
534    DCHECK(instruction_->IsCheckCast()
535           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
536    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
537    uint32_t dex_pc = instruction_->GetDexPc();
538
539    __ Bind(GetEntryLabel());
540
541    if (!is_fatal_) {
542      SaveLiveRegisters(codegen, locations);
543    }
544
545    // We're moving two locations to locations that could overlap, so we need a parallel
546    // move resolver.
547    InvokeRuntimeCallingConvention calling_convention;
548    codegen->EmitParallelMoves(locations->InAt(0),
549                               LocationFrom(calling_convention.GetRegisterAt(0)),
550                               Primitive::kPrimNot,
551                               locations->InAt(1),
552                               LocationFrom(calling_convention.GetRegisterAt(1)),
553                               Primitive::kPrimNot);
554    if (instruction_->IsInstanceOf()) {
555      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
556      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
557      Primitive::Type ret_type = instruction_->GetType();
558      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
559      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
560    } else {
561      DCHECK(instruction_->IsCheckCast());
562      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
563      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
564    }
565
566    if (!is_fatal_) {
567      RestoreLiveRegisters(codegen, locations);
568      __ B(GetExitLabel());
569    }
570  }
571
572  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
573  bool IsFatal() const OVERRIDE { return is_fatal_; }
574
575 private:
576  const bool is_fatal_;
577
578  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
579};
580
581class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
582 public:
583  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
584      : SlowPathCodeARM64(instruction) {}
585
586  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
587    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
588    __ Bind(GetEntryLabel());
589    LocationSummary* locations = instruction_->GetLocations();
590    SaveLiveRegisters(codegen, locations);
591    InvokeRuntimeCallingConvention calling_convention;
592    __ Mov(calling_convention.GetRegisterAt(0),
593           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
594    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
595    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
596  }
597
598  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
599
600 private:
601  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
602};
603
604class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
605 public:
606  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
607
608  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
609    LocationSummary* locations = instruction_->GetLocations();
610    __ Bind(GetEntryLabel());
611    SaveLiveRegisters(codegen, locations);
612
613    InvokeRuntimeCallingConvention calling_convention;
614    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
615    parallel_move.AddMove(
616        locations->InAt(0),
617        LocationFrom(calling_convention.GetRegisterAt(0)),
618        Primitive::kPrimNot,
619        nullptr);
620    parallel_move.AddMove(
621        locations->InAt(1),
622        LocationFrom(calling_convention.GetRegisterAt(1)),
623        Primitive::kPrimInt,
624        nullptr);
625    parallel_move.AddMove(
626        locations->InAt(2),
627        LocationFrom(calling_convention.GetRegisterAt(2)),
628        Primitive::kPrimNot,
629        nullptr);
630    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
631
632    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
633    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
634    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
635    RestoreLiveRegisters(codegen, locations);
636    __ B(GetExitLabel());
637  }
638
639  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
640
641 private:
642  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
643};
644
645void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
646  uint32_t num_entries = switch_instr_->GetNumEntries();
647  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
648
649  // We are about to use the assembler to place literals directly. Make sure we have enough
650  // underlying code buffer and we have generated the jump table with right size.
651  EmissionCheckScope scope(codegen->GetVIXLAssembler(),
652                           num_entries * sizeof(int32_t),
653                           CodeBufferCheckScope::kExactSize);
654
655  __ Bind(&table_start_);
656  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
657  for (uint32_t i = 0; i < num_entries; i++) {
658    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
659    DCHECK(target_label->IsBound());
660    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
661    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
662    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
663    Literal<int32_t> literal(jump_offset);
664    __ place(&literal);
665  }
666}
667
668// Abstract base class for read barrier slow paths marking a reference
669// `ref`.
670//
671// Argument `entrypoint` must be a register location holding the read
672// barrier marking runtime entry point to be invoked.
673class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
674 protected:
675  ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
676      : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
677    DCHECK(kEmitCompilerReadBarrier);
678  }
679
680  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
681
682  // Generate assembly code calling the read barrier marking runtime
683  // entry point (ReadBarrierMarkRegX).
684  void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
685    // No need to save live registers; it's taken care of by the
686    // entrypoint. Also, there is no need to update the stack mask,
687    // as this runtime call will not trigger a garbage collection.
688    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
689    DCHECK_NE(ref_.reg(), LR);
690    DCHECK_NE(ref_.reg(), WSP);
691    DCHECK_NE(ref_.reg(), WZR);
692    // IP0 is used internally by the ReadBarrierMarkRegX entry point
693    // as a temporary, it cannot be the entry point's input/output.
694    DCHECK_NE(ref_.reg(), IP0);
695    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
696    // "Compact" slow path, saving two moves.
697    //
698    // Instead of using the standard runtime calling convention (input
699    // and output in W0):
700    //
701    //   W0 <- ref
702    //   W0 <- ReadBarrierMark(W0)
703    //   ref <- W0
704    //
705    // we just use rX (the register containing `ref`) as input and output
706    // of a dedicated entrypoint:
707    //
708    //   rX <- ReadBarrierMarkRegX(rX)
709    //
710    if (entrypoint_.IsValid()) {
711      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
712      __ Blr(XRegisterFrom(entrypoint_));
713    } else {
714      // Entrypoint is not already loaded, load from the thread.
715      int32_t entry_point_offset =
716          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
717      // This runtime call does not require a stack map.
718      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
719    }
720  }
721
722  // The location (register) of the marked object reference.
723  const Location ref_;
724
725  // The location of the entrypoint if it is already loaded.
726  const Location entrypoint_;
727
728 private:
729  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
730};
731
732// Slow path marking an object reference `ref` during a read
733// barrier. The field `obj.field` in the object `obj` holding this
734// reference does not get updated by this slow path after marking.
735//
736// This means that after the execution of this slow path, `ref` will
737// always be up-to-date, but `obj.field` may not; i.e., after the
738// flip, `ref` will be a to-space reference, but `obj.field` will
739// probably still be a from-space reference (unless it gets updated by
740// another thread, or if another thread installed another object
741// reference (different from `ref`) in `obj.field`).
742//
743// If `entrypoint` is a valid location it is assumed to already be
744// holding the entrypoint. The case where the entrypoint is passed in
745// is when the decision to mark is based on whether the GC is marking.
746class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
747 public:
748  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
749                               Location ref,
750                               Location entrypoint = Location::NoLocation())
751      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
752    DCHECK(kEmitCompilerReadBarrier);
753  }
754
755  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
756
757  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
758    LocationSummary* locations = instruction_->GetLocations();
759    DCHECK(locations->CanCall());
760    DCHECK(ref_.IsRegister()) << ref_;
761    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
762    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
763        << "Unexpected instruction in read barrier marking slow path: "
764        << instruction_->DebugName();
765
766    __ Bind(GetEntryLabel());
767    GenerateReadBarrierMarkRuntimeCall(codegen);
768    __ B(GetExitLabel());
769  }
770
771 private:
772  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
773};
774
775// Slow path loading `obj`'s lock word, loading a reference from
776// object `*(obj + offset + (index << scale_factor))` into `ref`, and
777// marking `ref` if `obj` is gray according to the lock word (Baker
778// read barrier). The field `obj.field` in the object `obj` holding
779// this reference does not get updated by this slow path after marking
780// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
781// below for that).
782//
783// This means that after the execution of this slow path, `ref` will
784// always be up-to-date, but `obj.field` may not; i.e., after the
785// flip, `ref` will be a to-space reference, but `obj.field` will
786// probably still be a from-space reference (unless it gets updated by
787// another thread, or if another thread installed another object
788// reference (different from `ref`) in `obj.field`).
789//
790// Argument `entrypoint` must be a register location holding the read
791// barrier marking runtime entry point to be invoked.
792class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
793 public:
794  LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
795                                                 Location ref,
796                                                 Register obj,
797                                                 uint32_t offset,
798                                                 Location index,
799                                                 size_t scale_factor,
800                                                 bool needs_null_check,
801                                                 bool use_load_acquire,
802                                                 Register temp,
803                                                 Location entrypoint)
804      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
805        obj_(obj),
806        offset_(offset),
807        index_(index),
808        scale_factor_(scale_factor),
809        needs_null_check_(needs_null_check),
810        use_load_acquire_(use_load_acquire),
811        temp_(temp) {
812    DCHECK(kEmitCompilerReadBarrier);
813    DCHECK(kUseBakerReadBarrier);
814  }
815
816  const char* GetDescription() const OVERRIDE {
817    return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
818  }
819
820  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
821    LocationSummary* locations = instruction_->GetLocations();
822    DCHECK(locations->CanCall());
823    DCHECK(ref_.IsRegister()) << ref_;
824    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
825    DCHECK(obj_.IsW());
826    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
827    DCHECK(instruction_->IsInstanceFieldGet() ||
828           instruction_->IsStaticFieldGet() ||
829           instruction_->IsArrayGet() ||
830           instruction_->IsArraySet() ||
831           instruction_->IsInstanceOf() ||
832           instruction_->IsCheckCast() ||
833           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
834           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
835        << "Unexpected instruction in read barrier marking slow path: "
836        << instruction_->DebugName();
837    // The read barrier instrumentation of object ArrayGet
838    // instructions does not support the HIntermediateAddress
839    // instruction.
840    DCHECK(!(instruction_->IsArrayGet() &&
841             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
842
843    // Temporary register `temp_`, used to store the lock word, must
844    // not be IP0 nor IP1, as we may use them to emit the reference
845    // load (in the call to GenerateRawReferenceLoad below), and we
846    // need the lock word to still be in `temp_` after the reference
847    // load.
848    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
849    DCHECK_NE(LocationFrom(temp_).reg(), IP1);
850
851    __ Bind(GetEntryLabel());
852
853    // When using MaybeGenerateReadBarrierSlow, the read barrier call is
854    // inserted after the original load. However, in fast path based
855    // Baker's read barriers, we need to perform the load of
856    // mirror::Object::monitor_ *before* the original reference load.
857    // This load-load ordering is required by the read barrier.
858    // The fast path/slow path (for Baker's algorithm) should look like:
859    //
860    //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
861    //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
862    //   HeapReference<mirror::Object> ref = *src;  // Original reference load.
863    //   bool is_gray = (rb_state == ReadBarrier::GrayState());
864    //   if (is_gray) {
865    //     ref = entrypoint(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
866    //   }
867    //
868    // Note: the original implementation in ReadBarrier::Barrier is
869    // slightly more complex as it performs additional checks that we do
870    // not do here for performance reasons.
871
872    // /* int32_t */ monitor = obj->monitor_
873    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
874    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
875    if (needs_null_check_) {
876      codegen->MaybeRecordImplicitNullCheck(instruction_);
877    }
878    // /* LockWord */ lock_word = LockWord(monitor)
879    static_assert(sizeof(LockWord) == sizeof(int32_t),
880                  "art::LockWord and int32_t have different sizes.");
881
882    // Introduce a dependency on the lock_word including rb_state,
883    // to prevent load-load reordering, and without using
884    // a memory barrier (which would be more expensive).
885    // `obj` is unchanged by this operation, but its value now depends
886    // on `temp`.
887    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
888
889    // The actual reference load.
890    // A possible implicit null check has already been handled above.
891    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
892    arm64_codegen->GenerateRawReferenceLoad(instruction_,
893                                            ref_,
894                                            obj_,
895                                            offset_,
896                                            index_,
897                                            scale_factor_,
898                                            /* needs_null_check */ false,
899                                            use_load_acquire_);
900
901    // Mark the object `ref` when `obj` is gray.
902    //
903    //   if (rb_state == ReadBarrier::GrayState())
904    //     ref = ReadBarrier::Mark(ref);
905    //
906    // Given the numeric representation, it's enough to check the low bit of the rb_state.
907    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
908    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
909    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
910    GenerateReadBarrierMarkRuntimeCall(codegen);
911
912    __ B(GetExitLabel());
913  }
914
915 private:
916  // The register containing the object holding the marked object reference field.
917  Register obj_;
918  // The offset, index and scale factor to access the reference in `obj_`.
919  uint32_t offset_;
920  Location index_;
921  size_t scale_factor_;
922  // Is a null check required?
923  bool needs_null_check_;
924  // Should this reference load use Load-Acquire semantics?
925  bool use_load_acquire_;
926  // A temporary register used to hold the lock word of `obj_`.
927  Register temp_;
928
929  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
930};
931
932// Slow path loading `obj`'s lock word, loading a reference from
933// object `*(obj + offset + (index << scale_factor))` into `ref`, and
934// marking `ref` if `obj` is gray according to the lock word (Baker
935// read barrier). If needed, this slow path also atomically updates
936// the field `obj.field` in the object `obj` holding this reference
937// after marking (contrary to
938// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
939// tries to update `obj.field`).
940//
941// This means that after the execution of this slow path, both `ref`
942// and `obj.field` will be up-to-date; i.e., after the flip, both will
943// hold the same to-space reference (unless another thread installed
944// another object reference (different from `ref`) in `obj.field`).
945//
946// Argument `entrypoint` must be a register location holding the read
947// barrier marking runtime entry point to be invoked.
948class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
949    : public ReadBarrierMarkSlowPathBaseARM64 {
950 public:
951  LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction,
952                                                               Location ref,
953                                                               Register obj,
954                                                               uint32_t offset,
955                                                               Location index,
956                                                               size_t scale_factor,
957                                                               bool needs_null_check,
958                                                               bool use_load_acquire,
959                                                               Register temp,
960                                                               Location entrypoint)
961      : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
962        obj_(obj),
963        offset_(offset),
964        index_(index),
965        scale_factor_(scale_factor),
966        needs_null_check_(needs_null_check),
967        use_load_acquire_(use_load_acquire),
968        temp_(temp) {
969    DCHECK(kEmitCompilerReadBarrier);
970    DCHECK(kUseBakerReadBarrier);
971  }
972
973  const char* GetDescription() const OVERRIDE {
974    return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
975  }
976
977  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
978    LocationSummary* locations = instruction_->GetLocations();
979    Register ref_reg = WRegisterFrom(ref_);
980    DCHECK(locations->CanCall());
981    DCHECK(ref_.IsRegister()) << ref_;
982    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
983    DCHECK(obj_.IsW());
984    DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
985
986    // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
987    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
988        << "Unexpected instruction in read barrier marking and field updating slow path: "
989        << instruction_->DebugName();
990    DCHECK(instruction_->GetLocations()->Intrinsified());
991    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
992    DCHECK_EQ(offset_, 0u);
993    DCHECK_EQ(scale_factor_, 0u);
994    DCHECK_EQ(use_load_acquire_, false);
995    // The location of the offset of the marked reference field within `obj_`.
996    Location field_offset = index_;
997    DCHECK(field_offset.IsRegister()) << field_offset;
998
999    // Temporary register `temp_`, used to store the lock word, must
1000    // not be IP0 nor IP1, as we may use them to emit the reference
1001    // load (in the call to GenerateRawReferenceLoad below), and we
1002    // need the lock word to still be in `temp_` after the reference
1003    // load.
1004    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1005    DCHECK_NE(LocationFrom(temp_).reg(), IP1);
1006
1007    __ Bind(GetEntryLabel());
1008
1009    // /* int32_t */ monitor = obj->monitor_
1010    uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1011    __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
1012    if (needs_null_check_) {
1013      codegen->MaybeRecordImplicitNullCheck(instruction_);
1014    }
1015    // /* LockWord */ lock_word = LockWord(monitor)
1016    static_assert(sizeof(LockWord) == sizeof(int32_t),
1017                  "art::LockWord and int32_t have different sizes.");
1018
1019    // Introduce a dependency on the lock_word including rb_state,
1020    // to prevent load-load reordering, and without using
1021    // a memory barrier (which would be more expensive).
1022    // `obj` is unchanged by this operation, but its value now depends
1023    // on `temp`.
1024    __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
1025
1026    // The actual reference load.
1027    // A possible implicit null check has already been handled above.
1028    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1029    arm64_codegen->GenerateRawReferenceLoad(instruction_,
1030                                            ref_,
1031                                            obj_,
1032                                            offset_,
1033                                            index_,
1034                                            scale_factor_,
1035                                            /* needs_null_check */ false,
1036                                            use_load_acquire_);
1037
1038    // Mark the object `ref` when `obj` is gray.
1039    //
1040    //   if (rb_state == ReadBarrier::GrayState())
1041    //     ref = ReadBarrier::Mark(ref);
1042    //
1043    // Given the numeric representation, it's enough to check the low bit of the rb_state.
1044    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
1045    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1046    __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
1047
1048    // Save the old value of the reference before marking it.
1049    // Note that we cannot use IP to save the old reference, as IP is
1050    // used internally by the ReadBarrierMarkRegX entry point, and we
1051    // need the old reference after the call to that entry point.
1052    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
1053    __ Mov(temp_.W(), ref_reg);
1054
1055    GenerateReadBarrierMarkRuntimeCall(codegen);
1056
1057    // If the new reference is different from the old reference,
1058    // update the field in the holder (`*(obj_ + field_offset)`).
1059    //
1060    // Note that this field could also hold a different object, if
1061    // another thread had concurrently changed it. In that case, the
1062    // LDXR/CMP/BNE sequence of instructions in the compare-and-set
1063    // (CAS) operation below would abort the CAS, leaving the field
1064    // as-is.
1065    __ Cmp(temp_.W(), ref_reg);
1066    __ B(eq, GetExitLabel());
1067
1068    // Update the the holder's field atomically.  This may fail if
1069    // mutator updates before us, but it's OK.  This is achieved
1070    // using a strong compare-and-set (CAS) operation with relaxed
1071    // memory synchronization ordering, where the expected value is
1072    // the old reference and the desired value is the new reference.
1073
1074    MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
1075    UseScratchRegisterScope temps(masm);
1076
1077    // Convenience aliases.
1078    Register base = obj_.W();
1079    Register offset = XRegisterFrom(field_offset);
1080    Register expected = temp_.W();
1081    Register value = ref_reg;
1082    Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
1083    Register tmp_value = temps.AcquireW();  // Value in memory.
1084
1085    __ Add(tmp_ptr, base.X(), Operand(offset));
1086
1087    if (kPoisonHeapReferences) {
1088      arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
1089      if (value.Is(expected)) {
1090        // Do not poison `value`, as it is the same register as
1091        // `expected`, which has just been poisoned.
1092      } else {
1093        arm64_codegen->GetAssembler()->PoisonHeapReference(value);
1094      }
1095    }
1096
1097    // do {
1098    //   tmp_value = [tmp_ptr] - expected;
1099    // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1100
1101    vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
1102    __ Bind(&loop_head);
1103    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
1104    __ Cmp(tmp_value, expected);
1105    __ B(&comparison_failed, ne);
1106    __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
1107    __ Cbnz(tmp_value, &loop_head);
1108    __ B(&exit_loop);
1109    __ Bind(&comparison_failed);
1110    __ Clrex();
1111    __ Bind(&exit_loop);
1112
1113    if (kPoisonHeapReferences) {
1114      arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
1115      if (value.Is(expected)) {
1116        // Do not unpoison `value`, as it is the same register as
1117        // `expected`, which has just been unpoisoned.
1118      } else {
1119        arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
1120      }
1121    }
1122
1123    __ B(GetExitLabel());
1124  }
1125
1126 private:
1127  // The register containing the object holding the marked object reference field.
1128  const Register obj_;
1129  // The offset, index and scale factor to access the reference in `obj_`.
1130  uint32_t offset_;
1131  Location index_;
1132  size_t scale_factor_;
1133  // Is a null check required?
1134  bool needs_null_check_;
1135  // Should this reference load use Load-Acquire semantics?
1136  bool use_load_acquire_;
1137  // A temporary register used to hold the lock word of `obj_`; and
1138  // also to hold the original reference value, when the reference is
1139  // marked.
1140  const Register temp_;
1141
1142  DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
1143};
1144
1145// Slow path generating a read barrier for a heap reference.
1146class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
1147 public:
1148  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
1149                                           Location out,
1150                                           Location ref,
1151                                           Location obj,
1152                                           uint32_t offset,
1153                                           Location index)
1154      : SlowPathCodeARM64(instruction),
1155        out_(out),
1156        ref_(ref),
1157        obj_(obj),
1158        offset_(offset),
1159        index_(index) {
1160    DCHECK(kEmitCompilerReadBarrier);
1161    // If `obj` is equal to `out` or `ref`, it means the initial object
1162    // has been overwritten by (or after) the heap object reference load
1163    // to be instrumented, e.g.:
1164    //
1165    //   __ Ldr(out, HeapOperand(out, class_offset);
1166    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
1167    //
1168    // In that case, we have lost the information about the original
1169    // object, and the emitted read barrier cannot work properly.
1170    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
1171    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
1172  }
1173
1174  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1175    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1176    LocationSummary* locations = instruction_->GetLocations();
1177    Primitive::Type type = Primitive::kPrimNot;
1178    DCHECK(locations->CanCall());
1179    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1180    DCHECK(instruction_->IsInstanceFieldGet() ||
1181           instruction_->IsStaticFieldGet() ||
1182           instruction_->IsArrayGet() ||
1183           instruction_->IsInstanceOf() ||
1184           instruction_->IsCheckCast() ||
1185           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
1186        << "Unexpected instruction in read barrier for heap reference slow path: "
1187        << instruction_->DebugName();
1188    // The read barrier instrumentation of object ArrayGet
1189    // instructions does not support the HIntermediateAddress
1190    // instruction.
1191    DCHECK(!(instruction_->IsArrayGet() &&
1192             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
1193
1194    __ Bind(GetEntryLabel());
1195
1196    SaveLiveRegisters(codegen, locations);
1197
1198    // We may have to change the index's value, but as `index_` is a
1199    // constant member (like other "inputs" of this slow path),
1200    // introduce a copy of it, `index`.
1201    Location index = index_;
1202    if (index_.IsValid()) {
1203      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
1204      if (instruction_->IsArrayGet()) {
1205        // Compute the actual memory offset and store it in `index`.
1206        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
1207        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
1208        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
1209          // We are about to change the value of `index_reg` (see the
1210          // calls to vixl::MacroAssembler::Lsl and
1211          // vixl::MacroAssembler::Mov below), but it has
1212          // not been saved by the previous call to
1213          // art::SlowPathCode::SaveLiveRegisters, as it is a
1214          // callee-save register --
1215          // art::SlowPathCode::SaveLiveRegisters does not consider
1216          // callee-save registers, as it has been designed with the
1217          // assumption that callee-save registers are supposed to be
1218          // handled by the called function.  So, as a callee-save
1219          // register, `index_reg` _would_ eventually be saved onto
1220          // the stack, but it would be too late: we would have
1221          // changed its value earlier.  Therefore, we manually save
1222          // it here into another freely available register,
1223          // `free_reg`, chosen of course among the caller-save
1224          // registers (as a callee-save `free_reg` register would
1225          // exhibit the same problem).
1226          //
1227          // Note we could have requested a temporary register from
1228          // the register allocator instead; but we prefer not to, as
1229          // this is a slow path, and we know we can find a
1230          // caller-save register that is available.
1231          Register free_reg = FindAvailableCallerSaveRegister(codegen);
1232          __ Mov(free_reg.W(), index_reg);
1233          index_reg = free_reg;
1234          index = LocationFrom(index_reg);
1235        } else {
1236          // The initial register stored in `index_` has already been
1237          // saved in the call to art::SlowPathCode::SaveLiveRegisters
1238          // (as it is not a callee-save register), so we can freely
1239          // use it.
1240        }
1241        // Shifting the index value contained in `index_reg` by the scale
1242        // factor (2) cannot overflow in practice, as the runtime is
1243        // unable to allocate object arrays with a size larger than
1244        // 2^26 - 1 (that is, 2^28 - 4 bytes).
1245        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
1246        static_assert(
1247            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
1248            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
1249        __ Add(index_reg, index_reg, Operand(offset_));
1250      } else {
1251        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
1252        // intrinsics, `index_` is not shifted by a scale factor of 2
1253        // (as in the case of ArrayGet), as it is actually an offset
1254        // to an object field within an object.
1255        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
1256        DCHECK(instruction_->GetLocations()->Intrinsified());
1257        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
1258               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
1259            << instruction_->AsInvoke()->GetIntrinsic();
1260        DCHECK_EQ(offset_, 0u);
1261        DCHECK(index_.IsRegister());
1262      }
1263    }
1264
1265    // We're moving two or three locations to locations that could
1266    // overlap, so we need a parallel move resolver.
1267    InvokeRuntimeCallingConvention calling_convention;
1268    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
1269    parallel_move.AddMove(ref_,
1270                          LocationFrom(calling_convention.GetRegisterAt(0)),
1271                          type,
1272                          nullptr);
1273    parallel_move.AddMove(obj_,
1274                          LocationFrom(calling_convention.GetRegisterAt(1)),
1275                          type,
1276                          nullptr);
1277    if (index.IsValid()) {
1278      parallel_move.AddMove(index,
1279                            LocationFrom(calling_convention.GetRegisterAt(2)),
1280                            Primitive::kPrimInt,
1281                            nullptr);
1282      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1283    } else {
1284      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
1285      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
1286    }
1287    arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
1288                                 instruction_,
1289                                 instruction_->GetDexPc(),
1290                                 this);
1291    CheckEntrypointTypes<
1292        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
1293    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1294
1295    RestoreLiveRegisters(codegen, locations);
1296
1297    __ B(GetExitLabel());
1298  }
1299
1300  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
1301
1302 private:
1303  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
1304    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
1305    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
1306    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
1307      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
1308        return Register(VIXLRegCodeFromART(i), kXRegSize);
1309      }
1310    }
1311    // We shall never fail to find a free caller-save register, as
1312    // there are more than two core caller-save registers on ARM64
1313    // (meaning it is possible to find one which is different from
1314    // `ref` and `obj`).
1315    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
1316    LOG(FATAL) << "Could not find a free register";
1317    UNREACHABLE();
1318  }
1319
1320  const Location out_;
1321  const Location ref_;
1322  const Location obj_;
1323  const uint32_t offset_;
1324  // An additional location containing an index to an array.
1325  // Only used for HArrayGet and the UnsafeGetObject &
1326  // UnsafeGetObjectVolatile intrinsics.
1327  const Location index_;
1328
1329  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
1330};
1331
1332// Slow path generating a read barrier for a GC root.
1333class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
1334 public:
1335  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
1336      : SlowPathCodeARM64(instruction), out_(out), root_(root) {
1337    DCHECK(kEmitCompilerReadBarrier);
1338  }
1339
1340  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
1341    LocationSummary* locations = instruction_->GetLocations();
1342    Primitive::Type type = Primitive::kPrimNot;
1343    DCHECK(locations->CanCall());
1344    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
1345    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
1346        << "Unexpected instruction in read barrier for GC root slow path: "
1347        << instruction_->DebugName();
1348
1349    __ Bind(GetEntryLabel());
1350    SaveLiveRegisters(codegen, locations);
1351
1352    InvokeRuntimeCallingConvention calling_convention;
1353    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
1354    // The argument of the ReadBarrierForRootSlow is not a managed
1355    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
1356    // thus we need a 64-bit move here, and we cannot use
1357    //
1358    //   arm64_codegen->MoveLocation(
1359    //       LocationFrom(calling_convention.GetRegisterAt(0)),
1360    //       root_,
1361    //       type);
1362    //
1363    // which would emit a 32-bit move, as `type` is a (32-bit wide)
1364    // reference type (`Primitive::kPrimNot`).
1365    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
1366    arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
1367                                 instruction_,
1368                                 instruction_->GetDexPc(),
1369                                 this);
1370    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
1371    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
1372
1373    RestoreLiveRegisters(codegen, locations);
1374    __ B(GetExitLabel());
1375  }
1376
1377  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
1378
1379 private:
1380  const Location out_;
1381  const Location root_;
1382
1383  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
1384};
1385
1386#undef __
1387
1388Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
1389  Location next_location;
1390  if (type == Primitive::kPrimVoid) {
1391    LOG(FATAL) << "Unreachable type " << type;
1392  }
1393
1394  if (Primitive::IsFloatingPointType(type) &&
1395      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
1396    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
1397  } else if (!Primitive::IsFloatingPointType(type) &&
1398             (gp_index_ < calling_convention.GetNumberOfRegisters())) {
1399    next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
1400  } else {
1401    size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
1402    next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
1403                                                 : Location::StackSlot(stack_offset);
1404  }
1405
1406  // Space on the stack is reserved for all arguments.
1407  stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
1408  return next_location;
1409}
1410
1411Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
1412  return LocationFrom(kArtMethodRegister);
1413}
1414
1415CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
1416                                       const Arm64InstructionSetFeatures& isa_features,
1417                                       const CompilerOptions& compiler_options,
1418                                       OptimizingCompilerStats* stats)
1419    : CodeGenerator(graph,
1420                    kNumberOfAllocatableRegisters,
1421                    kNumberOfAllocatableFPRegisters,
1422                    kNumberOfAllocatableRegisterPairs,
1423                    callee_saved_core_registers.GetList(),
1424                    callee_saved_fp_registers.GetList(),
1425                    compiler_options,
1426                    stats),
1427      block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1428      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1429      location_builder_(graph, this),
1430      instruction_visitor_(graph, this),
1431      move_resolver_(graph->GetArena(), this),
1432      assembler_(graph->GetArena()),
1433      isa_features_(isa_features),
1434      uint32_literals_(std::less<uint32_t>(),
1435                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1436      uint64_literals_(std::less<uint64_t>(),
1437                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1438      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1439      boot_image_string_patches_(StringReferenceValueComparator(),
1440                                 graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1441      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1442      boot_image_type_patches_(TypeReferenceValueComparator(),
1443                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1444      pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1445      type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1446      baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1447      jit_string_patches_(StringReferenceValueComparator(),
1448                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1449      jit_class_patches_(TypeReferenceValueComparator(),
1450                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1451  // Save the link register (containing the return address) to mimic Quick.
1452  AddAllocatedRegister(LocationFrom(lr));
1453}
1454
1455#define __ GetVIXLAssembler()->
1456
1457void CodeGeneratorARM64::EmitJumpTables() {
1458  for (auto&& jump_table : jump_tables_) {
1459    jump_table->EmitTable(this);
1460  }
1461}
1462
1463void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
1464  EmitJumpTables();
1465  // Ensure we emit the literal pool.
1466  __ FinalizeCode();
1467
1468  CodeGenerator::Finalize(allocator);
1469}
1470
1471void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1472  // Note: There are 6 kinds of moves:
1473  // 1. constant -> GPR/FPR (non-cycle)
1474  // 2. constant -> stack (non-cycle)
1475  // 3. GPR/FPR -> GPR/FPR
1476  // 4. GPR/FPR -> stack
1477  // 5. stack -> GPR/FPR
1478  // 6. stack -> stack (non-cycle)
1479  // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1480  // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1481  // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
1482  // dependency.
1483  vixl_temps_.Open(GetVIXLAssembler());
1484}
1485
1486void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1487  vixl_temps_.Close();
1488}
1489
1490Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1491  DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1492         || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1493         || kind == Location::kSIMDStackSlot);
1494  kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1495      ? Location::kFpuRegister
1496      : Location::kRegister;
1497  Location scratch = GetScratchLocation(kind);
1498  if (!scratch.Equals(Location::NoLocation())) {
1499    return scratch;
1500  }
1501  // Allocate from VIXL temp registers.
1502  if (kind == Location::kRegister) {
1503    scratch = LocationFrom(vixl_temps_.AcquireX());
1504  } else {
1505    DCHECK(kind == Location::kFpuRegister);
1506    scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1507        ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1508        : vixl_temps_.AcquireD());
1509  }
1510  AddScratchLocation(scratch);
1511  return scratch;
1512}
1513
1514void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1515  if (loc.IsRegister()) {
1516    vixl_temps_.Release(XRegisterFrom(loc));
1517  } else {
1518    DCHECK(loc.IsFpuRegister());
1519    vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1520  }
1521  RemoveScratchLocation(loc);
1522}
1523
1524void ParallelMoveResolverARM64::EmitMove(size_t index) {
1525  MoveOperands* move = moves_[index];
1526  codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
1527}
1528
1529void CodeGeneratorARM64::GenerateFrameEntry() {
1530  MacroAssembler* masm = GetVIXLAssembler();
1531  __ Bind(&frame_entry_label_);
1532
1533  bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
1534  if (do_overflow_check) {
1535    UseScratchRegisterScope temps(masm);
1536    Register temp = temps.AcquireX();
1537    DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1538    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
1539    {
1540      // Ensure that between load and RecordPcInfo there are no pools emitted.
1541      ExactAssemblyScope eas(GetVIXLAssembler(),
1542                             kInstructionSize,
1543                             CodeBufferCheckScope::kExactSize);
1544      __ ldr(wzr, MemOperand(temp, 0));
1545      RecordPcInfo(nullptr, 0);
1546    }
1547  }
1548
1549  if (!HasEmptyFrame()) {
1550    int frame_size = GetFrameSize();
1551    // Stack layout:
1552    //      sp[frame_size - 8]        : lr.
1553    //      ...                       : other preserved core registers.
1554    //      ...                       : other preserved fp registers.
1555    //      ...                       : reserved frame space.
1556    //      sp[0]                     : current method.
1557
1558    // Save the current method if we need it. Note that we do not
1559    // do this in HCurrentMethod, as the instruction might have been removed
1560    // in the SSA graph.
1561    if (RequiresCurrentMethod()) {
1562      __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1563    } else {
1564      __ Claim(frame_size);
1565    }
1566    GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1567    GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
1568        frame_size - GetCoreSpillSize());
1569    GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
1570        frame_size - FrameEntrySpillSize());
1571
1572    if (GetGraph()->HasShouldDeoptimizeFlag()) {
1573      // Initialize should_deoptimize flag to 0.
1574      Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1575      __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1576    }
1577  }
1578}
1579
1580void CodeGeneratorARM64::GenerateFrameExit() {
1581  GetAssembler()->cfi().RememberState();
1582  if (!HasEmptyFrame()) {
1583    int frame_size = GetFrameSize();
1584    GetAssembler()->UnspillRegisters(GetFramePreservedFPRegisters(),
1585        frame_size - FrameEntrySpillSize());
1586    GetAssembler()->UnspillRegisters(GetFramePreservedCoreRegisters(),
1587        frame_size - GetCoreSpillSize());
1588    __ Drop(frame_size);
1589    GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1590  }
1591  __ Ret();
1592  GetAssembler()->cfi().RestoreState();
1593  GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1594}
1595
1596CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1597  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1598  return CPURegList(CPURegister::kRegister, kXRegSize,
1599                    core_spill_mask_);
1600}
1601
1602CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1603  DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1604                                         GetNumberOfFloatingPointRegisters()));
1605  return CPURegList(CPURegister::kFPRegister, kDRegSize,
1606                    fpu_spill_mask_);
1607}
1608
1609void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1610  __ Bind(GetLabelOf(block));
1611}
1612
1613void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1614  DCHECK(location.IsRegister());
1615  __ Mov(RegisterFrom(location, Primitive::kPrimInt), value);
1616}
1617
1618void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1619  if (location.IsRegister()) {
1620    locations->AddTemp(location);
1621  } else {
1622    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1623  }
1624}
1625
1626void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1627  UseScratchRegisterScope temps(GetVIXLAssembler());
1628  Register card = temps.AcquireX();
1629  Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1630  vixl::aarch64::Label done;
1631  if (value_can_be_null) {
1632    __ Cbz(value, &done);
1633  }
1634  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1635  __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1636  __ Strb(card, MemOperand(card, temp.X()));
1637  if (value_can_be_null) {
1638    __ Bind(&done);
1639  }
1640}
1641
1642void CodeGeneratorARM64::SetupBlockedRegisters() const {
1643  // Blocked core registers:
1644  //      lr        : Runtime reserved.
1645  //      tr        : Runtime reserved.
1646  //      xSuspend  : Runtime reserved. TODO: Unblock this when the runtime stops using it.
1647  //      ip1       : VIXL core temp.
1648  //      ip0       : VIXL core temp.
1649  //
1650  // Blocked fp registers:
1651  //      d31       : VIXL fp temp.
1652  CPURegList reserved_core_registers = vixl_reserved_core_registers;
1653  reserved_core_registers.Combine(runtime_reserved_core_registers);
1654  while (!reserved_core_registers.IsEmpty()) {
1655    blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1656  }
1657
1658  CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1659  while (!reserved_fp_registers.IsEmpty()) {
1660    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1661  }
1662
1663  if (GetGraph()->IsDebuggable()) {
1664    // Stubs do not save callee-save floating point registers. If the graph
1665    // is debuggable, we need to deal with these registers differently. For
1666    // now, just block them.
1667    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1668    while (!reserved_fp_registers_debuggable.IsEmpty()) {
1669      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1670    }
1671  }
1672}
1673
1674size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1675  Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1676  __ Str(reg, MemOperand(sp, stack_index));
1677  return kArm64WordSize;
1678}
1679
1680size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1681  Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1682  __ Ldr(reg, MemOperand(sp, stack_index));
1683  return kArm64WordSize;
1684}
1685
1686size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1687  FPRegister reg = FPRegister(reg_id, kDRegSize);
1688  __ Str(reg, MemOperand(sp, stack_index));
1689  return kArm64WordSize;
1690}
1691
1692size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1693  FPRegister reg = FPRegister(reg_id, kDRegSize);
1694  __ Ldr(reg, MemOperand(sp, stack_index));
1695  return kArm64WordSize;
1696}
1697
1698void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1699  stream << XRegister(reg);
1700}
1701
1702void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1703  stream << DRegister(reg);
1704}
1705
1706void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1707  if (constant->IsIntConstant()) {
1708    __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1709  } else if (constant->IsLongConstant()) {
1710    __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1711  } else if (constant->IsNullConstant()) {
1712    __ Mov(Register(destination), 0);
1713  } else if (constant->IsFloatConstant()) {
1714    __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
1715  } else {
1716    DCHECK(constant->IsDoubleConstant());
1717    __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
1718  }
1719}
1720
1721
1722static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
1723  DCHECK(constant.IsConstant());
1724  HConstant* cst = constant.GetConstant();
1725  return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
1726         // Null is mapped to a core W register, which we associate with kPrimInt.
1727         (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
1728         (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
1729         (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
1730         (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
1731}
1732
1733// Allocate a scratch register from the VIXL pool, querying first into
1734// the floating-point register pool, and then the the core register
1735// pool.  This is essentially a reimplementation of
1736// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1737// using a different allocation strategy.
1738static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1739                                                    vixl::aarch64::UseScratchRegisterScope* temps,
1740                                                    int size_in_bits) {
1741  return masm->GetScratchFPRegisterList()->IsEmpty()
1742      ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1743      : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1744}
1745
1746void CodeGeneratorARM64::MoveLocation(Location destination,
1747                                      Location source,
1748                                      Primitive::Type dst_type) {
1749  if (source.Equals(destination)) {
1750    return;
1751  }
1752
1753  // A valid move can always be inferred from the destination and source
1754  // locations. When moving from and to a register, the argument type can be
1755  // used to generate 32bit instead of 64bit moves. In debug mode we also
1756  // checks the coherency of the locations and the type.
1757  bool unspecified_type = (dst_type == Primitive::kPrimVoid);
1758
1759  if (destination.IsRegister() || destination.IsFpuRegister()) {
1760    if (unspecified_type) {
1761      HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1762      if (source.IsStackSlot() ||
1763          (src_cst != nullptr && (src_cst->IsIntConstant()
1764                                  || src_cst->IsFloatConstant()
1765                                  || src_cst->IsNullConstant()))) {
1766        // For stack slots and 32bit constants, a 64bit type is appropriate.
1767        dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
1768      } else {
1769        // If the source is a double stack slot or a 64bit constant, a 64bit
1770        // type is appropriate. Else the source is a register, and since the
1771        // type has not been specified, we chose a 64bit type to force a 64bit
1772        // move.
1773        dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
1774      }
1775    }
1776    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) ||
1777           (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type)));
1778    CPURegister dst = CPURegisterFrom(destination, dst_type);
1779    if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1780      DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1781      __ Ldr(dst, StackOperandFrom(source));
1782    } else if (source.IsSIMDStackSlot()) {
1783      __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1784    } else if (source.IsConstant()) {
1785      DCHECK(CoherentConstantAndType(source, dst_type));
1786      MoveConstant(dst, source.GetConstant());
1787    } else if (source.IsRegister()) {
1788      if (destination.IsRegister()) {
1789        __ Mov(Register(dst), RegisterFrom(source, dst_type));
1790      } else {
1791        DCHECK(destination.IsFpuRegister());
1792        Primitive::Type source_type = Primitive::Is64BitType(dst_type)
1793            ? Primitive::kPrimLong
1794            : Primitive::kPrimInt;
1795        __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1796      }
1797    } else {
1798      DCHECK(source.IsFpuRegister());
1799      if (destination.IsRegister()) {
1800        Primitive::Type source_type = Primitive::Is64BitType(dst_type)
1801            ? Primitive::kPrimDouble
1802            : Primitive::kPrimFloat;
1803        __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1804      } else {
1805        DCHECK(destination.IsFpuRegister());
1806        if (GetGraph()->HasSIMD()) {
1807          __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1808        } else {
1809          __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type));
1810        }
1811      }
1812    }
1813  } else if (destination.IsSIMDStackSlot()) {
1814    if (source.IsFpuRegister()) {
1815      __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1816    } else {
1817      DCHECK(source.IsSIMDStackSlot());
1818      UseScratchRegisterScope temps(GetVIXLAssembler());
1819      if (GetVIXLAssembler()->GetScratchFPRegisterList()->IsEmpty()) {
1820        Register temp = temps.AcquireX();
1821        __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1822        __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1823        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1824        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1825      } else {
1826        FPRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1827        __ Ldr(temp, StackOperandFrom(source));
1828        __ Str(temp, StackOperandFrom(destination));
1829      }
1830    }
1831  } else {  // The destination is not a register. It must be a stack slot.
1832    DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1833    if (source.IsRegister() || source.IsFpuRegister()) {
1834      if (unspecified_type) {
1835        if (source.IsRegister()) {
1836          dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
1837        } else {
1838          dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
1839        }
1840      }
1841      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) &&
1842             (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type)));
1843      __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1844    } else if (source.IsConstant()) {
1845      DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1846          << source << " " << dst_type;
1847      UseScratchRegisterScope temps(GetVIXLAssembler());
1848      HConstant* src_cst = source.GetConstant();
1849      CPURegister temp;
1850      if (src_cst->IsZeroBitPattern()) {
1851        temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1852            ? Register(xzr)
1853            : Register(wzr);
1854      } else {
1855        if (src_cst->IsIntConstant()) {
1856          temp = temps.AcquireW();
1857        } else if (src_cst->IsLongConstant()) {
1858          temp = temps.AcquireX();
1859        } else if (src_cst->IsFloatConstant()) {
1860          temp = temps.AcquireS();
1861        } else {
1862          DCHECK(src_cst->IsDoubleConstant());
1863          temp = temps.AcquireD();
1864        }
1865        MoveConstant(temp, src_cst);
1866      }
1867      __ Str(temp, StackOperandFrom(destination));
1868    } else {
1869      DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1870      DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1871      UseScratchRegisterScope temps(GetVIXLAssembler());
1872      // Use any scratch register (a core or a floating-point one)
1873      // from VIXL scratch register pools as a temporary.
1874      //
1875      // We used to only use the FP scratch register pool, but in some
1876      // rare cases the only register from this pool (D31) would
1877      // already be used (e.g. within a ParallelMove instruction, when
1878      // a move is blocked by a another move requiring a scratch FP
1879      // register, which would reserve D31). To prevent this issue, we
1880      // ask for a scratch register of any type (core or FP).
1881      //
1882      // Also, we start by asking for a FP scratch register first, as the
1883      // demand of scratch core registers is higher.  This is why we
1884      // use AcquireFPOrCoreCPURegisterOfSize instead of
1885      // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1886      // allocates core scratch registers first.
1887      CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1888          GetVIXLAssembler(),
1889          &temps,
1890          (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1891      __ Ldr(temp, StackOperandFrom(source));
1892      __ Str(temp, StackOperandFrom(destination));
1893    }
1894  }
1895}
1896
1897void CodeGeneratorARM64::Load(Primitive::Type type,
1898                              CPURegister dst,
1899                              const MemOperand& src) {
1900  switch (type) {
1901    case Primitive::kPrimBoolean:
1902      __ Ldrb(Register(dst), src);
1903      break;
1904    case Primitive::kPrimByte:
1905      __ Ldrsb(Register(dst), src);
1906      break;
1907    case Primitive::kPrimShort:
1908      __ Ldrsh(Register(dst), src);
1909      break;
1910    case Primitive::kPrimChar:
1911      __ Ldrh(Register(dst), src);
1912      break;
1913    case Primitive::kPrimInt:
1914    case Primitive::kPrimNot:
1915    case Primitive::kPrimLong:
1916    case Primitive::kPrimFloat:
1917    case Primitive::kPrimDouble:
1918      DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1919      __ Ldr(dst, src);
1920      break;
1921    case Primitive::kPrimVoid:
1922      LOG(FATAL) << "Unreachable type " << type;
1923  }
1924}
1925
1926void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1927                                     CPURegister dst,
1928                                     const MemOperand& src,
1929                                     bool needs_null_check) {
1930  MacroAssembler* masm = GetVIXLAssembler();
1931  UseScratchRegisterScope temps(masm);
1932  Register temp_base = temps.AcquireX();
1933  Primitive::Type type = instruction->GetType();
1934
1935  DCHECK(!src.IsPreIndex());
1936  DCHECK(!src.IsPostIndex());
1937
1938  // TODO(vixl): Let the MacroAssembler handle MemOperand.
1939  __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1940  {
1941    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1942    MemOperand base = MemOperand(temp_base);
1943    switch (type) {
1944      case Primitive::kPrimBoolean:
1945        {
1946          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1947          __ ldarb(Register(dst), base);
1948          if (needs_null_check) {
1949            MaybeRecordImplicitNullCheck(instruction);
1950          }
1951        }
1952        break;
1953      case Primitive::kPrimByte:
1954        {
1955          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1956          __ ldarb(Register(dst), base);
1957          if (needs_null_check) {
1958            MaybeRecordImplicitNullCheck(instruction);
1959          }
1960        }
1961        __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
1962        break;
1963      case Primitive::kPrimChar:
1964        {
1965          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1966          __ ldarh(Register(dst), base);
1967          if (needs_null_check) {
1968            MaybeRecordImplicitNullCheck(instruction);
1969          }
1970        }
1971        break;
1972      case Primitive::kPrimShort:
1973        {
1974          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1975          __ ldarh(Register(dst), base);
1976          if (needs_null_check) {
1977            MaybeRecordImplicitNullCheck(instruction);
1978          }
1979        }
1980        __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
1981        break;
1982      case Primitive::kPrimInt:
1983      case Primitive::kPrimNot:
1984      case Primitive::kPrimLong:
1985        DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1986        {
1987          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1988          __ ldar(Register(dst), base);
1989          if (needs_null_check) {
1990            MaybeRecordImplicitNullCheck(instruction);
1991          }
1992        }
1993        break;
1994      case Primitive::kPrimFloat:
1995      case Primitive::kPrimDouble: {
1996        DCHECK(dst.IsFPRegister());
1997        DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
1998
1999        Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2000        {
2001          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2002          __ ldar(temp, base);
2003          if (needs_null_check) {
2004            MaybeRecordImplicitNullCheck(instruction);
2005          }
2006        }
2007        __ Fmov(FPRegister(dst), temp);
2008        break;
2009      }
2010      case Primitive::kPrimVoid:
2011        LOG(FATAL) << "Unreachable type " << type;
2012    }
2013  }
2014}
2015
2016void CodeGeneratorARM64::Store(Primitive::Type type,
2017                               CPURegister src,
2018                               const MemOperand& dst) {
2019  switch (type) {
2020    case Primitive::kPrimBoolean:
2021    case Primitive::kPrimByte:
2022      __ Strb(Register(src), dst);
2023      break;
2024    case Primitive::kPrimChar:
2025    case Primitive::kPrimShort:
2026      __ Strh(Register(src), dst);
2027      break;
2028    case Primitive::kPrimInt:
2029    case Primitive::kPrimNot:
2030    case Primitive::kPrimLong:
2031    case Primitive::kPrimFloat:
2032    case Primitive::kPrimDouble:
2033      DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2034      __ Str(src, dst);
2035      break;
2036    case Primitive::kPrimVoid:
2037      LOG(FATAL) << "Unreachable type " << type;
2038  }
2039}
2040
2041void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
2042                                      Primitive::Type type,
2043                                      CPURegister src,
2044                                      const MemOperand& dst,
2045                                      bool needs_null_check) {
2046  MacroAssembler* masm = GetVIXLAssembler();
2047  UseScratchRegisterScope temps(GetVIXLAssembler());
2048  Register temp_base = temps.AcquireX();
2049
2050  DCHECK(!dst.IsPreIndex());
2051  DCHECK(!dst.IsPostIndex());
2052
2053  // TODO(vixl): Let the MacroAssembler handle this.
2054  Operand op = OperandFromMemOperand(dst);
2055  __ Add(temp_base, dst.GetBaseRegister(), op);
2056  MemOperand base = MemOperand(temp_base);
2057  // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2058  switch (type) {
2059    case Primitive::kPrimBoolean:
2060    case Primitive::kPrimByte:
2061      {
2062        ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2063        __ stlrb(Register(src), base);
2064        if (needs_null_check) {
2065          MaybeRecordImplicitNullCheck(instruction);
2066        }
2067      }
2068      break;
2069    case Primitive::kPrimChar:
2070    case Primitive::kPrimShort:
2071      {
2072        ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2073        __ stlrh(Register(src), base);
2074        if (needs_null_check) {
2075          MaybeRecordImplicitNullCheck(instruction);
2076        }
2077      }
2078      break;
2079    case Primitive::kPrimInt:
2080    case Primitive::kPrimNot:
2081    case Primitive::kPrimLong:
2082      DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2083      {
2084        ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2085        __ stlr(Register(src), base);
2086        if (needs_null_check) {
2087          MaybeRecordImplicitNullCheck(instruction);
2088        }
2089      }
2090      break;
2091    case Primitive::kPrimFloat:
2092    case Primitive::kPrimDouble: {
2093      DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type));
2094      Register temp_src;
2095      if (src.IsZero()) {
2096        // The zero register is used to avoid synthesizing zero constants.
2097        temp_src = Register(src);
2098      } else {
2099        DCHECK(src.IsFPRegister());
2100        temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
2101        __ Fmov(temp_src, FPRegister(src));
2102      }
2103      {
2104        ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2105        __ stlr(temp_src, base);
2106        if (needs_null_check) {
2107          MaybeRecordImplicitNullCheck(instruction);
2108        }
2109      }
2110      break;
2111    }
2112    case Primitive::kPrimVoid:
2113      LOG(FATAL) << "Unreachable type " << type;
2114  }
2115}
2116
2117void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
2118                                       HInstruction* instruction,
2119                                       uint32_t dex_pc,
2120                                       SlowPathCode* slow_path) {
2121  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
2122
2123  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
2124  {
2125    // Ensure the pc position is recorded immediately after the `blr` instruction.
2126    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
2127    __ blr(lr);
2128    if (EntrypointRequiresStackMap(entrypoint)) {
2129      RecordPcInfo(instruction, dex_pc, slow_path);
2130    }
2131  }
2132}
2133
2134void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
2135                                                             HInstruction* instruction,
2136                                                             SlowPathCode* slow_path) {
2137  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
2138  __ Ldr(lr, MemOperand(tr, entry_point_offset));
2139  __ Blr(lr);
2140}
2141
2142void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
2143                                                                     Register class_reg) {
2144  UseScratchRegisterScope temps(GetVIXLAssembler());
2145  Register temp = temps.AcquireW();
2146  size_t status_offset = mirror::Class::StatusOffset().SizeValue();
2147
2148  // Even if the initialized flag is set, we need to ensure consistent memory ordering.
2149  // TODO(vixl): Let the MacroAssembler handle MemOperand.
2150  __ Add(temp, class_reg, status_offset);
2151  __ Ldar(temp, HeapOperand(temp));
2152  __ Cmp(temp, mirror::Class::kStatusInitialized);
2153  __ B(lt, slow_path->GetEntryLabel());
2154  __ Bind(slow_path->GetExitLabel());
2155}
2156
2157void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
2158  BarrierType type = BarrierAll;
2159
2160  switch (kind) {
2161    case MemBarrierKind::kAnyAny:
2162    case MemBarrierKind::kAnyStore: {
2163      type = BarrierAll;
2164      break;
2165    }
2166    case MemBarrierKind::kLoadAny: {
2167      type = BarrierReads;
2168      break;
2169    }
2170    case MemBarrierKind::kStoreStore: {
2171      type = BarrierWrites;
2172      break;
2173    }
2174    default:
2175      LOG(FATAL) << "Unexpected memory barrier " << kind;
2176  }
2177  __ Dmb(InnerShareable, type);
2178}
2179
2180void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
2181                                                         HBasicBlock* successor) {
2182  SuspendCheckSlowPathARM64* slow_path =
2183      down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
2184  if (slow_path == nullptr) {
2185    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
2186    instruction->SetSlowPath(slow_path);
2187    codegen_->AddSlowPath(slow_path);
2188    if (successor != nullptr) {
2189      DCHECK(successor->IsLoopHeader());
2190      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
2191    }
2192  } else {
2193    DCHECK_EQ(slow_path->GetSuccessor(), successor);
2194  }
2195
2196  UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
2197  Register temp = temps.AcquireW();
2198
2199  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
2200  if (successor == nullptr) {
2201    __ Cbnz(temp, slow_path->GetEntryLabel());
2202    __ Bind(slow_path->GetReturnLabel());
2203  } else {
2204    __ Cbz(temp, codegen_->GetLabelOf(successor));
2205    __ B(slow_path->GetEntryLabel());
2206    // slow_path will return to GetLabelOf(successor).
2207  }
2208}
2209
2210InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
2211                                                             CodeGeneratorARM64* codegen)
2212      : InstructionCodeGenerator(graph, codegen),
2213        assembler_(codegen->GetAssembler()),
2214        codegen_(codegen) {}
2215
2216#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
2217  /* No unimplemented IR. */
2218
2219#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
2220
2221enum UnimplementedInstructionBreakCode {
2222  // Using a base helps identify when we hit such breakpoints.
2223  UnimplementedInstructionBreakCodeBaseCode = 0x900,
2224#define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
2225  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
2226#undef ENUM_UNIMPLEMENTED_INSTRUCTION
2227};
2228
2229#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
2230  void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) {  \
2231    __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
2232  }                                                                                   \
2233  void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
2234    LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
2235    locations->SetOut(Location::Any());                                               \
2236  }
2237  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
2238#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS
2239
2240#undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
2241#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
2242
2243void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
2244  DCHECK_EQ(instr->InputCount(), 2U);
2245  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2246  Primitive::Type type = instr->GetResultType();
2247  switch (type) {
2248    case Primitive::kPrimInt:
2249    case Primitive::kPrimLong:
2250      locations->SetInAt(0, Location::RequiresRegister());
2251      locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
2252      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2253      break;
2254
2255    case Primitive::kPrimFloat:
2256    case Primitive::kPrimDouble:
2257      locations->SetInAt(0, Location::RequiresFpuRegister());
2258      locations->SetInAt(1, Location::RequiresFpuRegister());
2259      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2260      break;
2261
2262    default:
2263      LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
2264  }
2265}
2266
2267void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
2268                                           const FieldInfo& field_info) {
2269  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2270
2271  bool object_field_get_with_read_barrier =
2272      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
2273  LocationSummary* locations =
2274      new (GetGraph()->GetArena()) LocationSummary(instruction,
2275                                                   object_field_get_with_read_barrier ?
2276                                                       LocationSummary::kCallOnSlowPath :
2277                                                       LocationSummary::kNoCall);
2278  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
2279    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2280    // We need a temporary register for the read barrier marking slow
2281    // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
2282    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2283        !Runtime::Current()->UseJitCompilation() &&
2284        !field_info.IsVolatile()) {
2285      // If link-time thunks for the Baker read barrier are enabled, for AOT
2286      // non-volatile loads we need a temporary only if the offset is too big.
2287      if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
2288        locations->AddTemp(FixedTempLocation());
2289      }
2290    } else {
2291      locations->AddTemp(Location::RequiresRegister());
2292    }
2293  }
2294  locations->SetInAt(0, Location::RequiresRegister());
2295  if (Primitive::IsFloatingPointType(instruction->GetType())) {
2296    locations->SetOut(Location::RequiresFpuRegister());
2297  } else {
2298    // The output overlaps for an object field get when read barriers
2299    // are enabled: we do not want the load to overwrite the object's
2300    // location, as we need it to emit the read barrier.
2301    locations->SetOut(
2302        Location::RequiresRegister(),
2303        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2304  }
2305}
2306
2307void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
2308                                                   const FieldInfo& field_info) {
2309  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
2310  LocationSummary* locations = instruction->GetLocations();
2311  Location base_loc = locations->InAt(0);
2312  Location out = locations->Out();
2313  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
2314  Primitive::Type field_type = field_info.GetFieldType();
2315  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
2316
2317  if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2318    // Object FieldGet with Baker's read barrier case.
2319    // /* HeapReference<Object> */ out = *(base + offset)
2320    Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
2321    Location maybe_temp =
2322        (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2323    // Note that potential implicit null checks are handled in this
2324    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
2325    codegen_->GenerateFieldLoadWithBakerReadBarrier(
2326        instruction,
2327        out,
2328        base,
2329        offset,
2330        maybe_temp,
2331        /* needs_null_check */ true,
2332        field_info.IsVolatile());
2333  } else {
2334    // General case.
2335    if (field_info.IsVolatile()) {
2336      // Note that a potential implicit null check is handled in this
2337      // CodeGeneratorARM64::LoadAcquire call.
2338      // NB: LoadAcquire will record the pc info if needed.
2339      codegen_->LoadAcquire(
2340          instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
2341    } else {
2342      // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2343      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2344      codegen_->Load(field_type, OutputCPURegister(instruction), field);
2345      codegen_->MaybeRecordImplicitNullCheck(instruction);
2346    }
2347    if (field_type == Primitive::kPrimNot) {
2348      // If read barriers are enabled, emit read barriers other than
2349      // Baker's using a slow path (and also unpoison the loaded
2350      // reference, if heap poisoning is enabled).
2351      codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2352    }
2353  }
2354}
2355
2356void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2357  LocationSummary* locations =
2358      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2359  locations->SetInAt(0, Location::RequiresRegister());
2360  if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2361    locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2362  } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2363    locations->SetInAt(1, Location::RequiresFpuRegister());
2364  } else {
2365    locations->SetInAt(1, Location::RequiresRegister());
2366  }
2367}
2368
2369void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2370                                                   const FieldInfo& field_info,
2371                                                   bool value_can_be_null) {
2372  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2373
2374  Register obj = InputRegisterAt(instruction, 0);
2375  CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2376  CPURegister source = value;
2377  Offset offset = field_info.GetFieldOffset();
2378  Primitive::Type field_type = field_info.GetFieldType();
2379
2380  {
2381    // We use a block to end the scratch scope before the write barrier, thus
2382    // freeing the temporary registers so they can be used in `MarkGCCard`.
2383    UseScratchRegisterScope temps(GetVIXLAssembler());
2384
2385    if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
2386      DCHECK(value.IsW());
2387      Register temp = temps.AcquireW();
2388      __ Mov(temp, value.W());
2389      GetAssembler()->PoisonHeapReference(temp.W());
2390      source = temp;
2391    }
2392
2393    if (field_info.IsVolatile()) {
2394      codegen_->StoreRelease(
2395          instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
2396    } else {
2397      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2398      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2399      codegen_->Store(field_type, source, HeapOperand(obj, offset));
2400      codegen_->MaybeRecordImplicitNullCheck(instruction);
2401    }
2402  }
2403
2404  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2405    codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2406  }
2407}
2408
2409void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2410  Primitive::Type type = instr->GetType();
2411
2412  switch (type) {
2413    case Primitive::kPrimInt:
2414    case Primitive::kPrimLong: {
2415      Register dst = OutputRegister(instr);
2416      Register lhs = InputRegisterAt(instr, 0);
2417      Operand rhs = InputOperandAt(instr, 1);
2418      if (instr->IsAdd()) {
2419        __ Add(dst, lhs, rhs);
2420      } else if (instr->IsAnd()) {
2421        __ And(dst, lhs, rhs);
2422      } else if (instr->IsOr()) {
2423        __ Orr(dst, lhs, rhs);
2424      } else if (instr->IsSub()) {
2425        __ Sub(dst, lhs, rhs);
2426      } else if (instr->IsRor()) {
2427        if (rhs.IsImmediate()) {
2428          uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2429          __ Ror(dst, lhs, shift);
2430        } else {
2431          // Ensure shift distance is in the same size register as the result. If
2432          // we are rotating a long and the shift comes in a w register originally,
2433          // we don't need to sxtw for use as an x since the shift distances are
2434          // all & reg_bits - 1.
2435          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2436        }
2437      } else {
2438        DCHECK(instr->IsXor());
2439        __ Eor(dst, lhs, rhs);
2440      }
2441      break;
2442    }
2443    case Primitive::kPrimFloat:
2444    case Primitive::kPrimDouble: {
2445      FPRegister dst = OutputFPRegister(instr);
2446      FPRegister lhs = InputFPRegisterAt(instr, 0);
2447      FPRegister rhs = InputFPRegisterAt(instr, 1);
2448      if (instr->IsAdd()) {
2449        __ Fadd(dst, lhs, rhs);
2450      } else if (instr->IsSub()) {
2451        __ Fsub(dst, lhs, rhs);
2452      } else {
2453        LOG(FATAL) << "Unexpected floating-point binary operation";
2454      }
2455      break;
2456    }
2457    default:
2458      LOG(FATAL) << "Unexpected binary operation type " << type;
2459  }
2460}
2461
2462void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2463  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2464
2465  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2466  Primitive::Type type = instr->GetResultType();
2467  switch (type) {
2468    case Primitive::kPrimInt:
2469    case Primitive::kPrimLong: {
2470      locations->SetInAt(0, Location::RequiresRegister());
2471      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2472      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2473      break;
2474    }
2475    default:
2476      LOG(FATAL) << "Unexpected shift type " << type;
2477  }
2478}
2479
2480void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2481  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2482
2483  Primitive::Type type = instr->GetType();
2484  switch (type) {
2485    case Primitive::kPrimInt:
2486    case Primitive::kPrimLong: {
2487      Register dst = OutputRegister(instr);
2488      Register lhs = InputRegisterAt(instr, 0);
2489      Operand rhs = InputOperandAt(instr, 1);
2490      if (rhs.IsImmediate()) {
2491        uint32_t shift_value = rhs.GetImmediate() &
2492            (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2493        if (instr->IsShl()) {
2494          __ Lsl(dst, lhs, shift_value);
2495        } else if (instr->IsShr()) {
2496          __ Asr(dst, lhs, shift_value);
2497        } else {
2498          __ Lsr(dst, lhs, shift_value);
2499        }
2500      } else {
2501        Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2502
2503        if (instr->IsShl()) {
2504          __ Lsl(dst, lhs, rhs_reg);
2505        } else if (instr->IsShr()) {
2506          __ Asr(dst, lhs, rhs_reg);
2507        } else {
2508          __ Lsr(dst, lhs, rhs_reg);
2509        }
2510      }
2511      break;
2512    }
2513    default:
2514      LOG(FATAL) << "Unexpected shift operation type " << type;
2515  }
2516}
2517
2518void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2519  HandleBinaryOp(instruction);
2520}
2521
2522void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2523  HandleBinaryOp(instruction);
2524}
2525
2526void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2527  HandleBinaryOp(instruction);
2528}
2529
2530void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2531  HandleBinaryOp(instruction);
2532}
2533
2534void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2535  DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
2536  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
2537  locations->SetInAt(0, Location::RequiresRegister());
2538  // There is no immediate variant of negated bitwise instructions in AArch64.
2539  locations->SetInAt(1, Location::RequiresRegister());
2540  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2541}
2542
2543void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2544  Register dst = OutputRegister(instr);
2545  Register lhs = InputRegisterAt(instr, 0);
2546  Register rhs = InputRegisterAt(instr, 1);
2547
2548  switch (instr->GetOpKind()) {
2549    case HInstruction::kAnd:
2550      __ Bic(dst, lhs, rhs);
2551      break;
2552    case HInstruction::kOr:
2553      __ Orn(dst, lhs, rhs);
2554      break;
2555    case HInstruction::kXor:
2556      __ Eon(dst, lhs, rhs);
2557      break;
2558    default:
2559      LOG(FATAL) << "Unreachable";
2560  }
2561}
2562
2563void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2564    HDataProcWithShifterOp* instruction) {
2565  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
2566         instruction->GetType() == Primitive::kPrimLong);
2567  LocationSummary* locations =
2568      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2569  if (instruction->GetInstrKind() == HInstruction::kNeg) {
2570    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2571  } else {
2572    locations->SetInAt(0, Location::RequiresRegister());
2573  }
2574  locations->SetInAt(1, Location::RequiresRegister());
2575  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2576}
2577
2578void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2579    HDataProcWithShifterOp* instruction) {
2580  Primitive::Type type = instruction->GetType();
2581  HInstruction::InstructionKind kind = instruction->GetInstrKind();
2582  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
2583  Register out = OutputRegister(instruction);
2584  Register left;
2585  if (kind != HInstruction::kNeg) {
2586    left = InputRegisterAt(instruction, 0);
2587  }
2588  // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2589  // shifter operand operation, the IR generating `right_reg` (input to the type
2590  // conversion) can have a different type from the current instruction's type,
2591  // so we manually indicate the type.
2592  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2593  Operand right_operand(0);
2594
2595  HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2596  if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2597    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2598  } else {
2599    right_operand = Operand(right_reg,
2600                            helpers::ShiftFromOpKind(op_kind),
2601                            instruction->GetShiftAmount());
2602  }
2603
2604  // Logical binary operations do not support extension operations in the
2605  // operand. Note that VIXL would still manage if it was passed by generating
2606  // the extension as a separate instruction.
2607  // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2608  DCHECK(!right_operand.IsExtendedRegister() ||
2609         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2610          kind != HInstruction::kNeg));
2611  switch (kind) {
2612    case HInstruction::kAdd:
2613      __ Add(out, left, right_operand);
2614      break;
2615    case HInstruction::kAnd:
2616      __ And(out, left, right_operand);
2617      break;
2618    case HInstruction::kNeg:
2619      DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2620      __ Neg(out, right_operand);
2621      break;
2622    case HInstruction::kOr:
2623      __ Orr(out, left, right_operand);
2624      break;
2625    case HInstruction::kSub:
2626      __ Sub(out, left, right_operand);
2627      break;
2628    case HInstruction::kXor:
2629      __ Eor(out, left, right_operand);
2630      break;
2631    default:
2632      LOG(FATAL) << "Unexpected operation kind: " << kind;
2633      UNREACHABLE();
2634  }
2635}
2636
2637void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2638  LocationSummary* locations =
2639      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
2640  locations->SetInAt(0, Location::RequiresRegister());
2641  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2642  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2643}
2644
2645void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2646  __ Add(OutputRegister(instruction),
2647         InputRegisterAt(instruction, 0),
2648         Operand(InputOperandAt(instruction, 1)));
2649}
2650
2651void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2652  LocationSummary* locations =
2653      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
2654  HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2655  if (instr->GetOpKind() == HInstruction::kSub &&
2656      accumulator->IsConstant() &&
2657      accumulator->AsConstant()->IsArithmeticZero()) {
2658    // Don't allocate register for Mneg instruction.
2659  } else {
2660    locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2661                       Location::RequiresRegister());
2662  }
2663  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2664  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2665  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2666}
2667
2668void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2669  Register res = OutputRegister(instr);
2670  Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2671  Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2672
2673  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2674  // This fixup should be carried out for all multiply-accumulate instructions:
2675  // madd, msub, smaddl, smsubl, umaddl and umsubl.
2676  if (instr->GetType() == Primitive::kPrimLong &&
2677      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2678    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2679    vixl::aarch64::Instruction* prev =
2680        masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2681    if (prev->IsLoadOrStore()) {
2682      // Make sure we emit only exactly one nop.
2683      ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2684      __ nop();
2685    }
2686  }
2687
2688  if (instr->GetOpKind() == HInstruction::kAdd) {
2689    Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2690    __ Madd(res, mul_left, mul_right, accumulator);
2691  } else {
2692    DCHECK(instr->GetOpKind() == HInstruction::kSub);
2693    HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2694    if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2695      __ Mneg(res, mul_left, mul_right);
2696    } else {
2697      Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2698      __ Msub(res, mul_left, mul_right, accumulator);
2699    }
2700  }
2701}
2702
2703void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2704  bool object_array_get_with_read_barrier =
2705      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
2706  LocationSummary* locations =
2707      new (GetGraph()->GetArena()) LocationSummary(instruction,
2708                                                   object_array_get_with_read_barrier ?
2709                                                       LocationSummary::kCallOnSlowPath :
2710                                                       LocationSummary::kNoCall);
2711  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2712    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2713    // We need a temporary register for the read barrier marking slow
2714    // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
2715    if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
2716        !Runtime::Current()->UseJitCompilation() &&
2717        instruction->GetIndex()->IsConstant()) {
2718      // Array loads with constant index are treated as field loads.
2719      // If link-time thunks for the Baker read barrier are enabled, for AOT
2720      // constant index loads we need a temporary only if the offset is too big.
2721      uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2722      uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2723      offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
2724      if (offset >= kReferenceLoadMinFarOffset) {
2725        locations->AddTemp(FixedTempLocation());
2726      }
2727    } else {
2728      locations->AddTemp(Location::RequiresRegister());
2729    }
2730  }
2731  locations->SetInAt(0, Location::RequiresRegister());
2732  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2733  if (Primitive::IsFloatingPointType(instruction->GetType())) {
2734    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2735  } else {
2736    // The output overlaps in the case of an object array get with
2737    // read barriers enabled: we do not want the move to overwrite the
2738    // array's location, as we need it to emit the read barrier.
2739    locations->SetOut(
2740        Location::RequiresRegister(),
2741        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2742  }
2743}
2744
2745void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2746  Primitive::Type type = instruction->GetType();
2747  Register obj = InputRegisterAt(instruction, 0);
2748  LocationSummary* locations = instruction->GetLocations();
2749  Location index = locations->InAt(1);
2750  Location out = locations->Out();
2751  uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2752  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2753                                        instruction->IsStringCharAt();
2754  MacroAssembler* masm = GetVIXLAssembler();
2755  UseScratchRegisterScope temps(masm);
2756
2757  // The read barrier instrumentation of object ArrayGet instructions
2758  // does not support the HIntermediateAddress instruction.
2759  DCHECK(!((type == Primitive::kPrimNot) &&
2760           instruction->GetArray()->IsIntermediateAddress() &&
2761           kEmitCompilerReadBarrier));
2762
2763  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2764    // Object ArrayGet with Baker's read barrier case.
2765    // Note that a potential implicit null check is handled in the
2766    // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2767    if (index.IsConstant()) {
2768      // Array load with a constant index can be treated as a field load.
2769      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
2770      Location maybe_temp =
2771          (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2772      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2773                                                      out,
2774                                                      obj.W(),
2775                                                      offset,
2776                                                      maybe_temp,
2777                                                      /* needs_null_check */ true,
2778                                                      /* use_load_acquire */ false);
2779    } else {
2780      Register temp = WRegisterFrom(locations->GetTemp(0));
2781      codegen_->GenerateArrayLoadWithBakerReadBarrier(
2782          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
2783    }
2784  } else {
2785    // General case.
2786    MemOperand source = HeapOperand(obj);
2787    Register length;
2788    if (maybe_compressed_char_at) {
2789      uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2790      length = temps.AcquireW();
2791      {
2792        // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2793        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2794
2795        if (instruction->GetArray()->IsIntermediateAddress()) {
2796          DCHECK_LT(count_offset, offset);
2797          int64_t adjusted_offset =
2798              static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2799          // Note that `adjusted_offset` is negative, so this will be a LDUR.
2800          __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2801        } else {
2802          __ Ldr(length, HeapOperand(obj, count_offset));
2803        }
2804        codegen_->MaybeRecordImplicitNullCheck(instruction);
2805      }
2806    }
2807    if (index.IsConstant()) {
2808      if (maybe_compressed_char_at) {
2809        vixl::aarch64::Label uncompressed_load, done;
2810        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2811                      "Expecting 0=compressed, 1=uncompressed");
2812        __ Tbnz(length.W(), 0, &uncompressed_load);
2813        __ Ldrb(Register(OutputCPURegister(instruction)),
2814                HeapOperand(obj, offset + Int64ConstantFrom(index)));
2815        __ B(&done);
2816        __ Bind(&uncompressed_load);
2817        __ Ldrh(Register(OutputCPURegister(instruction)),
2818                HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
2819        __ Bind(&done);
2820      } else {
2821        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
2822        source = HeapOperand(obj, offset);
2823      }
2824    } else {
2825      Register temp = temps.AcquireSameSizeAs(obj);
2826      if (instruction->GetArray()->IsIntermediateAddress()) {
2827        // We do not need to compute the intermediate address from the array: the
2828        // input instruction has done it already. See the comment in
2829        // `TryExtractArrayAccessAddress()`.
2830        if (kIsDebugBuild) {
2831          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2832          DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2833        }
2834        temp = obj;
2835      } else {
2836        __ Add(temp, obj, offset);
2837      }
2838      if (maybe_compressed_char_at) {
2839        vixl::aarch64::Label uncompressed_load, done;
2840        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2841                      "Expecting 0=compressed, 1=uncompressed");
2842        __ Tbnz(length.W(), 0, &uncompressed_load);
2843        __ Ldrb(Register(OutputCPURegister(instruction)),
2844                HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2845        __ B(&done);
2846        __ Bind(&uncompressed_load);
2847        __ Ldrh(Register(OutputCPURegister(instruction)),
2848                HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2849        __ Bind(&done);
2850      } else {
2851        source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
2852      }
2853    }
2854    if (!maybe_compressed_char_at) {
2855      // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2856      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2857      codegen_->Load(type, OutputCPURegister(instruction), source);
2858      codegen_->MaybeRecordImplicitNullCheck(instruction);
2859    }
2860
2861    if (type == Primitive::kPrimNot) {
2862      static_assert(
2863          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2864          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2865      Location obj_loc = locations->InAt(0);
2866      if (index.IsConstant()) {
2867        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2868      } else {
2869        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2870      }
2871    }
2872  }
2873}
2874
2875void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2876  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
2877  locations->SetInAt(0, Location::RequiresRegister());
2878  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2879}
2880
2881void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2882  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2883  vixl::aarch64::Register out = OutputRegister(instruction);
2884  {
2885    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2886    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2887    __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2888    codegen_->MaybeRecordImplicitNullCheck(instruction);
2889  }
2890  // Mask out compression flag from String's array length.
2891  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2892    __ Lsr(out.W(), out.W(), 1u);
2893  }
2894}
2895
2896void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2897  Primitive::Type value_type = instruction->GetComponentType();
2898
2899  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2900  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
2901      instruction,
2902      may_need_runtime_call_for_type_check ?
2903          LocationSummary::kCallOnSlowPath :
2904          LocationSummary::kNoCall);
2905  locations->SetInAt(0, Location::RequiresRegister());
2906  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2907  if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2908    locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2909  } else if (Primitive::IsFloatingPointType(value_type)) {
2910    locations->SetInAt(2, Location::RequiresFpuRegister());
2911  } else {
2912    locations->SetInAt(2, Location::RequiresRegister());
2913  }
2914}
2915
2916void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2917  Primitive::Type value_type = instruction->GetComponentType();
2918  LocationSummary* locations = instruction->GetLocations();
2919  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
2920  bool needs_write_barrier =
2921      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2922
2923  Register array = InputRegisterAt(instruction, 0);
2924  CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2925  CPURegister source = value;
2926  Location index = locations->InAt(1);
2927  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
2928  MemOperand destination = HeapOperand(array);
2929  MacroAssembler* masm = GetVIXLAssembler();
2930
2931  if (!needs_write_barrier) {
2932    DCHECK(!may_need_runtime_call_for_type_check);
2933    if (index.IsConstant()) {
2934      offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
2935      destination = HeapOperand(array, offset);
2936    } else {
2937      UseScratchRegisterScope temps(masm);
2938      Register temp = temps.AcquireSameSizeAs(array);
2939      if (instruction->GetArray()->IsIntermediateAddress()) {
2940        // We do not need to compute the intermediate address from the array: the
2941        // input instruction has done it already. See the comment in
2942        // `TryExtractArrayAccessAddress()`.
2943        if (kIsDebugBuild) {
2944          HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
2945          DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2946        }
2947        temp = array;
2948      } else {
2949        __ Add(temp, array, offset);
2950      }
2951      destination = HeapOperand(temp,
2952                                XRegisterFrom(index),
2953                                LSL,
2954                                Primitive::ComponentSizeShift(value_type));
2955    }
2956    {
2957      // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2958      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2959      codegen_->Store(value_type, value, destination);
2960      codegen_->MaybeRecordImplicitNullCheck(instruction);
2961    }
2962  } else {
2963    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2964    vixl::aarch64::Label done;
2965    SlowPathCodeARM64* slow_path = nullptr;
2966    {
2967      // We use a block to end the scratch scope before the write barrier, thus
2968      // freeing the temporary registers so they can be used in `MarkGCCard`.
2969      UseScratchRegisterScope temps(masm);
2970      Register temp = temps.AcquireSameSizeAs(array);
2971      if (index.IsConstant()) {
2972        offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
2973        destination = HeapOperand(array, offset);
2974      } else {
2975        destination = HeapOperand(temp,
2976                                  XRegisterFrom(index),
2977                                  LSL,
2978                                  Primitive::ComponentSizeShift(value_type));
2979      }
2980
2981      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2982      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2983      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2984
2985      if (may_need_runtime_call_for_type_check) {
2986        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
2987        codegen_->AddSlowPath(slow_path);
2988        if (instruction->GetValueCanBeNull()) {
2989          vixl::aarch64::Label non_zero;
2990          __ Cbnz(Register(value), &non_zero);
2991          if (!index.IsConstant()) {
2992            __ Add(temp, array, offset);
2993          }
2994          {
2995            // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools
2996            // emitted.
2997            EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2998            __ Str(wzr, destination);
2999            codegen_->MaybeRecordImplicitNullCheck(instruction);
3000          }
3001          __ B(&done);
3002          __ Bind(&non_zero);
3003        }
3004
3005        // Note that when Baker read barriers are enabled, the type
3006        // checks are performed without read barriers.  This is fine,
3007        // even in the case where a class object is in the from-space
3008        // after the flip, as a comparison involving such a type would
3009        // not produce a false positive; it may of course produce a
3010        // false negative, in which case we would take the ArraySet
3011        // slow path.
3012
3013        Register temp2 = temps.AcquireSameSizeAs(array);
3014        // /* HeapReference<Class> */ temp = array->klass_
3015        {
3016          // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
3017          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3018          __ Ldr(temp, HeapOperand(array, class_offset));
3019          codegen_->MaybeRecordImplicitNullCheck(instruction);
3020        }
3021        GetAssembler()->MaybeUnpoisonHeapReference(temp);
3022
3023        // /* HeapReference<Class> */ temp = temp->component_type_
3024        __ Ldr(temp, HeapOperand(temp, component_offset));
3025        // /* HeapReference<Class> */ temp2 = value->klass_
3026        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
3027        // If heap poisoning is enabled, no need to unpoison `temp`
3028        // nor `temp2`, as we are comparing two poisoned references.
3029        __ Cmp(temp, temp2);
3030        temps.Release(temp2);
3031
3032        if (instruction->StaticTypeOfArrayIsObjectArray()) {
3033          vixl::aarch64::Label do_put;
3034          __ B(eq, &do_put);
3035          // If heap poisoning is enabled, the `temp` reference has
3036          // not been unpoisoned yet; unpoison it now.
3037          GetAssembler()->MaybeUnpoisonHeapReference(temp);
3038
3039          // /* HeapReference<Class> */ temp = temp->super_class_
3040          __ Ldr(temp, HeapOperand(temp, super_offset));
3041          // If heap poisoning is enabled, no need to unpoison
3042          // `temp`, as we are comparing against null below.
3043          __ Cbnz(temp, slow_path->GetEntryLabel());
3044          __ Bind(&do_put);
3045        } else {
3046          __ B(ne, slow_path->GetEntryLabel());
3047        }
3048      }
3049
3050      if (kPoisonHeapReferences) {
3051        Register temp2 = temps.AcquireSameSizeAs(array);
3052          DCHECK(value.IsW());
3053        __ Mov(temp2, value.W());
3054        GetAssembler()->PoisonHeapReference(temp2);
3055        source = temp2;
3056      }
3057
3058      if (!index.IsConstant()) {
3059        __ Add(temp, array, offset);
3060      } else {
3061        // We no longer need the `temp` here so release it as the store below may
3062        // need a scratch register (if the constant index makes the offset too large)
3063        // and the poisoned `source` could be using the other scratch register.
3064        temps.Release(temp);
3065      }
3066      {
3067        // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
3068        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
3069        __ Str(source, destination);
3070
3071        if (!may_need_runtime_call_for_type_check) {
3072          codegen_->MaybeRecordImplicitNullCheck(instruction);
3073        }
3074      }
3075    }
3076
3077    codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull());
3078
3079    if (done.IsLinked()) {
3080      __ Bind(&done);
3081    }
3082
3083    if (slow_path != nullptr) {
3084      __ Bind(slow_path->GetExitLabel());
3085    }
3086  }
3087}
3088
3089void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3090  RegisterSet caller_saves = RegisterSet::Empty();
3091  InvokeRuntimeCallingConvention calling_convention;
3092  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3093  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
3094  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
3095  locations->SetInAt(0, Location::RequiresRegister());
3096  locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3097}
3098
3099void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
3100  BoundsCheckSlowPathARM64* slow_path =
3101      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction);
3102  codegen_->AddSlowPath(slow_path);
3103  __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
3104  __ B(slow_path->GetEntryLabel(), hs);
3105}
3106
3107void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
3108  LocationSummary* locations =
3109      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
3110  locations->SetInAt(0, Location::RequiresRegister());
3111  if (check->HasUses()) {
3112    locations->SetOut(Location::SameAsFirstInput());
3113  }
3114}
3115
3116void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
3117  // We assume the class is not null.
3118  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
3119      check->GetLoadClass(), check, check->GetDexPc(), true);
3120  codegen_->AddSlowPath(slow_path);
3121  GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
3122}
3123
3124static bool IsFloatingPointZeroConstant(HInstruction* inst) {
3125  return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
3126      || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
3127}
3128
3129void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
3130  FPRegister lhs_reg = InputFPRegisterAt(instruction, 0);
3131  Location rhs_loc = instruction->GetLocations()->InAt(1);
3132  if (rhs_loc.IsConstant()) {
3133    // 0.0 is the only immediate that can be encoded directly in
3134    // an FCMP instruction.
3135    //
3136    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
3137    // specify that in a floating-point comparison, positive zero
3138    // and negative zero are considered equal, so we can use the
3139    // literal 0.0 for both cases here.
3140    //
3141    // Note however that some methods (Float.equal, Float.compare,
3142    // Float.compareTo, Double.equal, Double.compare,
3143    // Double.compareTo, Math.max, Math.min, StrictMath.max,
3144    // StrictMath.min) consider 0.0 to be (strictly) greater than
3145    // -0.0. So if we ever translate calls to these methods into a
3146    // HCompare instruction, we must handle the -0.0 case with
3147    // care here.
3148    DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
3149    __ Fcmp(lhs_reg, 0.0);
3150  } else {
3151    __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
3152  }
3153}
3154
3155void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
3156  LocationSummary* locations =
3157      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
3158  Primitive::Type in_type = compare->InputAt(0)->GetType();
3159  switch (in_type) {
3160    case Primitive::kPrimBoolean:
3161    case Primitive::kPrimByte:
3162    case Primitive::kPrimShort:
3163    case Primitive::kPrimChar:
3164    case Primitive::kPrimInt:
3165    case Primitive::kPrimLong: {
3166      locations->SetInAt(0, Location::RequiresRegister());
3167      locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
3168      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3169      break;
3170    }
3171    case Primitive::kPrimFloat:
3172    case Primitive::kPrimDouble: {
3173      locations->SetInAt(0, Location::RequiresFpuRegister());
3174      locations->SetInAt(1,
3175                         IsFloatingPointZeroConstant(compare->InputAt(1))
3176                             ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
3177                             : Location::RequiresFpuRegister());
3178      locations->SetOut(Location::RequiresRegister());
3179      break;
3180    }
3181    default:
3182      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
3183  }
3184}
3185
3186void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
3187  Primitive::Type in_type = compare->InputAt(0)->GetType();
3188
3189  //  0 if: left == right
3190  //  1 if: left  > right
3191  // -1 if: left  < right
3192  switch (in_type) {
3193    case Primitive::kPrimBoolean:
3194    case Primitive::kPrimByte:
3195    case Primitive::kPrimShort:
3196    case Primitive::kPrimChar:
3197    case Primitive::kPrimInt:
3198    case Primitive::kPrimLong: {
3199      Register result = OutputRegister(compare);
3200      Register left = InputRegisterAt(compare, 0);
3201      Operand right = InputOperandAt(compare, 1);
3202      __ Cmp(left, right);
3203      __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
3204      __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
3205      break;
3206    }
3207    case Primitive::kPrimFloat:
3208    case Primitive::kPrimDouble: {
3209      Register result = OutputRegister(compare);
3210      GenerateFcmp(compare);
3211      __ Cset(result, ne);
3212      __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
3213      break;
3214    }
3215    default:
3216      LOG(FATAL) << "Unimplemented compare type " << in_type;
3217  }
3218}
3219
3220void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
3221  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
3222
3223  if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3224    locations->SetInAt(0, Location::RequiresFpuRegister());
3225    locations->SetInAt(1,
3226                       IsFloatingPointZeroConstant(instruction->InputAt(1))
3227                           ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
3228                           : Location::RequiresFpuRegister());
3229  } else {
3230    // Integer cases.
3231    locations->SetInAt(0, Location::RequiresRegister());
3232    locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
3233  }
3234
3235  if (!instruction->IsEmittedAtUseSite()) {
3236    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3237  }
3238}
3239
3240void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
3241  if (instruction->IsEmittedAtUseSite()) {
3242    return;
3243  }
3244
3245  LocationSummary* locations = instruction->GetLocations();
3246  Register res = RegisterFrom(locations->Out(), instruction->GetType());
3247  IfCondition if_cond = instruction->GetCondition();
3248
3249  if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
3250    GenerateFcmp(instruction);
3251    __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
3252  } else {
3253    // Integer cases.
3254    Register lhs = InputRegisterAt(instruction, 0);
3255    Operand rhs = InputOperandAt(instruction, 1);
3256    __ Cmp(lhs, rhs);
3257    __ Cset(res, ARM64Condition(if_cond));
3258  }
3259}
3260
3261#define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
3262  M(Equal)                                                                               \
3263  M(NotEqual)                                                                            \
3264  M(LessThan)                                                                            \
3265  M(LessThanOrEqual)                                                                     \
3266  M(GreaterThan)                                                                         \
3267  M(GreaterThanOrEqual)                                                                  \
3268  M(Below)                                                                               \
3269  M(BelowOrEqual)                                                                        \
3270  M(Above)                                                                               \
3271  M(AboveOrEqual)
3272#define DEFINE_CONDITION_VISITORS(Name)                                                  \
3273void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3274void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3275FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3276#undef DEFINE_CONDITION_VISITORS
3277#undef FOR_EACH_CONDITION_INSTRUCTION
3278
3279void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3280  DCHECK(instruction->IsDiv() || instruction->IsRem());
3281
3282  LocationSummary* locations = instruction->GetLocations();
3283  Location second = locations->InAt(1);
3284  DCHECK(second.IsConstant());
3285
3286  Register out = OutputRegister(instruction);
3287  Register dividend = InputRegisterAt(instruction, 0);
3288  int64_t imm = Int64FromConstant(second.GetConstant());
3289  DCHECK(imm == 1 || imm == -1);
3290
3291  if (instruction->IsRem()) {
3292    __ Mov(out, 0);
3293  } else {
3294    if (imm == 1) {
3295      __ Mov(out, dividend);
3296    } else {
3297      __ Neg(out, dividend);
3298    }
3299  }
3300}
3301
3302void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
3303  DCHECK(instruction->IsDiv() || instruction->IsRem());
3304
3305  LocationSummary* locations = instruction->GetLocations();
3306  Location second = locations->InAt(1);
3307  DCHECK(second.IsConstant());
3308
3309  Register out = OutputRegister(instruction);
3310  Register dividend = InputRegisterAt(instruction, 0);
3311  int64_t imm = Int64FromConstant(second.GetConstant());
3312  uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3313  int ctz_imm = CTZ(abs_imm);
3314
3315  UseScratchRegisterScope temps(GetVIXLAssembler());
3316  Register temp = temps.AcquireSameSizeAs(out);
3317
3318  if (instruction->IsDiv()) {
3319    __ Add(temp, dividend, abs_imm - 1);
3320    __ Cmp(dividend, 0);
3321    __ Csel(out, temp, dividend, lt);
3322    if (imm > 0) {
3323      __ Asr(out, out, ctz_imm);
3324    } else {
3325      __ Neg(out, Operand(out, ASR, ctz_imm));
3326    }
3327  } else {
3328    int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64;
3329    __ Asr(temp, dividend, bits - 1);
3330    __ Lsr(temp, temp, bits - ctz_imm);
3331    __ Add(out, dividend, temp);
3332    __ And(out, out, abs_imm - 1);
3333    __ Sub(out, out, temp);
3334  }
3335}
3336
3337void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3338  DCHECK(instruction->IsDiv() || instruction->IsRem());
3339
3340  LocationSummary* locations = instruction->GetLocations();
3341  Location second = locations->InAt(1);
3342  DCHECK(second.IsConstant());
3343
3344  Register out = OutputRegister(instruction);
3345  Register dividend = InputRegisterAt(instruction, 0);
3346  int64_t imm = Int64FromConstant(second.GetConstant());
3347
3348  Primitive::Type type = instruction->GetResultType();
3349  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3350
3351  int64_t magic;
3352  int shift;
3353  CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift);
3354
3355  UseScratchRegisterScope temps(GetVIXLAssembler());
3356  Register temp = temps.AcquireSameSizeAs(out);
3357
3358  // temp = get_high(dividend * magic)
3359  __ Mov(temp, magic);
3360  if (type == Primitive::kPrimLong) {
3361    __ Smulh(temp, dividend, temp);
3362  } else {
3363    __ Smull(temp.X(), dividend, temp);
3364    __ Lsr(temp.X(), temp.X(), 32);
3365  }
3366
3367  if (imm > 0 && magic < 0) {
3368    __ Add(temp, temp, dividend);
3369  } else if (imm < 0 && magic > 0) {
3370    __ Sub(temp, temp, dividend);
3371  }
3372
3373  if (shift != 0) {
3374    __ Asr(temp, temp, shift);
3375  }
3376
3377  if (instruction->IsDiv()) {
3378    __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
3379  } else {
3380    __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31));
3381    // TODO: Strength reduction for msub.
3382    Register temp_imm = temps.AcquireSameSizeAs(out);
3383    __ Mov(temp_imm, imm);
3384    __ Msub(out, temp, temp_imm, dividend);
3385  }
3386}
3387
3388void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3389  DCHECK(instruction->IsDiv() || instruction->IsRem());
3390  Primitive::Type type = instruction->GetResultType();
3391  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3392
3393  LocationSummary* locations = instruction->GetLocations();
3394  Register out = OutputRegister(instruction);
3395  Location second = locations->InAt(1);
3396
3397  if (second.IsConstant()) {
3398    int64_t imm = Int64FromConstant(second.GetConstant());
3399
3400    if (imm == 0) {
3401      // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3402    } else if (imm == 1 || imm == -1) {
3403      DivRemOneOrMinusOne(instruction);
3404    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3405      DivRemByPowerOfTwo(instruction);
3406    } else {
3407      DCHECK(imm <= -2 || imm >= 2);
3408      GenerateDivRemWithAnyConstant(instruction);
3409    }
3410  } else {
3411    Register dividend = InputRegisterAt(instruction, 0);
3412    Register divisor = InputRegisterAt(instruction, 1);
3413    if (instruction->IsDiv()) {
3414      __ Sdiv(out, dividend, divisor);
3415    } else {
3416      UseScratchRegisterScope temps(GetVIXLAssembler());
3417      Register temp = temps.AcquireSameSizeAs(out);
3418      __ Sdiv(temp, dividend, divisor);
3419      __ Msub(out, temp, divisor, dividend);
3420    }
3421  }
3422}
3423
3424void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3425  LocationSummary* locations =
3426      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3427  switch (div->GetResultType()) {
3428    case Primitive::kPrimInt:
3429    case Primitive::kPrimLong:
3430      locations->SetInAt(0, Location::RequiresRegister());
3431      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3432      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3433      break;
3434
3435    case Primitive::kPrimFloat:
3436    case Primitive::kPrimDouble:
3437      locations->SetInAt(0, Location::RequiresFpuRegister());
3438      locations->SetInAt(1, Location::RequiresFpuRegister());
3439      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3440      break;
3441
3442    default:
3443      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3444  }
3445}
3446
3447void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3448  Primitive::Type type = div->GetResultType();
3449  switch (type) {
3450    case Primitive::kPrimInt:
3451    case Primitive::kPrimLong:
3452      GenerateDivRemIntegral(div);
3453      break;
3454
3455    case Primitive::kPrimFloat:
3456    case Primitive::kPrimDouble:
3457      __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3458      break;
3459
3460    default:
3461      LOG(FATAL) << "Unexpected div type " << type;
3462  }
3463}
3464
3465void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3466  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3467  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3468}
3469
3470void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3471  SlowPathCodeARM64* slow_path =
3472      new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
3473  codegen_->AddSlowPath(slow_path);
3474  Location value = instruction->GetLocations()->InAt(0);
3475
3476  Primitive::Type type = instruction->GetType();
3477
3478  if (!Primitive::IsIntegralType(type)) {
3479    LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3480    return;
3481  }
3482
3483  if (value.IsConstant()) {
3484    int64_t divisor = Int64ConstantFrom(value);
3485    if (divisor == 0) {
3486      __ B(slow_path->GetEntryLabel());
3487    } else {
3488      // A division by a non-null constant is valid. We don't need to perform
3489      // any check, so simply fall through.
3490    }
3491  } else {
3492    __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3493  }
3494}
3495
3496void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3497  LocationSummary* locations =
3498      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
3499  locations->SetOut(Location::ConstantLocation(constant));
3500}
3501
3502void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3503    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3504  // Will be generated at use site.
3505}
3506
3507void LocationsBuilderARM64::VisitExit(HExit* exit) {
3508  exit->SetLocations(nullptr);
3509}
3510
3511void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3512}
3513
3514void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3515  LocationSummary* locations =
3516      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
3517  locations->SetOut(Location::ConstantLocation(constant));
3518}
3519
3520void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3521  // Will be generated at use site.
3522}
3523
3524void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3525  DCHECK(!successor->IsExitBlock());
3526  HBasicBlock* block = got->GetBlock();
3527  HInstruction* previous = got->GetPrevious();
3528  HLoopInformation* info = block->GetLoopInformation();
3529
3530  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3531    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
3532    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3533    return;
3534  }
3535  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3536    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3537  }
3538  if (!codegen_->GoesToNextBlock(block, successor)) {
3539    __ B(codegen_->GetLabelOf(successor));
3540  }
3541}
3542
3543void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3544  got->SetLocations(nullptr);
3545}
3546
3547void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3548  HandleGoto(got, got->GetSuccessor());
3549}
3550
3551void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3552  try_boundary->SetLocations(nullptr);
3553}
3554
3555void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3556  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3557  if (!successor->IsExitBlock()) {
3558    HandleGoto(try_boundary, successor);
3559  }
3560}
3561
3562void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3563                                                          size_t condition_input_index,
3564                                                          vixl::aarch64::Label* true_target,
3565                                                          vixl::aarch64::Label* false_target) {
3566  HInstruction* cond = instruction->InputAt(condition_input_index);
3567
3568  if (true_target == nullptr && false_target == nullptr) {
3569    // Nothing to do. The code always falls through.
3570    return;
3571  } else if (cond->IsIntConstant()) {
3572    // Constant condition, statically compared against "true" (integer value 1).
3573    if (cond->AsIntConstant()->IsTrue()) {
3574      if (true_target != nullptr) {
3575        __ B(true_target);
3576      }
3577    } else {
3578      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3579      if (false_target != nullptr) {
3580        __ B(false_target);
3581      }
3582    }
3583    return;
3584  }
3585
3586  // The following code generates these patterns:
3587  //  (1) true_target == nullptr && false_target != nullptr
3588  //        - opposite condition true => branch to false_target
3589  //  (2) true_target != nullptr && false_target == nullptr
3590  //        - condition true => branch to true_target
3591  //  (3) true_target != nullptr && false_target != nullptr
3592  //        - condition true => branch to true_target
3593  //        - branch to false_target
3594  if (IsBooleanValueOrMaterializedCondition(cond)) {
3595    // The condition instruction has been materialized, compare the output to 0.
3596    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3597    DCHECK(cond_val.IsRegister());
3598      if (true_target == nullptr) {
3599      __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3600    } else {
3601      __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3602    }
3603  } else {
3604    // The condition instruction has not been materialized, use its inputs as
3605    // the comparison and its condition as the branch condition.
3606    HCondition* condition = cond->AsCondition();
3607
3608    Primitive::Type type = condition->InputAt(0)->GetType();
3609    if (Primitive::IsFloatingPointType(type)) {
3610      GenerateFcmp(condition);
3611      if (true_target == nullptr) {
3612        IfCondition opposite_condition = condition->GetOppositeCondition();
3613        __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3614      } else {
3615        __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3616      }
3617    } else {
3618      // Integer cases.
3619      Register lhs = InputRegisterAt(condition, 0);
3620      Operand rhs = InputOperandAt(condition, 1);
3621
3622      Condition arm64_cond;
3623      vixl::aarch64::Label* non_fallthrough_target;
3624      if (true_target == nullptr) {
3625        arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3626        non_fallthrough_target = false_target;
3627      } else {
3628        arm64_cond = ARM64Condition(condition->GetCondition());
3629        non_fallthrough_target = true_target;
3630      }
3631
3632      if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3633          rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3634        switch (arm64_cond) {
3635          case eq:
3636            __ Cbz(lhs, non_fallthrough_target);
3637            break;
3638          case ne:
3639            __ Cbnz(lhs, non_fallthrough_target);
3640            break;
3641          case lt:
3642            // Test the sign bit and branch accordingly.
3643            __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3644            break;
3645          case ge:
3646            // Test the sign bit and branch accordingly.
3647            __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3648            break;
3649          default:
3650            // Without the `static_cast` the compiler throws an error for
3651            // `-Werror=sign-promo`.
3652            LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3653        }
3654      } else {
3655        __ Cmp(lhs, rhs);
3656        __ B(arm64_cond, non_fallthrough_target);
3657      }
3658    }
3659  }
3660
3661  // If neither branch falls through (case 3), the conditional branch to `true_target`
3662  // was already emitted (case 2) and we need to emit a jump to `false_target`.
3663  if (true_target != nullptr && false_target != nullptr) {
3664    __ B(false_target);
3665  }
3666}
3667
3668void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3669  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
3670  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3671    locations->SetInAt(0, Location::RequiresRegister());
3672  }
3673}
3674
3675void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3676  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3677  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3678  vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3679  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3680    true_target = nullptr;
3681  }
3682  vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3683  if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3684    false_target = nullptr;
3685  }
3686  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
3687}
3688
3689void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3690  LocationSummary* locations = new (GetGraph()->GetArena())
3691      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3692  InvokeRuntimeCallingConvention calling_convention;
3693  RegisterSet caller_saves = RegisterSet::Empty();
3694  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3695  locations->SetCustomSlowPathCallerSaves(caller_saves);
3696  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3697    locations->SetInAt(0, Location::RequiresRegister());
3698  }
3699}
3700
3701void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3702  SlowPathCodeARM64* slow_path =
3703      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3704  GenerateTestAndBranch(deoptimize,
3705                        /* condition_input_index */ 0,
3706                        slow_path->GetEntryLabel(),
3707                        /* false_target */ nullptr);
3708}
3709
3710void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3711  LocationSummary* locations = new (GetGraph()->GetArena())
3712      LocationSummary(flag, LocationSummary::kNoCall);
3713  locations->SetOut(Location::RequiresRegister());
3714}
3715
3716void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3717  __ Ldr(OutputRegister(flag),
3718         MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3719}
3720
3721static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3722  return condition->IsCondition() &&
3723         Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
3724}
3725
3726static inline Condition GetConditionForSelect(HCondition* condition) {
3727  IfCondition cond = condition->AsCondition()->GetCondition();
3728  return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3729                                                     : ARM64Condition(cond);
3730}
3731
3732void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3733  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
3734  if (Primitive::IsFloatingPointType(select->GetType())) {
3735    locations->SetInAt(0, Location::RequiresFpuRegister());
3736    locations->SetInAt(1, Location::RequiresFpuRegister());
3737    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3738  } else {
3739    HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3740    HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3741    bool is_true_value_constant = cst_true_value != nullptr;
3742    bool is_false_value_constant = cst_false_value != nullptr;
3743    // Ask VIXL whether we should synthesize constants in registers.
3744    // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3745    Operand true_op = is_true_value_constant ?
3746        Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3747    Operand false_op = is_false_value_constant ?
3748        Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3749    bool true_value_in_register = false;
3750    bool false_value_in_register = false;
3751    MacroAssembler::GetCselSynthesisInformation(
3752        x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3753    true_value_in_register |= !is_true_value_constant;
3754    false_value_in_register |= !is_false_value_constant;
3755
3756    locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3757                                                 : Location::ConstantLocation(cst_true_value));
3758    locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3759                                                  : Location::ConstantLocation(cst_false_value));
3760    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3761  }
3762
3763  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3764    locations->SetInAt(2, Location::RequiresRegister());
3765  }
3766}
3767
3768void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3769  HInstruction* cond = select->GetCondition();
3770  Condition csel_cond;
3771
3772  if (IsBooleanValueOrMaterializedCondition(cond)) {
3773    if (cond->IsCondition() && cond->GetNext() == select) {
3774      // Use the condition flags set by the previous instruction.
3775      csel_cond = GetConditionForSelect(cond->AsCondition());
3776    } else {
3777      __ Cmp(InputRegisterAt(select, 2), 0);
3778      csel_cond = ne;
3779    }
3780  } else if (IsConditionOnFloatingPointValues(cond)) {
3781    GenerateFcmp(cond);
3782    csel_cond = GetConditionForSelect(cond->AsCondition());
3783  } else {
3784    __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3785    csel_cond = GetConditionForSelect(cond->AsCondition());
3786  }
3787
3788  if (Primitive::IsFloatingPointType(select->GetType())) {
3789    __ Fcsel(OutputFPRegister(select),
3790             InputFPRegisterAt(select, 1),
3791             InputFPRegisterAt(select, 0),
3792             csel_cond);
3793  } else {
3794    __ Csel(OutputRegister(select),
3795            InputOperandAt(select, 1),
3796            InputOperandAt(select, 0),
3797            csel_cond);
3798  }
3799}
3800
3801void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3802  new (GetGraph()->GetArena()) LocationSummary(info);
3803}
3804
3805void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3806  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3807}
3808
3809void CodeGeneratorARM64::GenerateNop() {
3810  __ Nop();
3811}
3812
3813void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3814  HandleFieldGet(instruction, instruction->GetFieldInfo());
3815}
3816
3817void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3818  HandleFieldGet(instruction, instruction->GetFieldInfo());
3819}
3820
3821void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3822  HandleFieldSet(instruction);
3823}
3824
3825void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3826  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3827}
3828
3829// Temp is used for read barrier.
3830static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3831  if (kEmitCompilerReadBarrier &&
3832      (kUseBakerReadBarrier ||
3833          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3834          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3835          type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3836    return 1;
3837  }
3838  return 0;
3839}
3840
3841// Interface case has 3 temps, one for holding the number of interfaces, one for the current
3842// interface pointer, one for loading the current interface.
3843// The other checks have one temp for loading the object's class.
3844static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3845  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3846    return 3;
3847  }
3848  return 1 + NumberOfInstanceOfTemps(type_check_kind);
3849}
3850
3851void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3852  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3853  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3854  bool baker_read_barrier_slow_path = false;
3855  switch (type_check_kind) {
3856    case TypeCheckKind::kExactCheck:
3857    case TypeCheckKind::kAbstractClassCheck:
3858    case TypeCheckKind::kClassHierarchyCheck:
3859    case TypeCheckKind::kArrayObjectCheck:
3860      call_kind =
3861          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3862      baker_read_barrier_slow_path = kUseBakerReadBarrier;
3863      break;
3864    case TypeCheckKind::kArrayCheck:
3865    case TypeCheckKind::kUnresolvedCheck:
3866    case TypeCheckKind::kInterfaceCheck:
3867      call_kind = LocationSummary::kCallOnSlowPath;
3868      break;
3869  }
3870
3871  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3872  if (baker_read_barrier_slow_path) {
3873    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3874  }
3875  locations->SetInAt(0, Location::RequiresRegister());
3876  locations->SetInAt(1, Location::RequiresRegister());
3877  // The "out" register is used as a temporary, so it overlaps with the inputs.
3878  // Note that TypeCheckSlowPathARM64 uses this register too.
3879  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3880  // Add temps if necessary for read barriers.
3881  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3882}
3883
3884void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3885  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3886  LocationSummary* locations = instruction->GetLocations();
3887  Location obj_loc = locations->InAt(0);
3888  Register obj = InputRegisterAt(instruction, 0);
3889  Register cls = InputRegisterAt(instruction, 1);
3890  Location out_loc = locations->Out();
3891  Register out = OutputRegister(instruction);
3892  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3893  DCHECK_LE(num_temps, 1u);
3894  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3895  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3896  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3897  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3898  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3899
3900  vixl::aarch64::Label done, zero;
3901  SlowPathCodeARM64* slow_path = nullptr;
3902
3903  // Return 0 if `obj` is null.
3904  // Avoid null check if we know `obj` is not null.
3905  if (instruction->MustDoNullCheck()) {
3906    __ Cbz(obj, &zero);
3907  }
3908
3909  switch (type_check_kind) {
3910    case TypeCheckKind::kExactCheck: {
3911      // /* HeapReference<Class> */ out = obj->klass_
3912      GenerateReferenceLoadTwoRegisters(instruction,
3913                                        out_loc,
3914                                        obj_loc,
3915                                        class_offset,
3916                                        maybe_temp_loc,
3917                                        kCompilerReadBarrierOption);
3918      __ Cmp(out, cls);
3919      __ Cset(out, eq);
3920      if (zero.IsLinked()) {
3921        __ B(&done);
3922      }
3923      break;
3924    }
3925
3926    case TypeCheckKind::kAbstractClassCheck: {
3927      // /* HeapReference<Class> */ out = obj->klass_
3928      GenerateReferenceLoadTwoRegisters(instruction,
3929                                        out_loc,
3930                                        obj_loc,
3931                                        class_offset,
3932                                        maybe_temp_loc,
3933                                        kCompilerReadBarrierOption);
3934      // If the class is abstract, we eagerly fetch the super class of the
3935      // object to avoid doing a comparison we know will fail.
3936      vixl::aarch64::Label loop, success;
3937      __ Bind(&loop);
3938      // /* HeapReference<Class> */ out = out->super_class_
3939      GenerateReferenceLoadOneRegister(instruction,
3940                                       out_loc,
3941                                       super_offset,
3942                                       maybe_temp_loc,
3943                                       kCompilerReadBarrierOption);
3944      // If `out` is null, we use it for the result, and jump to `done`.
3945      __ Cbz(out, &done);
3946      __ Cmp(out, cls);
3947      __ B(ne, &loop);
3948      __ Mov(out, 1);
3949      if (zero.IsLinked()) {
3950        __ B(&done);
3951      }
3952      break;
3953    }
3954
3955    case TypeCheckKind::kClassHierarchyCheck: {
3956      // /* HeapReference<Class> */ out = obj->klass_
3957      GenerateReferenceLoadTwoRegisters(instruction,
3958                                        out_loc,
3959                                        obj_loc,
3960                                        class_offset,
3961                                        maybe_temp_loc,
3962                                        kCompilerReadBarrierOption);
3963      // Walk over the class hierarchy to find a match.
3964      vixl::aarch64::Label loop, success;
3965      __ Bind(&loop);
3966      __ Cmp(out, cls);
3967      __ B(eq, &success);
3968      // /* HeapReference<Class> */ out = out->super_class_
3969      GenerateReferenceLoadOneRegister(instruction,
3970                                       out_loc,
3971                                       super_offset,
3972                                       maybe_temp_loc,
3973                                       kCompilerReadBarrierOption);
3974      __ Cbnz(out, &loop);
3975      // If `out` is null, we use it for the result, and jump to `done`.
3976      __ B(&done);
3977      __ Bind(&success);
3978      __ Mov(out, 1);
3979      if (zero.IsLinked()) {
3980        __ B(&done);
3981      }
3982      break;
3983    }
3984
3985    case TypeCheckKind::kArrayObjectCheck: {
3986      // /* HeapReference<Class> */ out = obj->klass_
3987      GenerateReferenceLoadTwoRegisters(instruction,
3988                                        out_loc,
3989                                        obj_loc,
3990                                        class_offset,
3991                                        maybe_temp_loc,
3992                                        kCompilerReadBarrierOption);
3993      // Do an exact check.
3994      vixl::aarch64::Label exact_check;
3995      __ Cmp(out, cls);
3996      __ B(eq, &exact_check);
3997      // Otherwise, we need to check that the object's class is a non-primitive array.
3998      // /* HeapReference<Class> */ out = out->component_type_
3999      GenerateReferenceLoadOneRegister(instruction,
4000                                       out_loc,
4001                                       component_offset,
4002                                       maybe_temp_loc,
4003                                       kCompilerReadBarrierOption);
4004      // If `out` is null, we use it for the result, and jump to `done`.
4005      __ Cbz(out, &done);
4006      __ Ldrh(out, HeapOperand(out, primitive_offset));
4007      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4008      __ Cbnz(out, &zero);
4009      __ Bind(&exact_check);
4010      __ Mov(out, 1);
4011      __ B(&done);
4012      break;
4013    }
4014
4015    case TypeCheckKind::kArrayCheck: {
4016      // No read barrier since the slow path will retry upon failure.
4017      // /* HeapReference<Class> */ out = obj->klass_
4018      GenerateReferenceLoadTwoRegisters(instruction,
4019                                        out_loc,
4020                                        obj_loc,
4021                                        class_offset,
4022                                        maybe_temp_loc,
4023                                        kWithoutReadBarrier);
4024      __ Cmp(out, cls);
4025      DCHECK(locations->OnlyCallsOnSlowPath());
4026      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4027                                                                      /* is_fatal */ false);
4028      codegen_->AddSlowPath(slow_path);
4029      __ B(ne, slow_path->GetEntryLabel());
4030      __ Mov(out, 1);
4031      if (zero.IsLinked()) {
4032        __ B(&done);
4033      }
4034      break;
4035    }
4036
4037    case TypeCheckKind::kUnresolvedCheck:
4038    case TypeCheckKind::kInterfaceCheck: {
4039      // Note that we indeed only call on slow path, but we always go
4040      // into the slow path for the unresolved and interface check
4041      // cases.
4042      //
4043      // We cannot directly call the InstanceofNonTrivial runtime
4044      // entry point without resorting to a type checking slow path
4045      // here (i.e. by calling InvokeRuntime directly), as it would
4046      // require to assign fixed registers for the inputs of this
4047      // HInstanceOf instruction (following the runtime calling
4048      // convention), which might be cluttered by the potential first
4049      // read barrier emission at the beginning of this method.
4050      //
4051      // TODO: Introduce a new runtime entry point taking the object
4052      // to test (instead of its class) as argument, and let it deal
4053      // with the read barrier issues. This will let us refactor this
4054      // case of the `switch` code as it was previously (with a direct
4055      // call to the runtime not using a type checking slow path).
4056      // This should also be beneficial for the other cases above.
4057      DCHECK(locations->OnlyCallsOnSlowPath());
4058      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4059                                                                      /* is_fatal */ false);
4060      codegen_->AddSlowPath(slow_path);
4061      __ B(slow_path->GetEntryLabel());
4062      if (zero.IsLinked()) {
4063        __ B(&done);
4064      }
4065      break;
4066    }
4067  }
4068
4069  if (zero.IsLinked()) {
4070    __ Bind(&zero);
4071    __ Mov(out, 0);
4072  }
4073
4074  if (done.IsLinked()) {
4075    __ Bind(&done);
4076  }
4077
4078  if (slow_path != nullptr) {
4079    __ Bind(slow_path->GetExitLabel());
4080  }
4081}
4082
4083void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
4084  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
4085  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
4086
4087  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4088  switch (type_check_kind) {
4089    case TypeCheckKind::kExactCheck:
4090    case TypeCheckKind::kAbstractClassCheck:
4091    case TypeCheckKind::kClassHierarchyCheck:
4092    case TypeCheckKind::kArrayObjectCheck:
4093      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
4094          LocationSummary::kCallOnSlowPath :
4095          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
4096      break;
4097    case TypeCheckKind::kArrayCheck:
4098    case TypeCheckKind::kUnresolvedCheck:
4099    case TypeCheckKind::kInterfaceCheck:
4100      call_kind = LocationSummary::kCallOnSlowPath;
4101      break;
4102  }
4103
4104  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4105  locations->SetInAt(0, Location::RequiresRegister());
4106  locations->SetInAt(1, Location::RequiresRegister());
4107  // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
4108  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
4109}
4110
4111void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
4112  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
4113  LocationSummary* locations = instruction->GetLocations();
4114  Location obj_loc = locations->InAt(0);
4115  Register obj = InputRegisterAt(instruction, 0);
4116  Register cls = InputRegisterAt(instruction, 1);
4117  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
4118  DCHECK_GE(num_temps, 1u);
4119  DCHECK_LE(num_temps, 3u);
4120  Location temp_loc = locations->GetTemp(0);
4121  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
4122  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
4123  Register temp = WRegisterFrom(temp_loc);
4124  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4125  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4126  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4127  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
4128  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
4129  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
4130  const uint32_t object_array_data_offset =
4131      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
4132
4133  bool is_type_check_slow_path_fatal = false;
4134  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
4135  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
4136  // read barriers is done for performance and code size reasons.
4137  if (!kEmitCompilerReadBarrier) {
4138    is_type_check_slow_path_fatal =
4139        (type_check_kind == TypeCheckKind::kExactCheck ||
4140         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
4141         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
4142         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
4143        !instruction->CanThrowIntoCatchBlock();
4144  }
4145  SlowPathCodeARM64* type_check_slow_path =
4146      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
4147                                                          is_type_check_slow_path_fatal);
4148  codegen_->AddSlowPath(type_check_slow_path);
4149
4150  vixl::aarch64::Label done;
4151  // Avoid null check if we know obj is not null.
4152  if (instruction->MustDoNullCheck()) {
4153    __ Cbz(obj, &done);
4154  }
4155
4156  switch (type_check_kind) {
4157    case TypeCheckKind::kExactCheck:
4158    case TypeCheckKind::kArrayCheck: {
4159      // /* HeapReference<Class> */ temp = obj->klass_
4160      GenerateReferenceLoadTwoRegisters(instruction,
4161                                        temp_loc,
4162                                        obj_loc,
4163                                        class_offset,
4164                                        maybe_temp2_loc,
4165                                        kWithoutReadBarrier);
4166
4167      __ Cmp(temp, cls);
4168      // Jump to slow path for throwing the exception or doing a
4169      // more involved array check.
4170      __ B(ne, type_check_slow_path->GetEntryLabel());
4171      break;
4172    }
4173
4174    case TypeCheckKind::kAbstractClassCheck: {
4175      // /* HeapReference<Class> */ temp = obj->klass_
4176      GenerateReferenceLoadTwoRegisters(instruction,
4177                                        temp_loc,
4178                                        obj_loc,
4179                                        class_offset,
4180                                        maybe_temp2_loc,
4181                                        kWithoutReadBarrier);
4182
4183      // If the class is abstract, we eagerly fetch the super class of the
4184      // object to avoid doing a comparison we know will fail.
4185      vixl::aarch64::Label loop;
4186      __ Bind(&loop);
4187      // /* HeapReference<Class> */ temp = temp->super_class_
4188      GenerateReferenceLoadOneRegister(instruction,
4189                                       temp_loc,
4190                                       super_offset,
4191                                       maybe_temp2_loc,
4192                                       kWithoutReadBarrier);
4193
4194      // If the class reference currently in `temp` is null, jump to the slow path to throw the
4195      // exception.
4196      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4197      // Otherwise, compare classes.
4198      __ Cmp(temp, cls);
4199      __ B(ne, &loop);
4200      break;
4201    }
4202
4203    case TypeCheckKind::kClassHierarchyCheck: {
4204      // /* HeapReference<Class> */ temp = obj->klass_
4205      GenerateReferenceLoadTwoRegisters(instruction,
4206                                        temp_loc,
4207                                        obj_loc,
4208                                        class_offset,
4209                                        maybe_temp2_loc,
4210                                        kWithoutReadBarrier);
4211
4212      // Walk over the class hierarchy to find a match.
4213      vixl::aarch64::Label loop;
4214      __ Bind(&loop);
4215      __ Cmp(temp, cls);
4216      __ B(eq, &done);
4217
4218      // /* HeapReference<Class> */ temp = temp->super_class_
4219      GenerateReferenceLoadOneRegister(instruction,
4220                                       temp_loc,
4221                                       super_offset,
4222                                       maybe_temp2_loc,
4223                                       kWithoutReadBarrier);
4224
4225      // If the class reference currently in `temp` is not null, jump
4226      // back at the beginning of the loop.
4227      __ Cbnz(temp, &loop);
4228      // Otherwise, jump to the slow path to throw the exception.
4229      __ B(type_check_slow_path->GetEntryLabel());
4230      break;
4231    }
4232
4233    case TypeCheckKind::kArrayObjectCheck: {
4234      // /* HeapReference<Class> */ temp = obj->klass_
4235      GenerateReferenceLoadTwoRegisters(instruction,
4236                                        temp_loc,
4237                                        obj_loc,
4238                                        class_offset,
4239                                        maybe_temp2_loc,
4240                                        kWithoutReadBarrier);
4241
4242      // Do an exact check.
4243      __ Cmp(temp, cls);
4244      __ B(eq, &done);
4245
4246      // Otherwise, we need to check that the object's class is a non-primitive array.
4247      // /* HeapReference<Class> */ temp = temp->component_type_
4248      GenerateReferenceLoadOneRegister(instruction,
4249                                       temp_loc,
4250                                       component_offset,
4251                                       maybe_temp2_loc,
4252                                       kWithoutReadBarrier);
4253
4254      // If the component type is null, jump to the slow path to throw the exception.
4255      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
4256      // Otherwise, the object is indeed an array. Further check that this component type is not a
4257      // primitive type.
4258      __ Ldrh(temp, HeapOperand(temp, primitive_offset));
4259      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
4260      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
4261      break;
4262    }
4263
4264    case TypeCheckKind::kUnresolvedCheck:
4265      // We always go into the type check slow path for the unresolved check cases.
4266      //
4267      // We cannot directly call the CheckCast runtime entry point
4268      // without resorting to a type checking slow path here (i.e. by
4269      // calling InvokeRuntime directly), as it would require to
4270      // assign fixed registers for the inputs of this HInstanceOf
4271      // instruction (following the runtime calling convention), which
4272      // might be cluttered by the potential first read barrier
4273      // emission at the beginning of this method.
4274      __ B(type_check_slow_path->GetEntryLabel());
4275      break;
4276    case TypeCheckKind::kInterfaceCheck: {
4277      // /* HeapReference<Class> */ temp = obj->klass_
4278      GenerateReferenceLoadTwoRegisters(instruction,
4279                                        temp_loc,
4280                                        obj_loc,
4281                                        class_offset,
4282                                        maybe_temp2_loc,
4283                                        kWithoutReadBarrier);
4284
4285      // /* HeapReference<Class> */ temp = temp->iftable_
4286      GenerateReferenceLoadTwoRegisters(instruction,
4287                                        temp_loc,
4288                                        temp_loc,
4289                                        iftable_offset,
4290                                        maybe_temp2_loc,
4291                                        kWithoutReadBarrier);
4292      // Iftable is never null.
4293      __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4294      // Loop through the iftable and check if any class matches.
4295      vixl::aarch64::Label start_loop;
4296      __ Bind(&start_loop);
4297      __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4298      __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4299      GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4300      // Go to next interface.
4301      __ Add(temp, temp, 2 * kHeapReferenceSize);
4302      __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4303      // Compare the classes and continue the loop if they do not match.
4304      __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4305      __ B(ne, &start_loop);
4306      break;
4307    }
4308  }
4309  __ Bind(&done);
4310
4311  __ Bind(type_check_slow_path->GetExitLabel());
4312}
4313
4314void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4315  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
4316  locations->SetOut(Location::ConstantLocation(constant));
4317}
4318
4319void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4320  // Will be generated at use site.
4321}
4322
4323void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4324  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
4325  locations->SetOut(Location::ConstantLocation(constant));
4326}
4327
4328void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4329  // Will be generated at use site.
4330}
4331
4332void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4333  // The trampoline uses the same calling convention as dex calling conventions,
4334  // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4335  // the method_idx.
4336  HandleInvoke(invoke);
4337}
4338
4339void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4340  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4341}
4342
4343void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4344  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4345  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4346}
4347
4348void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4349  HandleInvoke(invoke);
4350}
4351
4352void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4353  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4354  LocationSummary* locations = invoke->GetLocations();
4355  Register temp = XRegisterFrom(locations->GetTemp(0));
4356  Location receiver = locations->InAt(0);
4357  Offset class_offset = mirror::Object::ClassOffset();
4358  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4359
4360  // The register ip1 is required to be used for the hidden argument in
4361  // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4362  MacroAssembler* masm = GetVIXLAssembler();
4363  UseScratchRegisterScope scratch_scope(masm);
4364  scratch_scope.Exclude(ip1);
4365  __ Mov(ip1, invoke->GetDexMethodIndex());
4366
4367  // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4368  if (receiver.IsStackSlot()) {
4369    __ Ldr(temp.W(), StackOperandFrom(receiver));
4370    {
4371      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4372      // /* HeapReference<Class> */ temp = temp->klass_
4373      __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4374      codegen_->MaybeRecordImplicitNullCheck(invoke);
4375    }
4376  } else {
4377    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4378    // /* HeapReference<Class> */ temp = receiver->klass_
4379    __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4380    codegen_->MaybeRecordImplicitNullCheck(invoke);
4381  }
4382
4383  // Instead of simply (possibly) unpoisoning `temp` here, we should
4384  // emit a read barrier for the previous class reference load.
4385  // However this is not required in practice, as this is an
4386  // intermediate/temporary reference and because the current
4387  // concurrent copying collector keeps the from-space memory
4388  // intact/accessible until the end of the marking phase (the
4389  // concurrent copying collector may not in the future).
4390  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4391  __ Ldr(temp,
4392      MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4393  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4394      invoke->GetImtIndex(), kArm64PointerSize));
4395  // temp = temp->GetImtEntryAt(method_offset);
4396  __ Ldr(temp, MemOperand(temp, method_offset));
4397  // lr = temp->GetEntryPoint();
4398  __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4399
4400  {
4401    // Ensure the pc position is recorded immediately after the `blr` instruction.
4402    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4403
4404    // lr();
4405    __ blr(lr);
4406    DCHECK(!codegen_->IsLeafMethod());
4407    codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4408  }
4409}
4410
4411void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4412  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
4413  if (intrinsic.TryDispatch(invoke)) {
4414    return;
4415  }
4416
4417  HandleInvoke(invoke);
4418}
4419
4420void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4421  // Explicit clinit checks triggered by static invokes must have been pruned by
4422  // art::PrepareForRegisterAllocation.
4423  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4424
4425  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
4426  if (intrinsic.TryDispatch(invoke)) {
4427    return;
4428  }
4429
4430  HandleInvoke(invoke);
4431}
4432
4433static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4434  if (invoke->GetLocations()->Intrinsified()) {
4435    IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4436    intrinsic.Dispatch(invoke);
4437    return true;
4438  }
4439  return false;
4440}
4441
4442HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4443      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4444      HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
4445  // On ARM64 we support all dispatch types.
4446  return desired_dispatch_info;
4447}
4448
4449Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
4450                                                                    Location temp) {
4451  // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4452  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4453  switch (invoke->GetMethodLoadKind()) {
4454    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4455      uint32_t offset =
4456          GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4457      // temp = thread->string_init_entrypoint
4458      __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4459      break;
4460    }
4461    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4462      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4463      break;
4464    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
4465      // Load method address from literal pool.
4466      __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4467      break;
4468    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
4469      // Add ADRP with its PC-relative DexCache access patch.
4470      const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache();
4471      uint32_t element_offset = invoke->GetDexCacheArrayOffset();
4472      vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
4473      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4474      // Add LDR with its PC-relative DexCache access patch.
4475      vixl::aarch64::Label* ldr_label =
4476          NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
4477      EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4478      break;
4479    }
4480    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
4481      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4482      Register reg = XRegisterFrom(temp);
4483      Register method_reg;
4484      if (current_method.IsRegister()) {
4485        method_reg = XRegisterFrom(current_method);
4486      } else {
4487        DCHECK(invoke->GetLocations()->Intrinsified());
4488        DCHECK(!current_method.IsValid());
4489        method_reg = reg;
4490        __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
4491      }
4492
4493      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
4494      __ Ldr(reg.X(),
4495             MemOperand(method_reg.X(),
4496                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
4497      // temp = temp[index_in_cache];
4498      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
4499      uint32_t index_in_cache = invoke->GetDexMethodIndex();
4500    __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
4501      break;
4502    }
4503  }
4504  return callee_method;
4505}
4506
4507void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
4508  // All registers are assumed to be correctly set up.
4509  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
4510
4511  switch (invoke->GetCodePtrLocation()) {
4512    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4513      __ Bl(&frame_entry_label_);
4514      break;
4515    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4516      // LR = callee_method->entry_point_from_quick_compiled_code_;
4517      __ Ldr(lr, MemOperand(
4518          XRegisterFrom(callee_method),
4519          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4520      {
4521        // To ensure that the pc position is recorded immediately after the `blr` instruction
4522        // BLR must be the last instruction emitted in this function.
4523        // Recording the pc will occur right after returning from this function.
4524        ExactAssemblyScope eas(GetVIXLAssembler(),
4525                               kInstructionSize,
4526                               CodeBufferCheckScope::kExactSize);
4527        // lr()
4528        __ blr(lr);
4529      }
4530      break;
4531  }
4532
4533  DCHECK(!IsLeafMethod());
4534}
4535
4536void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
4537  // Use the calling convention instead of the location of the receiver, as
4538  // intrinsics may have put the receiver in a different register. In the intrinsics
4539  // slow path, the arguments have been moved to the right place, so here we are
4540  // guaranteed that the receiver is the first register of the calling convention.
4541  InvokeDexCallingConvention calling_convention;
4542  Register receiver = calling_convention.GetRegisterAt(0);
4543  Register temp = XRegisterFrom(temp_in);
4544  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4545      invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4546  Offset class_offset = mirror::Object::ClassOffset();
4547  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4548
4549  DCHECK(receiver.IsRegister());
4550
4551  {
4552    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4553    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4554    // /* HeapReference<Class> */ temp = receiver->klass_
4555    __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4556    MaybeRecordImplicitNullCheck(invoke);
4557  }
4558  // Instead of simply (possibly) unpoisoning `temp` here, we should
4559  // emit a read barrier for the previous class reference load.
4560  // intermediate/temporary reference and because the current
4561  // concurrent copying collector keeps the from-space memory
4562  // intact/accessible until the end of the marking phase (the
4563  // concurrent copying collector may not in the future).
4564  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4565  // temp = temp->GetMethodAt(method_offset);
4566  __ Ldr(temp, MemOperand(temp, method_offset));
4567  // lr = temp->GetEntryPoint();
4568  __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4569  {
4570    // To ensure that the pc position is recorded immediately after the `blr` instruction
4571    // BLR should be the last instruction emitted in this function.
4572    // Recording the pc will occur right after returning from this function.
4573    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4574    // lr();
4575    __ blr(lr);
4576  }
4577}
4578
4579void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4580  HandleInvoke(invoke);
4581}
4582
4583void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4584  codegen_->GenerateInvokePolymorphicCall(invoke);
4585}
4586
4587vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
4588    const DexFile& dex_file,
4589    dex::StringIndex string_index,
4590    vixl::aarch64::Label* adrp_label) {
4591  return
4592      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
4593}
4594
4595vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
4596    const DexFile& dex_file,
4597    dex::TypeIndex type_index,
4598    vixl::aarch64::Label* adrp_label) {
4599  return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_);
4600}
4601
4602vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4603    const DexFile& dex_file,
4604    dex::TypeIndex type_index,
4605    vixl::aarch64::Label* adrp_label) {
4606  return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4607}
4608
4609vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
4610    const DexFile& dex_file,
4611    uint32_t element_offset,
4612    vixl::aarch64::Label* adrp_label) {
4613  return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
4614}
4615
4616vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
4617  baker_read_barrier_patches_.emplace_back(custom_data);
4618  return &baker_read_barrier_patches_.back().label;
4619}
4620
4621vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4622    const DexFile& dex_file,
4623    uint32_t offset_or_index,
4624    vixl::aarch64::Label* adrp_label,
4625    ArenaDeque<PcRelativePatchInfo>* patches) {
4626  // Add a patch entry and return the label.
4627  patches->emplace_back(dex_file, offset_or_index);
4628  PcRelativePatchInfo* info = &patches->back();
4629  vixl::aarch64::Label* label = &info->label;
4630  // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4631  info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4632  return label;
4633}
4634
4635vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageStringLiteral(
4636    const DexFile& dex_file, dex::StringIndex string_index) {
4637  return boot_image_string_patches_.GetOrCreate(
4638      StringReference(&dex_file, string_index),
4639      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4640}
4641
4642vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageTypeLiteral(
4643    const DexFile& dex_file, dex::TypeIndex type_index) {
4644  return boot_image_type_patches_.GetOrCreate(
4645      TypeReference(&dex_file, type_index),
4646      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4647}
4648
4649vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4650    uint64_t address) {
4651  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
4652}
4653
4654vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4655    const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4656  jit_string_roots_.Overwrite(StringReference(&dex_file, string_index),
4657                              reinterpret_cast64<uint64_t>(handle.GetReference()));
4658  return jit_string_patches_.GetOrCreate(
4659      StringReference(&dex_file, string_index),
4660      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4661}
4662
4663vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4664    const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4665  jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index),
4666                             reinterpret_cast64<uint64_t>(handle.GetReference()));
4667  return jit_class_patches_.GetOrCreate(
4668      TypeReference(&dex_file, type_index),
4669      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
4670}
4671
4672void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4673                                             vixl::aarch64::Register reg) {
4674  DCHECK(reg.IsX());
4675  SingleEmissionCheckScope guard(GetVIXLAssembler());
4676  __ Bind(fixup_label);
4677  __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4678}
4679
4680void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4681                                            vixl::aarch64::Register out,
4682                                            vixl::aarch64::Register base) {
4683  DCHECK(out.IsX());
4684  DCHECK(base.IsX());
4685  SingleEmissionCheckScope guard(GetVIXLAssembler());
4686  __ Bind(fixup_label);
4687  __ add(out, base, Operand(/* offset placeholder */ 0));
4688}
4689
4690void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4691                                                  vixl::aarch64::Register out,
4692                                                  vixl::aarch64::Register base) {
4693  DCHECK(base.IsX());
4694  SingleEmissionCheckScope guard(GetVIXLAssembler());
4695  __ Bind(fixup_label);
4696  __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4697}
4698
4699template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4700inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4701    const ArenaDeque<PcRelativePatchInfo>& infos,
4702    ArenaVector<LinkerPatch>* linker_patches) {
4703  for (const PcRelativePatchInfo& info : infos) {
4704    linker_patches->push_back(Factory(info.label.GetLocation(),
4705                                      &info.target_dex_file,
4706                                      info.pc_insn_label->GetLocation(),
4707                                      info.offset_or_index));
4708  }
4709}
4710
4711void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
4712  DCHECK(linker_patches->empty());
4713  size_t size =
4714      pc_relative_dex_cache_patches_.size() +
4715      boot_image_string_patches_.size() +
4716      pc_relative_string_patches_.size() +
4717      boot_image_type_patches_.size() +
4718      pc_relative_type_patches_.size() +
4719      type_bss_entry_patches_.size() +
4720      baker_read_barrier_patches_.size();
4721  linker_patches->reserve(size);
4722  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
4723    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
4724                                                              &info.target_dex_file,
4725                                                              info.pc_insn_label->GetLocation(),
4726                                                              info.offset_or_index));
4727  }
4728  for (const auto& entry : boot_image_string_patches_) {
4729    const StringReference& target_string = entry.first;
4730    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
4731    linker_patches->push_back(LinkerPatch::StringPatch(literal->GetOffset(),
4732                                                       target_string.dex_file,
4733                                                       target_string.string_index.index_));
4734  }
4735  if (!GetCompilerOptions().IsBootImage()) {
4736    DCHECK(pc_relative_type_patches_.empty());
4737    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
4738                                                                  linker_patches);
4739  } else {
4740    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
4741                                                                linker_patches);
4742    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
4743                                                                  linker_patches);
4744  }
4745  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
4746                                                              linker_patches);
4747  for (const auto& entry : boot_image_type_patches_) {
4748    const TypeReference& target_type = entry.first;
4749    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
4750    linker_patches->push_back(LinkerPatch::TypePatch(literal->GetOffset(),
4751                                                     target_type.dex_file,
4752                                                     target_type.type_index.index_));
4753  }
4754  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4755    linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
4756                                                                       info.custom_data));
4757  }
4758  DCHECK_EQ(size, linker_patches->size());
4759}
4760
4761vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
4762                                                                      Uint32ToLiteralMap* map) {
4763  return map->GetOrCreate(
4764      value,
4765      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4766}
4767
4768vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4769  return uint64_literals_.GetOrCreate(
4770      value,
4771      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4772}
4773
4774vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
4775    MethodReference target_method,
4776    MethodToLiteralMap* map) {
4777  return map->GetOrCreate(
4778      target_method,
4779      [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); });
4780}
4781
4782void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4783  // Explicit clinit checks triggered by static invokes must have been pruned by
4784  // art::PrepareForRegisterAllocation.
4785  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4786
4787  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4788    return;
4789  }
4790
4791  // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4792  // are no pools emitted.
4793  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4794  LocationSummary* locations = invoke->GetLocations();
4795  codegen_->GenerateStaticOrDirectCall(
4796      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4797  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4798}
4799
4800void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4801  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4802    return;
4803  }
4804
4805  // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4806  // are no pools emitted.
4807  EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4808  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4809  DCHECK(!codegen_->IsLeafMethod());
4810  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4811}
4812
4813HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4814    HLoadClass::LoadKind desired_class_load_kind) {
4815  switch (desired_class_load_kind) {
4816    case HLoadClass::LoadKind::kInvalid:
4817      LOG(FATAL) << "UNREACHABLE";
4818      UNREACHABLE();
4819    case HLoadClass::LoadKind::kReferrersClass:
4820      break;
4821    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
4822      DCHECK(!GetCompilerOptions().GetCompilePic());
4823      break;
4824    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4825      DCHECK(GetCompilerOptions().GetCompilePic());
4826      break;
4827    case HLoadClass::LoadKind::kBootImageAddress:
4828      break;
4829    case HLoadClass::LoadKind::kBssEntry:
4830      DCHECK(!Runtime::Current()->UseJitCompilation());
4831      break;
4832    case HLoadClass::LoadKind::kJitTableAddress:
4833      DCHECK(Runtime::Current()->UseJitCompilation());
4834      break;
4835    case HLoadClass::LoadKind::kDexCacheViaMethod:
4836      break;
4837  }
4838  return desired_class_load_kind;
4839}
4840
4841void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4842  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4843  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
4844    InvokeRuntimeCallingConvention calling_convention;
4845    CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4846        cls,
4847        LocationFrom(calling_convention.GetRegisterAt(0)),
4848        LocationFrom(vixl::aarch64::x0));
4849    DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4850    return;
4851  }
4852  DCHECK(!cls->NeedsAccessCheck());
4853
4854  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4855  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4856      ? LocationSummary::kCallOnSlowPath
4857      : LocationSummary::kNoCall;
4858  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
4859  if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4860    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4861  }
4862
4863  if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4864    locations->SetInAt(0, Location::RequiresRegister());
4865  }
4866  locations->SetOut(Location::RequiresRegister());
4867  if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4868    if (!kUseReadBarrier || kUseBakerReadBarrier) {
4869      // Rely on the type resolution or initialization and marking to save everything we need.
4870      locations->AddTemp(FixedTempLocation());
4871      RegisterSet caller_saves = RegisterSet::Empty();
4872      InvokeRuntimeCallingConvention calling_convention;
4873      caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
4874      DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
4875                RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
4876                             Primitive::kPrimNot).GetCode());
4877      locations->SetCustomSlowPathCallerSaves(caller_saves);
4878    } else {
4879      // For non-Baker read barrier we have a temp-clobbering call.
4880    }
4881  }
4882}
4883
4884// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4885// move.
4886void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
4887  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4888  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
4889    codegen_->GenerateLoadClassRuntimeCall(cls);
4890    return;
4891  }
4892  DCHECK(!cls->NeedsAccessCheck());
4893
4894  Location out_loc = cls->GetLocations()->Out();
4895  Register out = OutputRegister(cls);
4896  Register bss_entry_temp;
4897  vixl::aarch64::Label* bss_entry_adrp_label = nullptr;
4898
4899  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
4900      ? kWithoutReadBarrier
4901      : kCompilerReadBarrierOption;
4902  bool generate_null_check = false;
4903  switch (load_kind) {
4904    case HLoadClass::LoadKind::kReferrersClass: {
4905      DCHECK(!cls->CanCallRuntime());
4906      DCHECK(!cls->MustGenerateClinitCheck());
4907      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
4908      Register current_method = InputRegisterAt(cls, 0);
4909      GenerateGcRootFieldLoad(cls,
4910                              out_loc,
4911                              current_method,
4912                              ArtMethod::DeclaringClassOffset().Int32Value(),
4913                              /* fixup_label */ nullptr,
4914                              read_barrier_option);
4915      break;
4916    }
4917    case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
4918      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4919      __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
4920                                                            cls->GetTypeIndex()));
4921      break;
4922    case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
4923      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4924      // Add ADRP with its PC-relative type patch.
4925      const DexFile& dex_file = cls->GetDexFile();
4926      dex::TypeIndex type_index = cls->GetTypeIndex();
4927      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index);
4928      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4929      // Add ADD with its PC-relative type patch.
4930      vixl::aarch64::Label* add_label =
4931          codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label);
4932      codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
4933      break;
4934    }
4935    case HLoadClass::LoadKind::kBootImageAddress: {
4936      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4937      uint32_t address = dchecked_integral_cast<uint32_t>(
4938          reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
4939      DCHECK_NE(address, 0u);
4940      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
4941      break;
4942    }
4943    case HLoadClass::LoadKind::kBssEntry: {
4944      // Add ADRP with its PC-relative Class .bss entry patch.
4945      const DexFile& dex_file = cls->GetDexFile();
4946      dex::TypeIndex type_index = cls->GetTypeIndex();
4947      bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
4948      bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
4949      codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
4950      // Add LDR with its PC-relative Class patch.
4951      vixl::aarch64::Label* ldr_label =
4952          codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label);
4953      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
4954      GenerateGcRootFieldLoad(cls,
4955                              out_loc,
4956                              bss_entry_temp,
4957                              /* offset placeholder */ 0u,
4958                              ldr_label,
4959                              read_barrier_option);
4960      generate_null_check = true;
4961      break;
4962    }
4963    case HLoadClass::LoadKind::kJitTableAddress: {
4964      __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
4965                                                       cls->GetTypeIndex(),
4966                                                       cls->GetClass()));
4967      GenerateGcRootFieldLoad(cls,
4968                              out_loc,
4969                              out.X(),
4970                              /* offset */ 0,
4971                              /* fixup_label */ nullptr,
4972                              read_barrier_option);
4973      break;
4974    }
4975    case HLoadClass::LoadKind::kDexCacheViaMethod:
4976    case HLoadClass::LoadKind::kInvalid:
4977      LOG(FATAL) << "UNREACHABLE";
4978      UNREACHABLE();
4979  }
4980
4981  bool do_clinit = cls->MustGenerateClinitCheck();
4982  if (generate_null_check || do_clinit) {
4983    DCHECK(cls->CanCallRuntime());
4984    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
4985        cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label);
4986    codegen_->AddSlowPath(slow_path);
4987    if (generate_null_check) {
4988      __ Cbz(out, slow_path->GetEntryLabel());
4989    }
4990    if (cls->MustGenerateClinitCheck()) {
4991      GenerateClassInitializationCheck(slow_path, out);
4992    } else {
4993      __ Bind(slow_path->GetExitLabel());
4994    }
4995  }
4996}
4997
4998static MemOperand GetExceptionTlsAddress() {
4999  return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
5000}
5001
5002void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
5003  LocationSummary* locations =
5004      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5005  locations->SetOut(Location::RequiresRegister());
5006}
5007
5008void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
5009  __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
5010}
5011
5012void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
5013  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5014}
5015
5016void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5017  __ Str(wzr, GetExceptionTlsAddress());
5018}
5019
5020HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
5021    HLoadString::LoadKind desired_string_load_kind) {
5022  switch (desired_string_load_kind) {
5023    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5024      DCHECK(!GetCompilerOptions().GetCompilePic());
5025      break;
5026    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5027      DCHECK(GetCompilerOptions().GetCompilePic());
5028      break;
5029    case HLoadString::LoadKind::kBootImageAddress:
5030      break;
5031    case HLoadString::LoadKind::kBssEntry:
5032      DCHECK(!Runtime::Current()->UseJitCompilation());
5033      break;
5034    case HLoadString::LoadKind::kJitTableAddress:
5035      DCHECK(Runtime::Current()->UseJitCompilation());
5036      break;
5037    case HLoadString::LoadKind::kDexCacheViaMethod:
5038      break;
5039  }
5040  return desired_string_load_kind;
5041}
5042
5043void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
5044  LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5045  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5046  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5047    InvokeRuntimeCallingConvention calling_convention;
5048    locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
5049  } else {
5050    locations->SetOut(Location::RequiresRegister());
5051    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5052      if (!kUseReadBarrier || kUseBakerReadBarrier) {
5053        // Rely on the pResolveString and marking to save everything we need.
5054        locations->AddTemp(FixedTempLocation());
5055        RegisterSet caller_saves = RegisterSet::Empty();
5056        InvokeRuntimeCallingConvention calling_convention;
5057        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
5058        DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
5059                  RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
5060                               Primitive::kPrimNot).GetCode());
5061        locations->SetCustomSlowPathCallerSaves(caller_saves);
5062      } else {
5063        // For non-Baker read barrier we have a temp-clobbering call.
5064      }
5065    }
5066  }
5067}
5068
5069// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5070// move.
5071void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5072  Register out = OutputRegister(load);
5073  Location out_loc = load->GetLocations()->Out();
5074
5075  switch (load->GetLoadKind()) {
5076    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5077      __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
5078                                                              load->GetStringIndex()));
5079      return;  // No dex cache slow path.
5080    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5081      // Add ADRP with its PC-relative String patch.
5082      const DexFile& dex_file = load->GetDexFile();
5083      const dex::StringIndex string_index = load->GetStringIndex();
5084      DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5085      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
5086      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5087      // Add ADD with its PC-relative String patch.
5088      vixl::aarch64::Label* add_label =
5089          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
5090      codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5091      return;  // No dex cache slow path.
5092    }
5093    case HLoadString::LoadKind::kBootImageAddress: {
5094      uint32_t address = dchecked_integral_cast<uint32_t>(
5095          reinterpret_cast<uintptr_t>(load->GetString().Get()));
5096      DCHECK_NE(address, 0u);
5097      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5098      return;  // No dex cache slow path.
5099    }
5100    case HLoadString::LoadKind::kBssEntry: {
5101      // Add ADRP with its PC-relative String .bss entry patch.
5102      const DexFile& dex_file = load->GetDexFile();
5103      const dex::StringIndex string_index = load->GetStringIndex();
5104      DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5105      Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
5106      vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
5107      codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5108      // Add LDR with its PC-relative String patch.
5109      vixl::aarch64::Label* ldr_label =
5110          codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
5111      // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5112      GenerateGcRootFieldLoad(load,
5113                              out_loc,
5114                              temp,
5115                              /* offset placeholder */ 0u,
5116                              ldr_label,
5117                              kCompilerReadBarrierOption);
5118      SlowPathCodeARM64* slow_path =
5119          new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
5120      codegen_->AddSlowPath(slow_path);
5121      __ Cbz(out.X(), slow_path->GetEntryLabel());
5122      __ Bind(slow_path->GetExitLabel());
5123      return;
5124    }
5125    case HLoadString::LoadKind::kJitTableAddress: {
5126      __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5127                                                        load->GetStringIndex(),
5128                                                        load->GetString()));
5129      GenerateGcRootFieldLoad(load,
5130                              out_loc,
5131                              out.X(),
5132                              /* offset */ 0,
5133                              /* fixup_label */ nullptr,
5134                              kCompilerReadBarrierOption);
5135      return;
5136    }
5137    default:
5138      break;
5139  }
5140
5141  // TODO: Re-add the compiler code to do string dex cache lookup again.
5142  InvokeRuntimeCallingConvention calling_convention;
5143  DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5144  __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5145  codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5146  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5147}
5148
5149void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5150  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
5151  locations->SetOut(Location::ConstantLocation(constant));
5152}
5153
5154void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5155  // Will be generated at use site.
5156}
5157
5158void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5159  LocationSummary* locations =
5160      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5161  InvokeRuntimeCallingConvention calling_convention;
5162  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5163}
5164
5165void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5166  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5167                          instruction,
5168                          instruction->GetDexPc());
5169  if (instruction->IsEnter()) {
5170    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5171  } else {
5172    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5173  }
5174}
5175
5176void LocationsBuilderARM64::VisitMul(HMul* mul) {
5177  LocationSummary* locations =
5178      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
5179  switch (mul->GetResultType()) {
5180    case Primitive::kPrimInt:
5181    case Primitive::kPrimLong:
5182      locations->SetInAt(0, Location::RequiresRegister());
5183      locations->SetInAt(1, Location::RequiresRegister());
5184      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5185      break;
5186
5187    case Primitive::kPrimFloat:
5188    case Primitive::kPrimDouble:
5189      locations->SetInAt(0, Location::RequiresFpuRegister());
5190      locations->SetInAt(1, Location::RequiresFpuRegister());
5191      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5192      break;
5193
5194    default:
5195      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5196  }
5197}
5198
5199void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5200  switch (mul->GetResultType()) {
5201    case Primitive::kPrimInt:
5202    case Primitive::kPrimLong:
5203      __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5204      break;
5205
5206    case Primitive::kPrimFloat:
5207    case Primitive::kPrimDouble:
5208      __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5209      break;
5210
5211    default:
5212      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5213  }
5214}
5215
5216void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5217  LocationSummary* locations =
5218      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
5219  switch (neg->GetResultType()) {
5220    case Primitive::kPrimInt:
5221    case Primitive::kPrimLong:
5222      locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5223      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5224      break;
5225
5226    case Primitive::kPrimFloat:
5227    case Primitive::kPrimDouble:
5228      locations->SetInAt(0, Location::RequiresFpuRegister());
5229      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5230      break;
5231
5232    default:
5233      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5234  }
5235}
5236
5237void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5238  switch (neg->GetResultType()) {
5239    case Primitive::kPrimInt:
5240    case Primitive::kPrimLong:
5241      __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5242      break;
5243
5244    case Primitive::kPrimFloat:
5245    case Primitive::kPrimDouble:
5246      __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5247      break;
5248
5249    default:
5250      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5251  }
5252}
5253
5254void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5255  LocationSummary* locations =
5256      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5257  InvokeRuntimeCallingConvention calling_convention;
5258  locations->SetOut(LocationFrom(x0));
5259  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5260  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5261}
5262
5263void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5264  // Note: if heap poisoning is enabled, the entry point takes cares
5265  // of poisoning the reference.
5266  QuickEntrypointEnum entrypoint =
5267      CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
5268  codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5269  CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5270}
5271
5272void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5273  LocationSummary* locations =
5274      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5275  InvokeRuntimeCallingConvention calling_convention;
5276  if (instruction->IsStringAlloc()) {
5277    locations->AddTemp(LocationFrom(kArtMethodRegister));
5278  } else {
5279    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5280  }
5281  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
5282}
5283
5284void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5285  // Note: if heap poisoning is enabled, the entry point takes cares
5286  // of poisoning the reference.
5287  if (instruction->IsStringAlloc()) {
5288    // String is allocated through StringFactory. Call NewEmptyString entry point.
5289    Location temp = instruction->GetLocations()->GetTemp(0);
5290    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
5291    __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
5292    __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
5293
5294    {
5295      // Ensure the pc position is recorded immediately after the `blr` instruction.
5296      ExactAssemblyScope eas(GetVIXLAssembler(),
5297                             kInstructionSize,
5298                             CodeBufferCheckScope::kExactSize);
5299      __ blr(lr);
5300      codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
5301    }
5302  } else {
5303    codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5304    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5305  }
5306}
5307
5308void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5309  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5310  locations->SetInAt(0, Location::RequiresRegister());
5311  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5312}
5313
5314void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5315  switch (instruction->GetResultType()) {
5316    case Primitive::kPrimInt:
5317    case Primitive::kPrimLong:
5318      __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5319      break;
5320
5321    default:
5322      LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5323  }
5324}
5325
5326void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5327  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5328  locations->SetInAt(0, Location::RequiresRegister());
5329  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5330}
5331
5332void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5333  __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5334}
5335
5336void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5337  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5338  locations->SetInAt(0, Location::RequiresRegister());
5339}
5340
5341void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5342  if (CanMoveNullCheckToUser(instruction)) {
5343    return;
5344  }
5345  {
5346    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
5347    EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5348    Location obj = instruction->GetLocations()->InAt(0);
5349    __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5350    RecordPcInfo(instruction, instruction->GetDexPc());
5351  }
5352}
5353
5354void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5355  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
5356  AddSlowPath(slow_path);
5357
5358  LocationSummary* locations = instruction->GetLocations();
5359  Location obj = locations->InAt(0);
5360
5361  __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5362}
5363
5364void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5365  codegen_->GenerateNullCheck(instruction);
5366}
5367
5368void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5369  HandleBinaryOp(instruction);
5370}
5371
5372void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5373  HandleBinaryOp(instruction);
5374}
5375
5376void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5377  LOG(FATAL) << "Unreachable";
5378}
5379
5380void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5381  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5382}
5383
5384void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5385  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5386  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5387  if (location.IsStackSlot()) {
5388    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5389  } else if (location.IsDoubleStackSlot()) {
5390    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5391  }
5392  locations->SetOut(location);
5393}
5394
5395void InstructionCodeGeneratorARM64::VisitParameterValue(
5396    HParameterValue* instruction ATTRIBUTE_UNUSED) {
5397  // Nothing to do, the parameter is already at its location.
5398}
5399
5400void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5401  LocationSummary* locations =
5402      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
5403  locations->SetOut(LocationFrom(kArtMethodRegister));
5404}
5405
5406void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5407    HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5408  // Nothing to do, the method is already at its location.
5409}
5410
5411void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5412  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5413  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5414    locations->SetInAt(i, Location::Any());
5415  }
5416  locations->SetOut(Location::Any());
5417}
5418
5419void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5420  LOG(FATAL) << "Unreachable";
5421}
5422
5423void LocationsBuilderARM64::VisitRem(HRem* rem) {
5424  Primitive::Type type = rem->GetResultType();
5425  LocationSummary::CallKind call_kind =
5426      Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5427                                           : LocationSummary::kNoCall;
5428  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
5429
5430  switch (type) {
5431    case Primitive::kPrimInt:
5432    case Primitive::kPrimLong:
5433      locations->SetInAt(0, Location::RequiresRegister());
5434      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5435      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5436      break;
5437
5438    case Primitive::kPrimFloat:
5439    case Primitive::kPrimDouble: {
5440      InvokeRuntimeCallingConvention calling_convention;
5441      locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5442      locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5443      locations->SetOut(calling_convention.GetReturnLocation(type));
5444
5445      break;
5446    }
5447
5448    default:
5449      LOG(FATAL) << "Unexpected rem type " << type;
5450  }
5451}
5452
5453void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5454  Primitive::Type type = rem->GetResultType();
5455
5456  switch (type) {
5457    case Primitive::kPrimInt:
5458    case Primitive::kPrimLong: {
5459      GenerateDivRemIntegral(rem);
5460      break;
5461    }
5462
5463    case Primitive::kPrimFloat:
5464    case Primitive::kPrimDouble: {
5465      QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod;
5466      codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5467      if (type == Primitive::kPrimFloat) {
5468        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5469      } else {
5470        CheckEntrypointTypes<kQuickFmod, double, double, double>();
5471      }
5472      break;
5473    }
5474
5475    default:
5476      LOG(FATAL) << "Unexpected rem type " << type;
5477      UNREACHABLE();
5478  }
5479}
5480
5481void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5482  memory_barrier->SetLocations(nullptr);
5483}
5484
5485void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5486  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5487}
5488
5489void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5490  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
5491  Primitive::Type return_type = instruction->InputAt(0)->GetType();
5492  locations->SetInAt(0, ARM64ReturnLocation(return_type));
5493}
5494
5495void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction ATTRIBUTE_UNUSED) {
5496  codegen_->GenerateFrameExit();
5497}
5498
5499void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5500  instruction->SetLocations(nullptr);
5501}
5502
5503void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5504  codegen_->GenerateFrameExit();
5505}
5506
5507void LocationsBuilderARM64::VisitRor(HRor* ror) {
5508  HandleBinaryOp(ror);
5509}
5510
5511void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5512  HandleBinaryOp(ror);
5513}
5514
5515void LocationsBuilderARM64::VisitShl(HShl* shl) {
5516  HandleShift(shl);
5517}
5518
5519void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5520  HandleShift(shl);
5521}
5522
5523void LocationsBuilderARM64::VisitShr(HShr* shr) {
5524  HandleShift(shr);
5525}
5526
5527void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5528  HandleShift(shr);
5529}
5530
5531void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5532  HandleBinaryOp(instruction);
5533}
5534
5535void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5536  HandleBinaryOp(instruction);
5537}
5538
5539void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5540  HandleFieldGet(instruction, instruction->GetFieldInfo());
5541}
5542
5543void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5544  HandleFieldGet(instruction, instruction->GetFieldInfo());
5545}
5546
5547void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5548  HandleFieldSet(instruction);
5549}
5550
5551void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5552  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5553}
5554
5555void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5556    HUnresolvedInstanceFieldGet* instruction) {
5557  FieldAccessCallingConventionARM64 calling_convention;
5558  codegen_->CreateUnresolvedFieldLocationSummary(
5559      instruction, instruction->GetFieldType(), calling_convention);
5560}
5561
5562void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5563    HUnresolvedInstanceFieldGet* instruction) {
5564  FieldAccessCallingConventionARM64 calling_convention;
5565  codegen_->GenerateUnresolvedFieldAccess(instruction,
5566                                          instruction->GetFieldType(),
5567                                          instruction->GetFieldIndex(),
5568                                          instruction->GetDexPc(),
5569                                          calling_convention);
5570}
5571
5572void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5573    HUnresolvedInstanceFieldSet* instruction) {
5574  FieldAccessCallingConventionARM64 calling_convention;
5575  codegen_->CreateUnresolvedFieldLocationSummary(
5576      instruction, instruction->GetFieldType(), calling_convention);
5577}
5578
5579void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5580    HUnresolvedInstanceFieldSet* instruction) {
5581  FieldAccessCallingConventionARM64 calling_convention;
5582  codegen_->GenerateUnresolvedFieldAccess(instruction,
5583                                          instruction->GetFieldType(),
5584                                          instruction->GetFieldIndex(),
5585                                          instruction->GetDexPc(),
5586                                          calling_convention);
5587}
5588
5589void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5590    HUnresolvedStaticFieldGet* instruction) {
5591  FieldAccessCallingConventionARM64 calling_convention;
5592  codegen_->CreateUnresolvedFieldLocationSummary(
5593      instruction, instruction->GetFieldType(), calling_convention);
5594}
5595
5596void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5597    HUnresolvedStaticFieldGet* instruction) {
5598  FieldAccessCallingConventionARM64 calling_convention;
5599  codegen_->GenerateUnresolvedFieldAccess(instruction,
5600                                          instruction->GetFieldType(),
5601                                          instruction->GetFieldIndex(),
5602                                          instruction->GetDexPc(),
5603                                          calling_convention);
5604}
5605
5606void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5607    HUnresolvedStaticFieldSet* instruction) {
5608  FieldAccessCallingConventionARM64 calling_convention;
5609  codegen_->CreateUnresolvedFieldLocationSummary(
5610      instruction, instruction->GetFieldType(), calling_convention);
5611}
5612
5613void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5614    HUnresolvedStaticFieldSet* instruction) {
5615  FieldAccessCallingConventionARM64 calling_convention;
5616  codegen_->GenerateUnresolvedFieldAccess(instruction,
5617                                          instruction->GetFieldType(),
5618                                          instruction->GetFieldIndex(),
5619                                          instruction->GetDexPc(),
5620                                          calling_convention);
5621}
5622
5623void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5624  LocationSummary* locations =
5625      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5626  // In suspend check slow path, usually there are no caller-save registers at all.
5627  // If SIMD instructions are present, however, we force spilling all live SIMD
5628  // registers in full width (since the runtime only saves/restores lower part).
5629  locations->SetCustomSlowPathCallerSaves(
5630      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5631}
5632
5633void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5634  HBasicBlock* block = instruction->GetBlock();
5635  if (block->GetLoopInformation() != nullptr) {
5636    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5637    // The back edge will generate the suspend check.
5638    return;
5639  }
5640  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5641    // The goto will generate the suspend check.
5642    return;
5643  }
5644  GenerateSuspendCheck(instruction, nullptr);
5645}
5646
5647void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5648  LocationSummary* locations =
5649      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5650  InvokeRuntimeCallingConvention calling_convention;
5651  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5652}
5653
5654void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5655  codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5656  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5657}
5658
5659void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5660  LocationSummary* locations =
5661      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
5662  Primitive::Type input_type = conversion->GetInputType();
5663  Primitive::Type result_type = conversion->GetResultType();
5664  DCHECK_NE(input_type, result_type);
5665  if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
5666      (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
5667    LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5668  }
5669
5670  if (Primitive::IsFloatingPointType(input_type)) {
5671    locations->SetInAt(0, Location::RequiresFpuRegister());
5672  } else {
5673    locations->SetInAt(0, Location::RequiresRegister());
5674  }
5675
5676  if (Primitive::IsFloatingPointType(result_type)) {
5677    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5678  } else {
5679    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5680  }
5681}
5682
5683void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5684  Primitive::Type result_type = conversion->GetResultType();
5685  Primitive::Type input_type = conversion->GetInputType();
5686
5687  DCHECK_NE(input_type, result_type);
5688
5689  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
5690    int result_size = Primitive::ComponentSize(result_type);
5691    int input_size = Primitive::ComponentSize(input_type);
5692    int min_size = std::min(result_size, input_size);
5693    Register output = OutputRegister(conversion);
5694    Register source = InputRegisterAt(conversion, 0);
5695    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
5696      // 'int' values are used directly as W registers, discarding the top
5697      // bits, so we don't need to sign-extend and can just perform a move.
5698      // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5699      // top 32 bits of the target register. We theoretically could leave those
5700      // bits unchanged, but we would have to make sure that no code uses a
5701      // 32bit input value as a 64bit value assuming that the top 32 bits are
5702      // zero.
5703      __ Mov(output.W(), source.W());
5704    } else if (result_type == Primitive::kPrimChar ||
5705               (input_type == Primitive::kPrimChar && input_size < result_size)) {
5706      __ Ubfx(output,
5707              output.IsX() ? source.X() : source.W(),
5708              0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
5709    } else {
5710      __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5711    }
5712  } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
5713    __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5714  } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
5715    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
5716    __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5717  } else if (Primitive::IsFloatingPointType(result_type) &&
5718             Primitive::IsFloatingPointType(input_type)) {
5719    __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5720  } else {
5721    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5722                << " to " << result_type;
5723  }
5724}
5725
5726void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5727  HandleShift(ushr);
5728}
5729
5730void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5731  HandleShift(ushr);
5732}
5733
5734void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5735  HandleBinaryOp(instruction);
5736}
5737
5738void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5739  HandleBinaryOp(instruction);
5740}
5741
5742void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5743  // Nothing to do, this should be removed during prepare for register allocator.
5744  LOG(FATAL) << "Unreachable";
5745}
5746
5747void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5748  // Nothing to do, this should be removed during prepare for register allocator.
5749  LOG(FATAL) << "Unreachable";
5750}
5751
5752// Simple implementation of packed switch - generate cascaded compare/jumps.
5753void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5754  LocationSummary* locations =
5755      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5756  locations->SetInAt(0, Location::RequiresRegister());
5757}
5758
5759void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5760  int32_t lower_bound = switch_instr->GetStartValue();
5761  uint32_t num_entries = switch_instr->GetNumEntries();
5762  Register value_reg = InputRegisterAt(switch_instr, 0);
5763  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5764
5765  // Roughly set 16 as max average assemblies generated per HIR in a graph.
5766  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5767  // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to
5768  // make sure we don't emit it if the target may run out of range.
5769  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5770  // ranges and emit the tables only as required.
5771  static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
5772
5773  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5774      // Current instruction id is an upper bound of the number of HIRs in the graph.
5775      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5776    // Create a series of compare/jumps.
5777    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5778    Register temp = temps.AcquireW();
5779    __ Subs(temp, value_reg, Operand(lower_bound));
5780
5781    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
5782    // Jump to successors[0] if value == lower_bound.
5783    __ B(eq, codegen_->GetLabelOf(successors[0]));
5784    int32_t last_index = 0;
5785    for (; num_entries - last_index > 2; last_index += 2) {
5786      __ Subs(temp, temp, Operand(2));
5787      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
5788      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
5789      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
5790      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
5791    }
5792    if (num_entries - last_index == 2) {
5793      // The last missing case_value.
5794      __ Cmp(temp, Operand(1));
5795      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
5796    }
5797
5798    // And the default for any other value.
5799    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
5800      __ B(codegen_->GetLabelOf(default_block));
5801    }
5802  } else {
5803    JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
5804
5805    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5806
5807    // Below instructions should use at most one blocked register. Since there are two blocked
5808    // registers, we are free to block one.
5809    Register temp_w = temps.AcquireW();
5810    Register index;
5811    // Remove the bias.
5812    if (lower_bound != 0) {
5813      index = temp_w;
5814      __ Sub(index, value_reg, Operand(lower_bound));
5815    } else {
5816      index = value_reg;
5817    }
5818
5819    // Jump to default block if index is out of the range.
5820    __ Cmp(index, Operand(num_entries));
5821    __ B(hs, codegen_->GetLabelOf(default_block));
5822
5823    // In current VIXL implementation, it won't require any blocked registers to encode the
5824    // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the
5825    // register pressure.
5826    Register table_base = temps.AcquireX();
5827    // Load jump offset from the table.
5828    __ Adr(table_base, jump_table->GetTableStartLabel());
5829    Register jump_offset = temp_w;
5830    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
5831
5832    // Jump to target block by branching to table_base(pc related) + offset.
5833    Register target_address = table_base;
5834    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
5835    __ Br(target_address);
5836  }
5837}
5838
5839void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
5840    HInstruction* instruction,
5841    Location out,
5842    uint32_t offset,
5843    Location maybe_temp,
5844    ReadBarrierOption read_barrier_option) {
5845  Primitive::Type type = Primitive::kPrimNot;
5846  Register out_reg = RegisterFrom(out, type);
5847  if (read_barrier_option == kWithReadBarrier) {
5848    CHECK(kEmitCompilerReadBarrier);
5849    if (kUseBakerReadBarrier) {
5850      // Load with fast path based Baker's read barrier.
5851      // /* HeapReference<Object> */ out = *(out + offset)
5852      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5853                                                      out,
5854                                                      out_reg,
5855                                                      offset,
5856                                                      maybe_temp,
5857                                                      /* needs_null_check */ false,
5858                                                      /* use_load_acquire */ false);
5859    } else {
5860      // Load with slow path based read barrier.
5861      // Save the value of `out` into `maybe_temp` before overwriting it
5862      // in the following move operation, as we will need it for the
5863      // read barrier below.
5864      Register temp_reg = RegisterFrom(maybe_temp, type);
5865      __ Mov(temp_reg, out_reg);
5866      // /* HeapReference<Object> */ out = *(out + offset)
5867      __ Ldr(out_reg, HeapOperand(out_reg, offset));
5868      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
5869    }
5870  } else {
5871    // Plain load with no read barrier.
5872    // /* HeapReference<Object> */ out = *(out + offset)
5873    __ Ldr(out_reg, HeapOperand(out_reg, offset));
5874    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5875  }
5876}
5877
5878void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
5879    HInstruction* instruction,
5880    Location out,
5881    Location obj,
5882    uint32_t offset,
5883    Location maybe_temp,
5884    ReadBarrierOption read_barrier_option) {
5885  Primitive::Type type = Primitive::kPrimNot;
5886  Register out_reg = RegisterFrom(out, type);
5887  Register obj_reg = RegisterFrom(obj, type);
5888  if (read_barrier_option == kWithReadBarrier) {
5889    CHECK(kEmitCompilerReadBarrier);
5890    if (kUseBakerReadBarrier) {
5891      // Load with fast path based Baker's read barrier.
5892      // /* HeapReference<Object> */ out = *(obj + offset)
5893      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5894                                                      out,
5895                                                      obj_reg,
5896                                                      offset,
5897                                                      maybe_temp,
5898                                                      /* needs_null_check */ false,
5899                                                      /* use_load_acquire */ false);
5900    } else {
5901      // Load with slow path based read barrier.
5902      // /* HeapReference<Object> */ out = *(obj + offset)
5903      __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5904      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
5905    }
5906  } else {
5907    // Plain load with no read barrier.
5908    // /* HeapReference<Object> */ out = *(obj + offset)
5909    __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5910    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5911  }
5912}
5913
5914void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
5915    HInstruction* instruction,
5916    Location root,
5917    Register obj,
5918    uint32_t offset,
5919    vixl::aarch64::Label* fixup_label,
5920    ReadBarrierOption read_barrier_option) {
5921  DCHECK(fixup_label == nullptr || offset == 0u);
5922  Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
5923  if (read_barrier_option == kWithReadBarrier) {
5924    DCHECK(kEmitCompilerReadBarrier);
5925    if (kUseBakerReadBarrier) {
5926      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
5927      // Baker's read barrier are used.
5928      if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
5929          !Runtime::Current()->UseJitCompilation()) {
5930        // Note that we do not actually check the value of `GetIsGcMarking()`
5931        // to decide whether to mark the loaded GC root or not.  Instead, we
5932        // load into `temp` the read barrier mark introspection entrypoint.
5933        // If `temp` is null, it means that `GetIsGcMarking()` is false, and
5934        // vice versa.
5935        //
5936        // We use link-time generated thunks for the slow path. That thunk
5937        // checks the reference and jumps to the entrypoint if needed.
5938        //
5939        //     temp = Thread::Current()->pReadBarrierMarkIntrospection
5940        //     lr = &return_address;
5941        //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
5942        //     if (temp != nullptr) {
5943        //        goto gc_root_thunk<root_reg>(lr)
5944        //     }
5945        //   return_address:
5946
5947        UseScratchRegisterScope temps(GetVIXLAssembler());
5948        DCHECK(temps.IsAvailable(ip0));
5949        DCHECK(temps.IsAvailable(ip1));
5950        temps.Exclude(ip0, ip1);
5951        uint32_t custom_data =
5952            linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
5953        vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
5954
5955        // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
5956        DCHECK_EQ(ip0.GetCode(), 16u);
5957        const int32_t entry_point_offset =
5958            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
5959        __ Ldr(ip1, MemOperand(tr, entry_point_offset));
5960        EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
5961        vixl::aarch64::Label return_address;
5962        __ adr(lr, &return_address);
5963        if (fixup_label != nullptr) {
5964          __ Bind(fixup_label);
5965        }
5966        static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
5967                      "GC root LDR must be 2 instruction (8B) before the return address label.");
5968        __ ldr(root_reg, MemOperand(obj.X(), offset));
5969        __ Bind(cbnz_label);
5970        __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
5971        __ Bind(&return_address);
5972      } else {
5973        // Note that we do not actually check the value of
5974        // `GetIsGcMarking()` to decide whether to mark the loaded GC
5975        // root or not.  Instead, we load into `temp` the read barrier
5976        // mark entry point corresponding to register `root`. If `temp`
5977        // is null, it means that `GetIsGcMarking()` is false, and vice
5978        // versa.
5979        //
5980        //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5981        //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
5982        //   if (temp != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
5983        //     // Slow path.
5984        //     root = temp(root);  // root = ReadBarrier::Mark(root);  // Runtime entry point call.
5985        //   }
5986
5987        // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
5988        Register temp = lr;
5989        SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
5990            instruction, root, /* entrypoint */ LocationFrom(temp));
5991        codegen_->AddSlowPath(slow_path);
5992
5993        // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
5994        const int32_t entry_point_offset =
5995            CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
5996        // Loading the entrypoint does not require a load acquire since it is only changed when
5997        // threads are suspended or running a checkpoint.
5998        __ Ldr(temp, MemOperand(tr, entry_point_offset));
5999
6000        // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6001        if (fixup_label == nullptr) {
6002          __ Ldr(root_reg, MemOperand(obj, offset));
6003        } else {
6004          codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
6005        }
6006        static_assert(
6007            sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6008            "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6009            "have different sizes.");
6010        static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6011                      "art::mirror::CompressedReference<mirror::Object> and int32_t "
6012                      "have different sizes.");
6013
6014        // The entrypoint is null when the GC is not marking, this prevents one load compared to
6015        // checking GetIsGcMarking.
6016        __ Cbnz(temp, slow_path->GetEntryLabel());
6017        __ Bind(slow_path->GetExitLabel());
6018      }
6019    } else {
6020      // GC root loaded through a slow path for read barriers other
6021      // than Baker's.
6022      // /* GcRoot<mirror::Object>* */ root = obj + offset
6023      if (fixup_label == nullptr) {
6024        __ Add(root_reg.X(), obj.X(), offset);
6025      } else {
6026        codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6027      }
6028      // /* mirror::Object* */ root = root->Read()
6029      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6030    }
6031  } else {
6032    // Plain GC root load with no read barrier.
6033    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6034    if (fixup_label == nullptr) {
6035      __ Ldr(root_reg, MemOperand(obj, offset));
6036    } else {
6037      codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6038    }
6039    // Note that GC roots are not affected by heap poisoning, thus we
6040    // do not have to unpoison `root_reg` here.
6041  }
6042}
6043
6044void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6045                                                               Location ref,
6046                                                               Register obj,
6047                                                               uint32_t offset,
6048                                                               Location maybe_temp,
6049                                                               bool needs_null_check,
6050                                                               bool use_load_acquire) {
6051  DCHECK(kEmitCompilerReadBarrier);
6052  DCHECK(kUseBakerReadBarrier);
6053
6054  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
6055      !use_load_acquire &&
6056      !Runtime::Current()->UseJitCompilation()) {
6057    // Note that we do not actually check the value of `GetIsGcMarking()`
6058    // to decide whether to mark the loaded GC root or not.  Instead, we
6059    // load into `temp` the read barrier mark introspection entrypoint.
6060    // If `temp` is null, it means that `GetIsGcMarking()` is false, and
6061    // vice versa.
6062    //
6063    // We use link-time generated thunks for the slow path. That thunk checks
6064    // the holder and jumps to the entrypoint if needed. If the holder is not
6065    // gray, it creates a fake dependency and returns to the LDR instruction.
6066    //
6067    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
6068    //     lr = &return_address;
6069    //     if (temp != nullptr) {
6070    //        goto field_thunk<holder_reg, base_reg>(lr)
6071    //     }
6072    //   not_gray_return_address:
6073    //     // Original reference load. If the offset is too large to fit
6074    //     // into LDR, we use an adjusted base register here.
6075    //     GcRoot<mirror::Object> root = *(obj+offset);
6076    //   gray_return_address:
6077
6078    DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6079    Register base = obj;
6080    if (offset >= kReferenceLoadMinFarOffset) {
6081      DCHECK(maybe_temp.IsRegister());
6082      base = WRegisterFrom(maybe_temp);
6083      static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6084      __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6085      offset &= (kReferenceLoadMinFarOffset - 1u);
6086    }
6087    UseScratchRegisterScope temps(GetVIXLAssembler());
6088    DCHECK(temps.IsAvailable(ip0));
6089    DCHECK(temps.IsAvailable(ip1));
6090    temps.Exclude(ip0, ip1);
6091    uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
6092        base.GetCode(),
6093        obj.GetCode());
6094    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
6095
6096    // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
6097    DCHECK_EQ(ip0.GetCode(), 16u);
6098    const int32_t entry_point_offset =
6099        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
6100    __ Ldr(ip1, MemOperand(tr, entry_point_offset));
6101    EmissionCheckScope guard(GetVIXLAssembler(),
6102                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6103    vixl::aarch64::Label return_address;
6104    __ adr(lr, &return_address);
6105    __ Bind(cbnz_label);
6106    __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
6107    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6108                  "Field LDR must be 1 instruction (4B) before the return address label; "
6109                  " 2 instructions (8B) for heap poisoning.");
6110    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
6111    __ ldr(ref_reg, MemOperand(base.X(), offset));
6112    if (needs_null_check) {
6113      MaybeRecordImplicitNullCheck(instruction);
6114    }
6115    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6116    __ Bind(&return_address);
6117    return;
6118  }
6119
6120  // /* HeapReference<Object> */ ref = *(obj + offset)
6121  Register temp = WRegisterFrom(maybe_temp);
6122  Location no_index = Location::NoLocation();
6123  size_t no_scale_factor = 0u;
6124  GenerateReferenceLoadWithBakerReadBarrier(instruction,
6125                                            ref,
6126                                            obj,
6127                                            offset,
6128                                            no_index,
6129                                            no_scale_factor,
6130                                            temp,
6131                                            needs_null_check,
6132                                            use_load_acquire);
6133}
6134
6135void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6136                                                               Location ref,
6137                                                               Register obj,
6138                                                               uint32_t data_offset,
6139                                                               Location index,
6140                                                               Register temp,
6141                                                               bool needs_null_check) {
6142  DCHECK(kEmitCompilerReadBarrier);
6143  DCHECK(kUseBakerReadBarrier);
6144
6145  // Array cells are never volatile variables, therefore array loads
6146  // never use Load-Acquire instructions on ARM64.
6147  const bool use_load_acquire = false;
6148
6149  static_assert(
6150      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6151      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6152  // /* HeapReference<Object> */ ref =
6153  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6154  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
6155  GenerateReferenceLoadWithBakerReadBarrier(instruction,
6156                                            ref,
6157                                            obj,
6158                                            data_offset,
6159                                            index,
6160                                            scale_factor,
6161                                            temp,
6162                                            needs_null_check,
6163                                            use_load_acquire);
6164}
6165
6166void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6167                                                                   Location ref,
6168                                                                   Register obj,
6169                                                                   uint32_t offset,
6170                                                                   Location index,
6171                                                                   size_t scale_factor,
6172                                                                   Register temp,
6173                                                                   bool needs_null_check,
6174                                                                   bool use_load_acquire,
6175                                                                   bool always_update_field) {
6176  DCHECK(kEmitCompilerReadBarrier);
6177  DCHECK(kUseBakerReadBarrier);
6178  // If we are emitting an array load, we should not be using a
6179  // Load Acquire instruction.  In other words:
6180  // `instruction->IsArrayGet()` => `!use_load_acquire`.
6181  DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
6182
6183  // Query `art::Thread::Current()->GetIsGcMarking()` to decide
6184  // whether we need to enter the slow path to mark the reference.
6185  // Then, in the slow path, check the gray bit in the lock word of
6186  // the reference's holder (`obj`) to decide whether to mark `ref` or
6187  // not.
6188  //
6189  // Note that we do not actually check the value of `GetIsGcMarking()`;
6190  // instead, we load into `temp2` the read barrier mark entry point
6191  // corresponding to register `ref`. If `temp2` is null, it means
6192  // that `GetIsGcMarking()` is false, and vice versa.
6193  //
6194  //   temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6195  //   if (temp2 != nullptr) {  // <=> Thread::Current()->GetIsGcMarking()
6196  //     // Slow path.
6197  //     uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6198  //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6199  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
6200  //     bool is_gray = (rb_state == ReadBarrier::GrayState());
6201  //     if (is_gray) {
6202  //       ref = temp2(ref);  // ref = ReadBarrier::Mark(ref);  // Runtime entry point call.
6203  //     }
6204  //   } else {
6205  //     HeapReference<mirror::Object> ref = *src;  // Original reference load.
6206  //   }
6207
6208  // Slow path marking the object `ref` when the GC is marking. The
6209  // entrypoint will already be loaded in `temp2`.
6210  Register temp2 = lr;
6211  Location temp2_loc = LocationFrom(temp2);
6212  SlowPathCodeARM64* slow_path;
6213  if (always_update_field) {
6214    // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
6215    // only supports address of the form `obj + field_offset`, where
6216    // `obj` is a register and `field_offset` is a register. Thus
6217    // `offset` and `scale_factor` above are expected to be null in
6218    // this code path.
6219    DCHECK_EQ(offset, 0u);
6220    DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
6221    Location field_offset = index;
6222    slow_path =
6223        new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
6224            instruction,
6225            ref,
6226            obj,
6227            offset,
6228            /* index */ field_offset,
6229            scale_factor,
6230            needs_null_check,
6231            use_load_acquire,
6232            temp,
6233            /* entrypoint */ temp2_loc);
6234  } else {
6235    slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
6236        instruction,
6237        ref,
6238        obj,
6239        offset,
6240        index,
6241        scale_factor,
6242        needs_null_check,
6243        use_load_acquire,
6244        temp,
6245        /* entrypoint */ temp2_loc);
6246  }
6247  AddSlowPath(slow_path);
6248
6249  // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg()
6250  const int32_t entry_point_offset =
6251      CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg());
6252  // Loading the entrypoint does not require a load acquire since it is only changed when
6253  // threads are suspended or running a checkpoint.
6254  __ Ldr(temp2, MemOperand(tr, entry_point_offset));
6255  // The entrypoint is null when the GC is not marking, this prevents one load compared to
6256  // checking GetIsGcMarking.
6257  __ Cbnz(temp2, slow_path->GetEntryLabel());
6258  // Fast path: just load the reference.
6259  GenerateRawReferenceLoad(
6260      instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
6261  __ Bind(slow_path->GetExitLabel());
6262}
6263
6264void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
6265                                                  Location ref,
6266                                                  Register obj,
6267                                                  uint32_t offset,
6268                                                  Location index,
6269                                                  size_t scale_factor,
6270                                                  bool needs_null_check,
6271                                                  bool use_load_acquire) {
6272  DCHECK(obj.IsW());
6273  Primitive::Type type = Primitive::kPrimNot;
6274  Register ref_reg = RegisterFrom(ref, type);
6275
6276  // If needed, vixl::EmissionCheckScope guards are used to ensure
6277  // that no pools are emitted between the load (macro) instruction
6278  // and MaybeRecordImplicitNullCheck.
6279
6280  if (index.IsValid()) {
6281    // Load types involving an "index": ArrayGet,
6282    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
6283    // intrinsics.
6284    if (use_load_acquire) {
6285      // UnsafeGetObjectVolatile intrinsic case.
6286      // Register `index` is not an index in an object array, but an
6287      // offset to an object reference field within object `obj`.
6288      DCHECK(instruction->IsInvoke()) << instruction->DebugName();
6289      DCHECK(instruction->GetLocations()->Intrinsified());
6290      DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
6291          << instruction->AsInvoke()->GetIntrinsic();
6292      DCHECK_EQ(offset, 0u);
6293      DCHECK_EQ(scale_factor, 0u);
6294      DCHECK_EQ(needs_null_check, false);
6295      // /* HeapReference<mirror::Object> */ ref = *(obj + index)
6296      MemOperand field = HeapOperand(obj, XRegisterFrom(index));
6297      LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
6298    } else {
6299      // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
6300      // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
6301      if (index.IsConstant()) {
6302        uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
6303        EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6304        Load(type, ref_reg, HeapOperand(obj, computed_offset));
6305        if (needs_null_check) {
6306          MaybeRecordImplicitNullCheck(instruction);
6307        }
6308      } else {
6309        UseScratchRegisterScope temps(GetVIXLAssembler());
6310        Register temp = temps.AcquireW();
6311        __ Add(temp, obj, offset);
6312        {
6313          EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6314          Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
6315          if (needs_null_check) {
6316            MaybeRecordImplicitNullCheck(instruction);
6317          }
6318        }
6319      }
6320    }
6321  } else {
6322    // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
6323    MemOperand field = HeapOperand(obj, offset);
6324    if (use_load_acquire) {
6325      // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
6326      LoadAcquire(instruction, ref_reg, field, needs_null_check);
6327    } else {
6328      EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
6329      Load(type, ref_reg, field);
6330      if (needs_null_check) {
6331        MaybeRecordImplicitNullCheck(instruction);
6332      }
6333    }
6334  }
6335
6336  // Object* ref = ref_addr->AsMirrorPtr()
6337  GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
6338}
6339
6340void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6341                                                 Location out,
6342                                                 Location ref,
6343                                                 Location obj,
6344                                                 uint32_t offset,
6345                                                 Location index) {
6346  DCHECK(kEmitCompilerReadBarrier);
6347
6348  // Insert a slow path based read barrier *after* the reference load.
6349  //
6350  // If heap poisoning is enabled, the unpoisoning of the loaded
6351  // reference will be carried out by the runtime within the slow
6352  // path.
6353  //
6354  // Note that `ref` currently does not get unpoisoned (when heap
6355  // poisoning is enabled), which is alright as the `ref` argument is
6356  // not used by the artReadBarrierSlow entry point.
6357  //
6358  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6359  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
6360      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6361  AddSlowPath(slow_path);
6362
6363  __ B(slow_path->GetEntryLabel());
6364  __ Bind(slow_path->GetExitLabel());
6365}
6366
6367void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6368                                                      Location out,
6369                                                      Location ref,
6370                                                      Location obj,
6371                                                      uint32_t offset,
6372                                                      Location index) {
6373  if (kEmitCompilerReadBarrier) {
6374    // Baker's read barriers shall be handled by the fast path
6375    // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6376    DCHECK(!kUseBakerReadBarrier);
6377    // If heap poisoning is enabled, unpoisoning will be taken care of
6378    // by the runtime within the slow path.
6379    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6380  } else if (kPoisonHeapReferences) {
6381    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6382  }
6383}
6384
6385void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6386                                                        Location out,
6387                                                        Location root) {
6388  DCHECK(kEmitCompilerReadBarrier);
6389
6390  // Insert a slow path based read barrier *after* the GC root load.
6391  //
6392  // Note that GC roots are not affected by heap poisoning, so we do
6393  // not need to do anything special for this here.
6394  SlowPathCodeARM64* slow_path =
6395      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6396  AddSlowPath(slow_path);
6397
6398  __ B(slow_path->GetEntryLabel());
6399  __ Bind(slow_path->GetExitLabel());
6400}
6401
6402void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6403  LocationSummary* locations =
6404      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6405  locations->SetInAt(0, Location::RequiresRegister());
6406  locations->SetOut(Location::RequiresRegister());
6407}
6408
6409void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6410  LocationSummary* locations = instruction->GetLocations();
6411  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6412    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6413        instruction->GetIndex(), kArm64PointerSize).SizeValue();
6414    __ Ldr(XRegisterFrom(locations->Out()),
6415           MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6416  } else {
6417    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6418        instruction->GetIndex(), kArm64PointerSize));
6419    __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6420        mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6421    __ Ldr(XRegisterFrom(locations->Out()),
6422           MemOperand(XRegisterFrom(locations->Out()), method_offset));
6423  }
6424}
6425
6426static void PatchJitRootUse(uint8_t* code,
6427                            const uint8_t* roots_data,
6428                            vixl::aarch64::Literal<uint32_t>* literal,
6429                            uint64_t index_in_table) {
6430  uint32_t literal_offset = literal->GetOffset();
6431  uintptr_t address =
6432      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6433  uint8_t* data = code + literal_offset;
6434  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6435}
6436
6437void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6438  for (const auto& entry : jit_string_patches_) {
6439    const auto& it = jit_string_roots_.find(entry.first);
6440    DCHECK(it != jit_string_roots_.end());
6441    PatchJitRootUse(code, roots_data, entry.second, it->second);
6442  }
6443  for (const auto& entry : jit_class_patches_) {
6444    const auto& it = jit_class_roots_.find(entry.first);
6445    DCHECK(it != jit_class_roots_.end());
6446    PatchJitRootUse(code, roots_data, entry.second, it->second);
6447  }
6448}
6449
6450#undef __
6451#undef QUICK_ENTRY_POINT
6452
6453}  // namespace arm64
6454}  // namespace art
6455