1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "intrinsics_arm64.h"
18
19#include "arch/arm64/instruction_set_features_arm64.h"
20#include "art_method.h"
21#include "code_generator_arm64.h"
22#include "common_arm64.h"
23#include "entrypoints/quick/quick_entrypoints.h"
24#include "heap_poisoning.h"
25#include "intrinsics.h"
26#include "lock_word.h"
27#include "mirror/array-inl.h"
28#include "mirror/object_array-inl.h"
29#include "mirror/reference.h"
30#include "mirror/string-inl.h"
31#include "scoped_thread_state_change-inl.h"
32#include "thread-current-inl.h"
33#include "utils/arm64/assembler_arm64.h"
34
35using namespace vixl::aarch64;  // NOLINT(build/namespaces)
36
37// TODO(VIXL): Make VIXL compile with -Wshadow.
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Wshadow"
40#include "aarch64/disasm-aarch64.h"
41#include "aarch64/macro-assembler-aarch64.h"
42#pragma GCC diagnostic pop
43
44namespace art {
45
46namespace arm64 {
47
48using helpers::DRegisterFrom;
49using helpers::FPRegisterFrom;
50using helpers::HeapOperand;
51using helpers::LocationFrom;
52using helpers::OperandFrom;
53using helpers::RegisterFrom;
54using helpers::SRegisterFrom;
55using helpers::WRegisterFrom;
56using helpers::XRegisterFrom;
57using helpers::InputRegisterAt;
58using helpers::OutputRegister;
59
60namespace {
61
62ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) {
63  return MemOperand(XRegisterFrom(location), offset);
64}
65
66}  // namespace
67
68MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() {
69  return codegen_->GetVIXLAssembler();
70}
71
72ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() {
73  return codegen_->GetGraph()->GetAllocator();
74}
75
76#define __ codegen->GetVIXLAssembler()->
77
78static void MoveFromReturnRegister(Location trg,
79                                   DataType::Type type,
80                                   CodeGeneratorARM64* codegen) {
81  if (!trg.IsValid()) {
82    DCHECK(type == DataType::Type::kVoid);
83    return;
84  }
85
86  DCHECK_NE(type, DataType::Type::kVoid);
87
88  if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) {
89    Register trg_reg = RegisterFrom(trg, type);
90    Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
91    __ Mov(trg_reg, res_reg, kDiscardForSameWReg);
92  } else {
93    FPRegister trg_reg = FPRegisterFrom(trg, type);
94    FPRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type);
95    __ Fmov(trg_reg, res_reg);
96  }
97}
98
99static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
100  InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
101  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
102}
103
104// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
105// call. This will copy the arguments into the positions for a regular call.
106//
107// Note: The actual parameters are required to be in the locations given by the invoke's location
108//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
109//       restored!
110class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
111 public:
112  explicit IntrinsicSlowPathARM64(HInvoke* invoke)
113      : SlowPathCodeARM64(invoke), invoke_(invoke) { }
114
115  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
116    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
117    __ Bind(GetEntryLabel());
118
119    SaveLiveRegisters(codegen, invoke_->GetLocations());
120
121    MoveArguments(invoke_, codegen);
122
123    {
124      // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there
125      // are no pools emitted.
126      vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
127      if (invoke_->IsInvokeStaticOrDirect()) {
128        codegen->GenerateStaticOrDirectCall(
129            invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
130      } else {
131        codegen->GenerateVirtualCall(
132            invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
133      }
134    }
135
136    // Copy the result back to the expected output.
137    Location out = invoke_->GetLocations()->Out();
138    if (out.IsValid()) {
139      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
140      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
141      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
142    }
143
144    RestoreLiveRegisters(codegen, invoke_->GetLocations());
145    __ B(GetExitLabel());
146  }
147
148  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }
149
150 private:
151  // The instruction where this slow path is happening.
152  HInvoke* const invoke_;
153
154  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
155};
156
157// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
158class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
159 public:
160  ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
161      : SlowPathCodeARM64(instruction), tmp_(tmp) {
162    DCHECK(kEmitCompilerReadBarrier);
163    DCHECK(kUseBakerReadBarrier);
164  }
165
166  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
167    CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
168    LocationSummary* locations = instruction_->GetLocations();
169    DCHECK(locations->CanCall());
170    DCHECK(instruction_->IsInvokeStaticOrDirect())
171        << "Unexpected instruction in read barrier arraycopy slow path: "
172        << instruction_->DebugName();
173    DCHECK(instruction_->GetLocations()->Intrinsified());
174    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
175
176    const int32_t element_size = DataType::Size(DataType::Type::kReference);
177
178    Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
179    Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
180    Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
181    Register tmp_reg = WRegisterFrom(tmp_);
182
183    __ Bind(GetEntryLabel());
184    vixl::aarch64::Label slow_copy_loop;
185    __ Bind(&slow_copy_loop);
186    __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
187    codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
188    // TODO: Inline the mark bit check before calling the runtime?
189    // tmp_reg = ReadBarrier::Mark(tmp_reg);
190    // No need to save live registers; it's taken care of by the
191    // entrypoint. Also, there is no need to update the stack mask,
192    // as this runtime call will not trigger a garbage collection.
193    // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
194    // explanations.)
195    DCHECK_NE(tmp_.reg(), LR);
196    DCHECK_NE(tmp_.reg(), WSP);
197    DCHECK_NE(tmp_.reg(), WZR);
198    // IP0 is used internally by the ReadBarrierMarkRegX entry point
199    // as a temporary (and not preserved).  It thus cannot be used by
200    // any live register in this slow path.
201    DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
202    DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
203    DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
204    DCHECK_NE(tmp_.reg(), IP0);
205    DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
206    // TODO: Load the entrypoint once before the loop, instead of
207    // loading it at every iteration.
208    int32_t entry_point_offset =
209        Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
210    // This runtime call does not require a stack map.
211    codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
212    codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
213    __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
214    __ Cmp(src_curr_addr, src_stop_addr);
215    __ B(&slow_copy_loop, ne);
216    __ B(GetExitLabel());
217  }
218
219  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
220
221 private:
222  Location tmp_;
223
224  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
225};
226#undef __
227
228bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
229  Dispatch(invoke);
230  LocationSummary* res = invoke->GetLocations();
231  if (res == nullptr) {
232    return false;
233  }
234  return res->Intrinsified();
235}
236
237#define __ masm->
238
239static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
240  LocationSummary* locations =
241      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
242  locations->SetInAt(0, Location::RequiresFpuRegister());
243  locations->SetOut(Location::RequiresRegister());
244}
245
246static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
247  LocationSummary* locations =
248      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
249  locations->SetInAt(0, Location::RequiresRegister());
250  locations->SetOut(Location::RequiresFpuRegister());
251}
252
253static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
254  Location input = locations->InAt(0);
255  Location output = locations->Out();
256  __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output),
257          is64bit ? DRegisterFrom(input) : SRegisterFrom(input));
258}
259
260static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
261  Location input = locations->InAt(0);
262  Location output = locations->Out();
263  __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output),
264          is64bit ? XRegisterFrom(input) : WRegisterFrom(input));
265}
266
267void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
268  CreateFPToIntLocations(allocator_, invoke);
269}
270void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
271  CreateIntToFPLocations(allocator_, invoke);
272}
273
274void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
275  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
276}
277void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
278  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
279}
280
281void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
282  CreateFPToIntLocations(allocator_, invoke);
283}
284void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
285  CreateIntToFPLocations(allocator_, invoke);
286}
287
288void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
289  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
290}
291void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
292  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
293}
294
295static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
296  LocationSummary* locations =
297      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
298  locations->SetInAt(0, Location::RequiresRegister());
299  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
300}
301
302static void GenReverseBytes(LocationSummary* locations,
303                            DataType::Type type,
304                            MacroAssembler* masm) {
305  Location in = locations->InAt(0);
306  Location out = locations->Out();
307
308  switch (type) {
309    case DataType::Type::kInt16:
310      __ Rev16(WRegisterFrom(out), WRegisterFrom(in));
311      __ Sxth(WRegisterFrom(out), WRegisterFrom(out));
312      break;
313    case DataType::Type::kInt32:
314    case DataType::Type::kInt64:
315      __ Rev(RegisterFrom(out, type), RegisterFrom(in, type));
316      break;
317    default:
318      LOG(FATAL) << "Unexpected size for reverse-bytes: " << type;
319      UNREACHABLE();
320  }
321}
322
323void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
324  CreateIntToIntLocations(allocator_, invoke);
325}
326
327void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) {
328  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
329}
330
331void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) {
332  CreateIntToIntLocations(allocator_, invoke);
333}
334
335void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) {
336  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
337}
338
339void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) {
340  CreateIntToIntLocations(allocator_, invoke);
341}
342
343void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
344  GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
345}
346
347static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
348  LocationSummary* locations =
349      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
350  locations->SetInAt(0, Location::RequiresRegister());
351  locations->SetInAt(1, Location::RequiresRegister());
352  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
353}
354
355static void GenNumberOfLeadingZeros(LocationSummary* locations,
356                                    DataType::Type type,
357                                    MacroAssembler* masm) {
358  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
359
360  Location in = locations->InAt(0);
361  Location out = locations->Out();
362
363  __ Clz(RegisterFrom(out, type), RegisterFrom(in, type));
364}
365
366void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
367  CreateIntToIntLocations(allocator_, invoke);
368}
369
370void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
371  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
372}
373
374void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
375  CreateIntToIntLocations(allocator_, invoke);
376}
377
378void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
379  GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
380}
381
382static void GenNumberOfTrailingZeros(LocationSummary* locations,
383                                     DataType::Type type,
384                                     MacroAssembler* masm) {
385  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
386
387  Location in = locations->InAt(0);
388  Location out = locations->Out();
389
390  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
391  __ Clz(RegisterFrom(out, type), RegisterFrom(out, type));
392}
393
394void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
395  CreateIntToIntLocations(allocator_, invoke);
396}
397
398void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
399  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
400}
401
402void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
403  CreateIntToIntLocations(allocator_, invoke);
404}
405
406void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
407  GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
408}
409
410static void GenReverse(LocationSummary* locations,
411                       DataType::Type type,
412                       MacroAssembler* masm) {
413  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
414
415  Location in = locations->InAt(0);
416  Location out = locations->Out();
417
418  __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type));
419}
420
421void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) {
422  CreateIntToIntLocations(allocator_, invoke);
423}
424
425void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) {
426  GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler());
427}
428
429void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) {
430  CreateIntToIntLocations(allocator_, invoke);
431}
432
433void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
434  GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler());
435}
436
437static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) {
438  DCHECK(DataType::IsIntOrLongType(type)) << type;
439  DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
440  DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
441
442  UseScratchRegisterScope temps(masm);
443
444  Register src = InputRegisterAt(instr, 0);
445  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
446  FPRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS();
447
448  __ Fmov(fpr, src);
449  __ Cnt(fpr.V8B(), fpr.V8B());
450  __ Addv(fpr.B(), fpr.V8B());
451  __ Fmov(dst, fpr);
452}
453
454void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
455  CreateIntToIntLocations(allocator_, invoke);
456}
457
458void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
459  GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler());
460}
461
462void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
463  CreateIntToIntLocations(allocator_, invoke);
464}
465
466void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
467  GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler());
468}
469
470static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
471  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
472
473  UseScratchRegisterScope temps(masm);
474
475  Register src = InputRegisterAt(invoke, 0);
476  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
477  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
478  size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u;
479  size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u;
480
481  __ Clz(temp, src);
482  __ Mov(dst, UINT64_C(1) << high_bit);  // MOV (bitmask immediate)
483  __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit));  // Clear dst if src was 0.
484  __ Lsr(dst, dst, temp);
485}
486
487void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
488  CreateIntToIntLocations(allocator_, invoke);
489}
490
491void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
492  GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
493}
494
495void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
496  CreateIntToIntLocations(allocator_, invoke);
497}
498
499void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
500  GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
501}
502
503static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) {
504  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
505
506  UseScratchRegisterScope temps(masm);
507
508  Register src = InputRegisterAt(invoke, 0);
509  Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
510  Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW();
511
512  __ Neg(temp, src);
513  __ And(dst, temp, src);
514}
515
516void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
517  CreateIntToIntLocations(allocator_, invoke);
518}
519
520void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
521  GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler());
522}
523
524void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
525  CreateIntToIntLocations(allocator_, invoke);
526}
527
528void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
529  GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
530}
531
532static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
533  LocationSummary* locations =
534      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
535  locations->SetInAt(0, Location::RequiresFpuRegister());
536  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
537}
538
539static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
540  Location in = locations->InAt(0);
541  Location out = locations->Out();
542
543  FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
544  FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);
545
546  __ Fabs(out_reg, in_reg);
547}
548
549void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
550  CreateFPToFPLocations(allocator_, invoke);
551}
552
553void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
554  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
555}
556
557void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
558  CreateFPToFPLocations(allocator_, invoke);
559}
560
561void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
562  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
563}
564
565static void GenAbsInteger(LocationSummary* locations,
566                          bool is64bit,
567                          MacroAssembler* masm) {
568  Location in = locations->InAt(0);
569  Location output = locations->Out();
570
571  Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
572  Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);
573
574  __ Cmp(in_reg, Operand(0));
575  __ Cneg(out_reg, in_reg, lt);
576}
577
578void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
579  CreateIntToIntLocations(allocator_, invoke);
580}
581
582void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
583  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
584}
585
586void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
587  CreateIntToIntLocations(allocator_, invoke);
588}
589
590void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
591  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
592}
593
594static void GenMinMaxFP(LocationSummary* locations,
595                        bool is_min,
596                        bool is_double,
597                        MacroAssembler* masm) {
598  Location op1 = locations->InAt(0);
599  Location op2 = locations->InAt(1);
600  Location out = locations->Out();
601
602  FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
603  FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
604  FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
605  if (is_min) {
606    __ Fmin(out_reg, op1_reg, op2_reg);
607  } else {
608    __ Fmax(out_reg, op1_reg, op2_reg);
609  }
610}
611
612static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
613  LocationSummary* locations =
614      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
615  locations->SetInAt(0, Location::RequiresFpuRegister());
616  locations->SetInAt(1, Location::RequiresFpuRegister());
617  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
618}
619
620void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
621  CreateFPFPToFPLocations(allocator_, invoke);
622}
623
624void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
625  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
626}
627
628void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
629  CreateFPFPToFPLocations(allocator_, invoke);
630}
631
632void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
633  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
634}
635
636void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
637  CreateFPFPToFPLocations(allocator_, invoke);
638}
639
640void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
641  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
642}
643
644void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
645  CreateFPFPToFPLocations(allocator_, invoke);
646}
647
648void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
649  GenMinMaxFP(
650      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
651}
652
653static void GenMinMax(LocationSummary* locations,
654                      bool is_min,
655                      bool is_long,
656                      MacroAssembler* masm) {
657  Location op1 = locations->InAt(0);
658  Location op2 = locations->InAt(1);
659  Location out = locations->Out();
660
661  Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
662  Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
663  Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
664
665  __ Cmp(op1_reg, op2_reg);
666  __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
667}
668
669void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
670  CreateIntIntToIntLocations(allocator_, invoke);
671}
672
673void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
674  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
675}
676
677void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
678  CreateIntIntToIntLocations(allocator_, invoke);
679}
680
681void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
682  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
683}
684
685void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
686  CreateIntIntToIntLocations(allocator_, invoke);
687}
688
689void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
690  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
691}
692
693void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
694  CreateIntIntToIntLocations(allocator_, invoke);
695}
696
697void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
698  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
699}
700
701void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
702  CreateFPToFPLocations(allocator_, invoke);
703}
704
705void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) {
706  LocationSummary* locations = invoke->GetLocations();
707  MacroAssembler* masm = GetVIXLAssembler();
708  __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
709}
710
711void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) {
712  CreateFPToFPLocations(allocator_, invoke);
713}
714
715void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) {
716  LocationSummary* locations = invoke->GetLocations();
717  MacroAssembler* masm = GetVIXLAssembler();
718  __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
719}
720
721void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) {
722  CreateFPToFPLocations(allocator_, invoke);
723}
724
725void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) {
726  LocationSummary* locations = invoke->GetLocations();
727  MacroAssembler* masm = GetVIXLAssembler();
728  __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
729}
730
731void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) {
732  CreateFPToFPLocations(allocator_, invoke);
733}
734
735void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) {
736  LocationSummary* locations = invoke->GetLocations();
737  MacroAssembler* masm = GetVIXLAssembler();
738  __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0)));
739}
740
741static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) {
742  LocationSummary* locations =
743      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
744  locations->SetInAt(0, Location::RequiresFpuRegister());
745  locations->SetOut(Location::RequiresRegister());
746  locations->AddTemp(Location::RequiresFpuRegister());
747}
748
749static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) {
750  // Java 8 API definition for Math.round():
751  // Return the closest long or int to the argument, with ties rounding to positive infinity.
752  //
753  // There is no single instruction in ARMv8 that can support the above definition.
754  // We choose to use FCVTAS here, because it has closest semantic.
755  // FCVTAS performs rounding to nearest integer, ties away from zero.
756  // For most inputs (positive values, zero or NaN), this instruction is enough.
757  // We only need a few handling code after FCVTAS if the input is negative half value.
758  //
759  // The reason why we didn't choose FCVTPS instruction here is that
760  // although it performs rounding toward positive infinity, it doesn't perform rounding to nearest.
761  // For example, FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2.
762  // If we were using this instruction, for most inputs, more handling code would be needed.
763  LocationSummary* l = invoke->GetLocations();
764  FPRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
765  FPRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
766  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
767  vixl::aarch64::Label done;
768
769  // Round to nearest integer, ties away from zero.
770  __ Fcvtas(out_reg, in_reg);
771
772  // For positive values, zero or NaN inputs, rounding is done.
773  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);
774
775  // Handle input < 0 cases.
776  // If input is negative but not a tie, previous result (round to nearest) is valid.
777  // If input is a negative tie, out_reg += 1.
778  __ Frinta(tmp_fp, in_reg);
779  __ Fsub(tmp_fp, in_reg, tmp_fp);
780  __ Fcmp(tmp_fp, 0.5);
781  __ Cinc(out_reg, out_reg, eq);
782
783  __ Bind(&done);
784}
785
786void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
787  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
788}
789
790void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
791  GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
792}
793
794void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
795  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
796}
797
798void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
799  GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
800}
801
802void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
803  CreateIntToIntLocations(allocator_, invoke);
804}
805
806void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
807  MacroAssembler* masm = GetVIXLAssembler();
808  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
809          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
810}
811
812void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
813  CreateIntToIntLocations(allocator_, invoke);
814}
815
816void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
817  MacroAssembler* masm = GetVIXLAssembler();
818  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
819         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
820}
821
822void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
823  CreateIntToIntLocations(allocator_, invoke);
824}
825
826void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
827  MacroAssembler* masm = GetVIXLAssembler();
828  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
829         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
830}
831
832void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
833  CreateIntToIntLocations(allocator_, invoke);
834}
835
836void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
837  MacroAssembler* masm = GetVIXLAssembler();
838  __ Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()),
839           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
840}
841
842static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
843  LocationSummary* locations =
844      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
845  locations->SetInAt(0, Location::RequiresRegister());
846  locations->SetInAt(1, Location::RequiresRegister());
847}
848
849void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) {
850  CreateIntIntToVoidLocations(allocator_, invoke);
851}
852
853void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) {
854  MacroAssembler* masm = GetVIXLAssembler();
855  __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)),
856          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
857}
858
859void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
860  CreateIntIntToVoidLocations(allocator_, invoke);
861}
862
863void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) {
864  MacroAssembler* masm = GetVIXLAssembler();
865  __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)),
866         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
867}
868
869void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
870  CreateIntIntToVoidLocations(allocator_, invoke);
871}
872
873void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) {
874  MacroAssembler* masm = GetVIXLAssembler();
875  __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)),
876         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
877}
878
879void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
880  CreateIntIntToVoidLocations(allocator_, invoke);
881}
882
883void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) {
884  MacroAssembler* masm = GetVIXLAssembler();
885  __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)),
886          AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
887}
888
889void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) {
890  LocationSummary* locations =
891      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
892  locations->SetOut(Location::RequiresRegister());
893}
894
895void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
896  codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()),
897                 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
898}
899
900static void GenUnsafeGet(HInvoke* invoke,
901                         DataType::Type type,
902                         bool is_volatile,
903                         CodeGeneratorARM64* codegen) {
904  LocationSummary* locations = invoke->GetLocations();
905  DCHECK((type == DataType::Type::kInt32) ||
906         (type == DataType::Type::kInt64) ||
907         (type == DataType::Type::kReference));
908  Location base_loc = locations->InAt(1);
909  Register base = WRegisterFrom(base_loc);      // Object pointer.
910  Location offset_loc = locations->InAt(2);
911  Register offset = XRegisterFrom(offset_loc);  // Long offset.
912  Location trg_loc = locations->Out();
913  Register trg = RegisterFrom(trg_loc, type);
914
915  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
916    // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
917    Register temp = WRegisterFrom(locations->GetTemp(0));
918    codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
919                                                       trg_loc,
920                                                       base,
921                                                       /* offset */ 0u,
922                                                       /* index */ offset_loc,
923                                                       /* scale_factor */ 0u,
924                                                       temp,
925                                                       /* needs_null_check */ false,
926                                                       is_volatile);
927  } else {
928    // Other cases.
929    MemOperand mem_op(base.X(), offset);
930    if (is_volatile) {
931      codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
932    } else {
933      codegen->Load(type, trg, mem_op);
934    }
935
936    if (type == DataType::Type::kReference) {
937      DCHECK(trg.IsW());
938      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
939    }
940  }
941}
942
943static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
944  bool can_call = kEmitCompilerReadBarrier &&
945      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
946       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
947  LocationSummary* locations =
948      new (allocator) LocationSummary(invoke,
949                                      can_call
950                                          ? LocationSummary::kCallOnSlowPath
951                                          : LocationSummary::kNoCall,
952                                      kIntrinsified);
953  if (can_call && kUseBakerReadBarrier) {
954    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
955    // We need a temporary register for the read barrier marking slow
956    // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
957    locations->AddTemp(Location::RequiresRegister());
958  }
959  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
960  locations->SetInAt(1, Location::RequiresRegister());
961  locations->SetInAt(2, Location::RequiresRegister());
962  locations->SetOut(Location::RequiresRegister(),
963                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
964}
965
966void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
967  CreateIntIntIntToIntLocations(allocator_, invoke);
968}
969void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
970  CreateIntIntIntToIntLocations(allocator_, invoke);
971}
972void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) {
973  CreateIntIntIntToIntLocations(allocator_, invoke);
974}
975void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
976  CreateIntIntIntToIntLocations(allocator_, invoke);
977}
978void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) {
979  CreateIntIntIntToIntLocations(allocator_, invoke);
980}
981void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
982  CreateIntIntIntToIntLocations(allocator_, invoke);
983}
984
985void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
986  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
987}
988void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
989  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
990}
991void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
992  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
993}
994void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
995  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
996}
997void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
998  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
999}
1000void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1001  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
1002}
1003
1004static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
1005  LocationSummary* locations =
1006      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1007  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1008  locations->SetInAt(1, Location::RequiresRegister());
1009  locations->SetInAt(2, Location::RequiresRegister());
1010  locations->SetInAt(3, Location::RequiresRegister());
1011}
1012
1013void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) {
1014  CreateIntIntIntIntToVoid(allocator_, invoke);
1015}
1016void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
1017  CreateIntIntIntIntToVoid(allocator_, invoke);
1018}
1019void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
1020  CreateIntIntIntIntToVoid(allocator_, invoke);
1021}
1022void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) {
1023  CreateIntIntIntIntToVoid(allocator_, invoke);
1024}
1025void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1026  CreateIntIntIntIntToVoid(allocator_, invoke);
1027}
1028void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1029  CreateIntIntIntIntToVoid(allocator_, invoke);
1030}
1031void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) {
1032  CreateIntIntIntIntToVoid(allocator_, invoke);
1033}
1034void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1035  CreateIntIntIntIntToVoid(allocator_, invoke);
1036}
1037void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1038  CreateIntIntIntIntToVoid(allocator_, invoke);
1039}
1040
1041static void GenUnsafePut(HInvoke* invoke,
1042                         DataType::Type type,
1043                         bool is_volatile,
1044                         bool is_ordered,
1045                         CodeGeneratorARM64* codegen) {
1046  LocationSummary* locations = invoke->GetLocations();
1047  MacroAssembler* masm = codegen->GetVIXLAssembler();
1048
1049  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
1050  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
1051  Register value = RegisterFrom(locations->InAt(3), type);
1052  Register source = value;
1053  MemOperand mem_op(base.X(), offset);
1054
1055  {
1056    // We use a block to end the scratch scope before the write barrier, thus
1057    // freeing the temporary registers so they can be used in `MarkGCCard`.
1058    UseScratchRegisterScope temps(masm);
1059
1060    if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1061      DCHECK(value.IsW());
1062      Register temp = temps.AcquireW();
1063      __ Mov(temp.W(), value.W());
1064      codegen->GetAssembler()->PoisonHeapReference(temp.W());
1065      source = temp;
1066    }
1067
1068    if (is_volatile || is_ordered) {
1069      codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
1070    } else {
1071      codegen->Store(type, source, mem_op);
1072    }
1073  }
1074
1075  if (type == DataType::Type::kReference) {
1076    bool value_can_be_null = true;  // TODO: Worth finding out this information?
1077    codegen->MarkGCCard(base, value, value_can_be_null);
1078  }
1079}
1080
1081void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
1082  GenUnsafePut(invoke,
1083               DataType::Type::kInt32,
1084               /* is_volatile */ false,
1085               /* is_ordered */ false,
1086               codegen_);
1087}
1088void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
1089  GenUnsafePut(invoke,
1090               DataType::Type::kInt32,
1091               /* is_volatile */ false,
1092               /* is_ordered */ true,
1093               codegen_);
1094}
1095void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
1096  GenUnsafePut(invoke,
1097               DataType::Type::kInt32,
1098               /* is_volatile */ true,
1099               /* is_ordered */ false,
1100               codegen_);
1101}
1102void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
1103  GenUnsafePut(invoke,
1104               DataType::Type::kReference,
1105               /* is_volatile */ false,
1106               /* is_ordered */ false,
1107               codegen_);
1108}
1109void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1110  GenUnsafePut(invoke,
1111               DataType::Type::kReference,
1112               /* is_volatile */ false,
1113               /* is_ordered */ true,
1114               codegen_);
1115}
1116void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1117  GenUnsafePut(invoke,
1118               DataType::Type::kReference,
1119               /* is_volatile */ true,
1120               /* is_ordered */ false,
1121               codegen_);
1122}
1123void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
1124  GenUnsafePut(invoke,
1125               DataType::Type::kInt64,
1126               /* is_volatile */ false,
1127               /* is_ordered */ false,
1128               codegen_);
1129}
1130void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1131  GenUnsafePut(invoke,
1132               DataType::Type::kInt64,
1133               /* is_volatile */ false,
1134               /* is_ordered */ true,
1135               codegen_);
1136}
1137void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1138  GenUnsafePut(invoke,
1139               DataType::Type::kInt64,
1140               /* is_volatile */ true,
1141               /* is_ordered */ false,
1142               codegen_);
1143}
1144
1145static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
1146                                       HInvoke* invoke,
1147                                       DataType::Type type) {
1148  bool can_call = kEmitCompilerReadBarrier &&
1149      kUseBakerReadBarrier &&
1150      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1151  LocationSummary* locations =
1152      new (allocator) LocationSummary(invoke,
1153                                      can_call
1154                                          ? LocationSummary::kCallOnSlowPath
1155                                          : LocationSummary::kNoCall,
1156                                      kIntrinsified);
1157  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
1158  locations->SetInAt(1, Location::RequiresRegister());
1159  locations->SetInAt(2, Location::RequiresRegister());
1160  locations->SetInAt(3, Location::RequiresRegister());
1161  locations->SetInAt(4, Location::RequiresRegister());
1162
1163  // If heap poisoning is enabled, we don't want the unpoisoning
1164  // operations to potentially clobber the output. Likewise when
1165  // emitting a (Baker) read barrier, which may call.
1166  Location::OutputOverlap overlaps =
1167      ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
1168      ? Location::kOutputOverlap
1169      : Location::kNoOutputOverlap;
1170  locations->SetOut(Location::RequiresRegister(), overlaps);
1171  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1172    // Temporary register for (Baker) read barrier.
1173    locations->AddTemp(Location::RequiresRegister());
1174  }
1175}
1176
1177static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
1178  MacroAssembler* masm = codegen->GetVIXLAssembler();
1179  LocationSummary* locations = invoke->GetLocations();
1180
1181  Location out_loc = locations->Out();
1182  Register out = WRegisterFrom(out_loc);                           // Boolean result.
1183
1184  Register base = WRegisterFrom(locations->InAt(1));               // Object pointer.
1185  Location offset_loc = locations->InAt(2);
1186  Register offset = XRegisterFrom(offset_loc);                     // Long offset.
1187  Register expected = RegisterFrom(locations->InAt(3), type);      // Expected.
1188  Register value = RegisterFrom(locations->InAt(4), type);         // Value.
1189
1190  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
1191  if (type == DataType::Type::kReference) {
1192    // Mark card for object assuming new value is stored.
1193    bool value_can_be_null = true;  // TODO: Worth finding out this information?
1194    codegen->MarkGCCard(base, value, value_can_be_null);
1195
1196    // The only read barrier implementation supporting the
1197    // UnsafeCASObject intrinsic is the Baker-style read barriers.
1198    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1199
1200    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1201      Register temp = WRegisterFrom(locations->GetTemp(0));
1202      // Need to make sure the reference stored in the field is a to-space
1203      // one before attempting the CAS or the CAS could fail incorrectly.
1204      codegen->UpdateReferenceFieldWithBakerReadBarrier(
1205          invoke,
1206          out_loc,  // Unused, used only as a "temporary" within the read barrier.
1207          base,
1208          /* field_offset */ offset_loc,
1209          temp,
1210          /* needs_null_check */ false,
1211          /* use_load_acquire */ false);
1212    }
1213  }
1214
1215  UseScratchRegisterScope temps(masm);
1216  Register tmp_ptr = temps.AcquireX();                             // Pointer to actual memory.
1217  Register tmp_value = temps.AcquireSameSizeAs(value);             // Value in memory.
1218
1219  Register tmp_32 = tmp_value.W();
1220
1221  __ Add(tmp_ptr, base.X(), Operand(offset));
1222
1223  if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1224    codegen->GetAssembler()->PoisonHeapReference(expected);
1225    if (value.Is(expected)) {
1226      // Do not poison `value`, as it is the same register as
1227      // `expected`, which has just been poisoned.
1228    } else {
1229      codegen->GetAssembler()->PoisonHeapReference(value);
1230    }
1231  }
1232
1233  // do {
1234  //   tmp_value = [tmp_ptr] - expected;
1235  // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
1236  // result = tmp_value != 0;
1237
1238  vixl::aarch64::Label loop_head, exit_loop;
1239  __ Bind(&loop_head);
1240  __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
1241  __ Cmp(tmp_value, expected);
1242  __ B(&exit_loop, ne);
1243  __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
1244  __ Cbnz(tmp_32, &loop_head);
1245  __ Bind(&exit_loop);
1246  __ Cset(out, eq);
1247
1248  if (kPoisonHeapReferences && type == DataType::Type::kReference) {
1249    codegen->GetAssembler()->UnpoisonHeapReference(expected);
1250    if (value.Is(expected)) {
1251      // Do not unpoison `value`, as it is the same register as
1252      // `expected`, which has just been unpoisoned.
1253    } else {
1254      codegen->GetAssembler()->UnpoisonHeapReference(value);
1255    }
1256  }
1257}
1258
1259void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1260  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32);
1261}
1262void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1263  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64);
1264}
1265void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1266  // The only read barrier implementation supporting the
1267  // UnsafeCASObject intrinsic is the Baker-style read barriers.
1268  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1269    return;
1270  }
1271
1272  CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference);
1273}
1274
1275void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
1276  GenCas(invoke, DataType::Type::kInt32, codegen_);
1277}
1278void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
1279  GenCas(invoke, DataType::Type::kInt64, codegen_);
1280}
1281void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
1282  // The only read barrier implementation supporting the
1283  // UnsafeCASObject intrinsic is the Baker-style read barriers.
1284  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1285
1286  GenCas(invoke, DataType::Type::kReference, codegen_);
1287}
1288
1289void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
1290  LocationSummary* locations =
1291      new (allocator_) LocationSummary(invoke,
1292                                       invoke->InputAt(1)->CanBeNull()
1293                                           ? LocationSummary::kCallOnSlowPath
1294                                           : LocationSummary::kNoCall,
1295                                       kIntrinsified);
1296  locations->SetInAt(0, Location::RequiresRegister());
1297  locations->SetInAt(1, Location::RequiresRegister());
1298  locations->AddTemp(Location::RequiresRegister());
1299  locations->AddTemp(Location::RequiresRegister());
1300  locations->AddTemp(Location::RequiresRegister());
1301  // Need temporary registers for String compression's feature.
1302  if (mirror::kUseStringCompression) {
1303    locations->AddTemp(Location::RequiresRegister());
1304  }
1305  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1306}
1307
1308void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
1309  MacroAssembler* masm = GetVIXLAssembler();
1310  LocationSummary* locations = invoke->GetLocations();
1311
1312  Register str = InputRegisterAt(invoke, 0);
1313  Register arg = InputRegisterAt(invoke, 1);
1314  DCHECK(str.IsW());
1315  DCHECK(arg.IsW());
1316  Register out = OutputRegister(invoke);
1317
1318  Register temp0 = WRegisterFrom(locations->GetTemp(0));
1319  Register temp1 = WRegisterFrom(locations->GetTemp(1));
1320  Register temp2 = WRegisterFrom(locations->GetTemp(2));
1321  Register temp3;
1322  if (mirror::kUseStringCompression) {
1323    temp3 = WRegisterFrom(locations->GetTemp(3));
1324  }
1325
1326  vixl::aarch64::Label loop;
1327  vixl::aarch64::Label find_char_diff;
1328  vixl::aarch64::Label end;
1329  vixl::aarch64::Label different_compression;
1330
1331  // Get offsets of count and value fields within a string object.
1332  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1333  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1334
1335  // Note that the null check must have been done earlier.
1336  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1337
1338  // Take slow path and throw if input can be and is null.
1339  SlowPathCodeARM64* slow_path = nullptr;
1340  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1341  if (can_slow_path) {
1342    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1343    codegen_->AddSlowPath(slow_path);
1344    __ Cbz(arg, slow_path->GetEntryLabel());
1345  }
1346
1347  // Reference equality check, return 0 if same reference.
1348  __ Subs(out, str, arg);
1349  __ B(&end, eq);
1350
1351  if (mirror::kUseStringCompression) {
1352    // Load `count` fields of this and argument strings.
1353    __ Ldr(temp3, HeapOperand(str, count_offset));
1354    __ Ldr(temp2, HeapOperand(arg, count_offset));
1355    // Clean out compression flag from lengths.
1356    __ Lsr(temp0, temp3, 1u);
1357    __ Lsr(temp1, temp2, 1u);
1358  } else {
1359    // Load lengths of this and argument strings.
1360    __ Ldr(temp0, HeapOperand(str, count_offset));
1361    __ Ldr(temp1, HeapOperand(arg, count_offset));
1362  }
1363  // out = length diff.
1364  __ Subs(out, temp0, temp1);
1365  // temp0 = min(len(str), len(arg)).
1366  __ Csel(temp0, temp1, temp0, ge);
1367  // Shorter string is empty?
1368  __ Cbz(temp0, &end);
1369
1370  if (mirror::kUseStringCompression) {
1371    // Check if both strings using same compression style to use this comparison loop.
1372    __ Eor(temp2, temp2, Operand(temp3));
1373    // Interleave with compression flag extraction which is needed for both paths
1374    // and also set flags which is needed only for the different compressions path.
1375    __ Ands(temp3.W(), temp3.W(), Operand(1));
1376    __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
1377  }
1378  // Store offset of string value in preparation for comparison loop.
1379  __ Mov(temp1, value_offset);
1380  if (mirror::kUseStringCompression) {
1381    // For string compression, calculate the number of bytes to compare (not chars).
1382    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1383    __ Lsl(temp0, temp0, temp3);
1384  }
1385
1386  UseScratchRegisterScope scratch_scope(masm);
1387  Register temp4 = scratch_scope.AcquireX();
1388
1389  // Assertions that must hold in order to compare strings 8 bytes at a time.
1390  DCHECK_ALIGNED(value_offset, 8);
1391  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1392
1393  const size_t char_size = DataType::Size(DataType::Type::kUint16);
1394  DCHECK_EQ(char_size, 2u);
1395
1396  // Promote temp2 to an X reg, ready for LDR.
1397  temp2 = temp2.X();
1398
1399  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
1400  __ Bind(&loop);
1401  __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
1402  __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
1403  __ Cmp(temp4, temp2);
1404  __ B(ne, &find_char_diff);
1405  __ Add(temp1, temp1, char_size * 4);
1406  // With string compression, we have compared 8 bytes, otherwise 4 chars.
1407  __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
1408  __ B(&loop, hi);
1409  __ B(&end);
1410
1411  // Promote temp1 to an X reg, ready for EOR.
1412  temp1 = temp1.X();
1413
1414  // Find the single character difference.
1415  __ Bind(&find_char_diff);
1416  // Get the bit position of the first character that differs.
1417  __ Eor(temp1, temp2, temp4);
1418  __ Rbit(temp1, temp1);
1419  __ Clz(temp1, temp1);
1420
1421  // If the number of chars remaining <= the index where the difference occurs (0-3), then
1422  // the difference occurs outside the remaining string data, so just return length diff (out).
1423  // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
1424  // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
1425  // unsigned when string compression is disabled.
1426  // When it's enabled, the comparison must be unsigned.
1427  __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
1428  __ B(ls, &end);
1429
1430  // Extract the characters and calculate the difference.
1431  if (mirror:: kUseStringCompression) {
1432    __ Bic(temp1, temp1, 0x7);
1433    __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
1434  } else {
1435    __ Bic(temp1, temp1, 0xf);
1436  }
1437  __ Lsr(temp2, temp2, temp1);
1438  __ Lsr(temp4, temp4, temp1);
1439  if (mirror::kUseStringCompression) {
1440    // Prioritize the case of compressed strings and calculate such result first.
1441    __ Uxtb(temp1, temp4);
1442    __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
1443    __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
1444  }
1445  __ Uxth(temp4, temp4);
1446  __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
1447
1448  if (mirror::kUseStringCompression) {
1449    __ B(&end);
1450    __ Bind(&different_compression);
1451
1452    // Comparison for different compression style.
1453    const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1454    DCHECK_EQ(c_char_size, 1u);
1455    temp1 = temp1.W();
1456    temp2 = temp2.W();
1457    temp4 = temp4.W();
1458
1459    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
1460    // Note that flags have been set by the `str` compression flag extraction to `temp3`
1461    // before branching to the `different_compression` label.
1462    __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
1463    __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
1464
1465    // We want to free up the temp3, currently holding `str` compression flag, for comparison.
1466    // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
1467    // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
1468    // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
1469    __ Lsl(temp0, temp0, 1u);
1470
1471    // Adjust temp1 and temp2 from string pointers to data pointers.
1472    __ Add(temp1, temp1, Operand(value_offset));
1473    __ Add(temp2, temp2, Operand(value_offset));
1474
1475    // Complete the move of the compression flag.
1476    __ Sub(temp0, temp0, Operand(temp3));
1477
1478    vixl::aarch64::Label different_compression_loop;
1479    vixl::aarch64::Label different_compression_diff;
1480
1481    __ Bind(&different_compression_loop);
1482    __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
1483    __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
1484    __ Subs(temp4, temp4, Operand(temp3));
1485    __ B(&different_compression_diff, ne);
1486    __ Subs(temp0, temp0, 2);
1487    __ B(&different_compression_loop, hi);
1488    __ B(&end);
1489
1490    // Calculate the difference.
1491    __ Bind(&different_compression_diff);
1492    __ Tst(temp0, Operand(1));
1493    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1494                  "Expecting 0=compressed, 1=uncompressed");
1495    __ Cneg(out, temp4, ne);
1496  }
1497
1498  __ Bind(&end);
1499
1500  if (can_slow_path) {
1501    __ Bind(slow_path->GetExitLabel());
1502  }
1503}
1504
1505// The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1506// The normal loop plus the pre-header is 9 instructions without string compression and 12
1507// instructions with string compression. We can compare up to 8 bytes in 4 instructions
1508// (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up
1509// to 10 instructions for the unrolled loop.
1510constexpr size_t kShortConstStringEqualsCutoffInBytes = 32;
1511
1512static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1513  if (candidate->IsLoadString()) {
1514    HLoadString* load_string = candidate->AsLoadString();
1515    const DexFile& dex_file = load_string->GetDexFile();
1516    return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1517  }
1518  return nullptr;
1519}
1520
1521void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
1522  if (kEmitCompilerReadBarrier &&
1523      !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
1524      !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
1525    // No support for this odd case (String class is moveable, not in the boot image).
1526    return;
1527  }
1528
1529  LocationSummary* locations =
1530      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1531  locations->SetInAt(0, Location::RequiresRegister());
1532  locations->SetInAt(1, Location::RequiresRegister());
1533
1534  // For the generic implementation and for long const strings we need a temporary.
1535  // We do not need it for short const strings, up to 8 bytes, see code generation below.
1536  uint32_t const_string_length = 0u;
1537  const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1538  if (const_string == nullptr) {
1539    const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1540  }
1541  bool is_compressed =
1542      mirror::kUseStringCompression &&
1543      const_string != nullptr &&
1544      mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1545  if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) {
1546    locations->AddTemp(Location::RequiresRegister());
1547  }
1548
1549  // TODO: If the String.equals() is used only for an immediately following HIf, we can
1550  // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1551  // Then we shall need an extra temporary register instead of the output register.
1552  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1553}
1554
1555void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
1556  MacroAssembler* masm = GetVIXLAssembler();
1557  LocationSummary* locations = invoke->GetLocations();
1558
1559  Register str = WRegisterFrom(locations->InAt(0));
1560  Register arg = WRegisterFrom(locations->InAt(1));
1561  Register out = XRegisterFrom(locations->Out());
1562
1563  UseScratchRegisterScope scratch_scope(masm);
1564  Register temp = scratch_scope.AcquireW();
1565  Register temp1 = scratch_scope.AcquireW();
1566
1567  vixl::aarch64::Label loop;
1568  vixl::aarch64::Label end;
1569  vixl::aarch64::Label return_true;
1570  vixl::aarch64::Label return_false;
1571
1572  // Get offsets of count, value, and class fields within a string object.
1573  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1574  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1575  const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1576
1577  // Note that the null check must have been done earlier.
1578  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1579
1580  StringEqualsOptimizations optimizations(invoke);
1581  if (!optimizations.GetArgumentNotNull()) {
1582    // Check if input is null, return false if it is.
1583    __ Cbz(arg, &return_false);
1584  }
1585
1586  // Reference equality check, return true if same reference.
1587  __ Cmp(str, arg);
1588  __ B(&return_true, eq);
1589
1590  if (!optimizations.GetArgumentIsString()) {
1591    // Instanceof check for the argument by comparing class fields.
1592    // All string objects must have the same type since String cannot be subclassed.
1593    // Receiver must be a string object, so its class field is equal to all strings' class fields.
1594    // If the argument is a string object, its class field must be equal to receiver's class field.
1595    __ Ldr(temp, MemOperand(str.X(), class_offset));
1596    __ Ldr(temp1, MemOperand(arg.X(), class_offset));
1597    __ Cmp(temp, temp1);
1598    __ B(&return_false, ne);
1599  }
1600
1601  // Check if one of the inputs is a const string. Do not special-case both strings
1602  // being const, such cases should be handled by constant folding if needed.
1603  uint32_t const_string_length = 0u;
1604  const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1605  if (const_string == nullptr) {
1606    const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1607    if (const_string != nullptr) {
1608      std::swap(str, arg);  // Make sure the const string is in `str`.
1609    }
1610  }
1611  bool is_compressed =
1612      mirror::kUseStringCompression &&
1613      const_string != nullptr &&
1614      mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1615
1616  if (const_string != nullptr) {
1617    // Load `count` field of the argument string and check if it matches the const string.
1618    // Also compares the compression style, if differs return false.
1619    __ Ldr(temp, MemOperand(arg.X(), count_offset));
1620    // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate.
1621    scratch_scope.Release(temp1);
1622    __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1623    temp1 = scratch_scope.AcquireW();
1624    __ B(&return_false, ne);
1625  } else {
1626    // Load `count` fields of this and argument strings.
1627    __ Ldr(temp, MemOperand(str.X(), count_offset));
1628    __ Ldr(temp1, MemOperand(arg.X(), count_offset));
1629    // Check if `count` fields are equal, return false if they're not.
1630    // Also compares the compression style, if differs return false.
1631    __ Cmp(temp, temp1);
1632    __ B(&return_false, ne);
1633  }
1634
1635  // Assertions that must hold in order to compare strings 8 bytes at a time.
1636  // Ok to do this because strings are zero-padded to kObjectAlignment.
1637  DCHECK_ALIGNED(value_offset, 8);
1638  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
1639
1640  if (const_string != nullptr &&
1641      const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1642                                            : kShortConstStringEqualsCutoffInBytes / 2u)) {
1643    // Load and compare the contents. Though we know the contents of the short const string
1644    // at compile time, materializing constants may be more code than loading from memory.
1645    int32_t offset = value_offset;
1646    size_t remaining_bytes =
1647        RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
1648    temp = temp.X();
1649    temp1 = temp1.X();
1650    while (remaining_bytes > sizeof(uint64_t)) {
1651      Register temp2 = XRegisterFrom(locations->GetTemp(0));
1652      __ Ldp(temp, temp1, MemOperand(str.X(), offset));
1653      __ Ldp(temp2, out, MemOperand(arg.X(), offset));
1654      __ Cmp(temp, temp2);
1655      __ Ccmp(temp1, out, NoFlag, eq);
1656      __ B(&return_false, ne);
1657      offset += 2u * sizeof(uint64_t);
1658      remaining_bytes -= 2u * sizeof(uint64_t);
1659    }
1660    if (remaining_bytes != 0u) {
1661      __ Ldr(temp, MemOperand(str.X(), offset));
1662      __ Ldr(temp1, MemOperand(arg.X(), offset));
1663      __ Cmp(temp, temp1);
1664      __ B(&return_false, ne);
1665    }
1666  } else {
1667    // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1668    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1669                  "Expecting 0=compressed, 1=uncompressed");
1670    __ Cbz(temp, &return_true);
1671
1672    if (mirror::kUseStringCompression) {
1673      // For string compression, calculate the number of bytes to compare (not chars).
1674      // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1675      __ And(temp1, temp, Operand(1));    // Extract compression flag.
1676      __ Lsr(temp, temp, 1u);             // Extract length.
1677      __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
1678    }
1679
1680    // Store offset of string value in preparation for comparison loop
1681    __ Mov(temp1, value_offset);
1682
1683    temp1 = temp1.X();
1684    Register temp2 = XRegisterFrom(locations->GetTemp(0));
1685    // Loop to compare strings 8 bytes at a time starting at the front of the string.
1686    __ Bind(&loop);
1687    __ Ldr(out, MemOperand(str.X(), temp1));
1688    __ Ldr(temp2, MemOperand(arg.X(), temp1));
1689    __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
1690    __ Cmp(out, temp2);
1691    __ B(&return_false, ne);
1692    // With string compression, we have compared 8 bytes, otherwise 4 chars.
1693    __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
1694    __ B(&loop, hi);
1695  }
1696
1697  // Return true and exit the function.
1698  // If loop does not result in returning false, we return true.
1699  __ Bind(&return_true);
1700  __ Mov(out, 1);
1701  __ B(&end);
1702
1703  // Return false and exit the function.
1704  __ Bind(&return_false);
1705  __ Mov(out, 0);
1706  __ Bind(&end);
1707}
1708
1709static void GenerateVisitStringIndexOf(HInvoke* invoke,
1710                                       MacroAssembler* masm,
1711                                       CodeGeneratorARM64* codegen,
1712                                       bool start_at_zero) {
1713  LocationSummary* locations = invoke->GetLocations();
1714
1715  // Note that the null check must have been done earlier.
1716  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1717
1718  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1719  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1720  SlowPathCodeARM64* slow_path = nullptr;
1721  HInstruction* code_point = invoke->InputAt(1);
1722  if (code_point->IsIntConstant()) {
1723    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
1724      // Always needs the slow-path. We could directly dispatch to it, but this case should be
1725      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1726      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1727      codegen->AddSlowPath(slow_path);
1728      __ B(slow_path->GetEntryLabel());
1729      __ Bind(slow_path->GetExitLabel());
1730      return;
1731    }
1732  } else if (code_point->GetType() != DataType::Type::kUint16) {
1733    Register char_reg = WRegisterFrom(locations->InAt(1));
1734    __ Tst(char_reg, 0xFFFF0000);
1735    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1736    codegen->AddSlowPath(slow_path);
1737    __ B(ne, slow_path->GetEntryLabel());
1738  }
1739
1740  if (start_at_zero) {
1741    // Start-index = 0.
1742    Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
1743    __ Mov(tmp_reg, 0);
1744  }
1745
1746  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1747  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1748
1749  if (slow_path != nullptr) {
1750    __ Bind(slow_path->GetExitLabel());
1751  }
1752}
1753
1754void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
1755  LocationSummary* locations = new (allocator_) LocationSummary(
1756      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1757  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1758  // best to align the inputs accordingly.
1759  InvokeRuntimeCallingConvention calling_convention;
1760  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1761  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1762  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1763
1764  // Need to send start_index=0.
1765  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1766}
1767
1768void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
1769  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true);
1770}
1771
1772void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1773  LocationSummary* locations = new (allocator_) LocationSummary(
1774      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1775  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1776  // best to align the inputs accordingly.
1777  InvokeRuntimeCallingConvention calling_convention;
1778  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1779  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1780  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1781  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
1782}
1783
1784void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
1785  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false);
1786}
1787
1788void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1789  LocationSummary* locations = new (allocator_) LocationSummary(
1790      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1791  InvokeRuntimeCallingConvention calling_convention;
1792  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1793  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1794  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1795  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1796  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1797}
1798
1799void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1800  MacroAssembler* masm = GetVIXLAssembler();
1801  LocationSummary* locations = invoke->GetLocations();
1802
1803  Register byte_array = WRegisterFrom(locations->InAt(0));
1804  __ Cmp(byte_array, 0);
1805  SlowPathCodeARM64* slow_path =
1806      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1807  codegen_->AddSlowPath(slow_path);
1808  __ B(eq, slow_path->GetEntryLabel());
1809
1810  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1811  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1812  __ Bind(slow_path->GetExitLabel());
1813}
1814
1815void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1816  LocationSummary* locations =
1817      new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1818  InvokeRuntimeCallingConvention calling_convention;
1819  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1820  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1821  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1822  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1823}
1824
1825void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
1826  // No need to emit code checking whether `locations->InAt(2)` is a null
1827  // pointer, as callers of the native method
1828  //
1829  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1830  //
1831  // all include a null check on `data` before calling that method.
1832  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1833  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1834}
1835
1836void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1837  LocationSummary* locations = new (allocator_) LocationSummary(
1838      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1839  InvokeRuntimeCallingConvention calling_convention;
1840  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1841  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
1842}
1843
1844void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
1845  MacroAssembler* masm = GetVIXLAssembler();
1846  LocationSummary* locations = invoke->GetLocations();
1847
1848  Register string_to_copy = WRegisterFrom(locations->InAt(0));
1849  __ Cmp(string_to_copy, 0);
1850  SlowPathCodeARM64* slow_path =
1851      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
1852  codegen_->AddSlowPath(slow_path);
1853  __ B(eq, slow_path->GetEntryLabel());
1854
1855  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1856  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1857  __ Bind(slow_path->GetExitLabel());
1858}
1859
1860static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1861  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
1862  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1863  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1864
1865  LocationSummary* const locations =
1866      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1867  InvokeRuntimeCallingConvention calling_convention;
1868
1869  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1870  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1871}
1872
1873static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1874  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
1875  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType()));
1876  DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType()));
1877  DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
1878
1879  LocationSummary* const locations =
1880      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1881  InvokeRuntimeCallingConvention calling_convention;
1882
1883  locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
1884  locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
1885  locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
1886}
1887
1888static void GenFPToFPCall(HInvoke* invoke,
1889                          CodeGeneratorARM64* codegen,
1890                          QuickEntrypointEnum entry) {
1891  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
1892}
1893
1894void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) {
1895  CreateFPToFPCallLocations(allocator_, invoke);
1896}
1897
1898void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) {
1899  GenFPToFPCall(invoke, codegen_, kQuickCos);
1900}
1901
1902void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) {
1903  CreateFPToFPCallLocations(allocator_, invoke);
1904}
1905
1906void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) {
1907  GenFPToFPCall(invoke, codegen_, kQuickSin);
1908}
1909
1910void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) {
1911  CreateFPToFPCallLocations(allocator_, invoke);
1912}
1913
1914void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) {
1915  GenFPToFPCall(invoke, codegen_, kQuickAcos);
1916}
1917
1918void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) {
1919  CreateFPToFPCallLocations(allocator_, invoke);
1920}
1921
1922void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) {
1923  GenFPToFPCall(invoke, codegen_, kQuickAsin);
1924}
1925
1926void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) {
1927  CreateFPToFPCallLocations(allocator_, invoke);
1928}
1929
1930void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) {
1931  GenFPToFPCall(invoke, codegen_, kQuickAtan);
1932}
1933
1934void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) {
1935  CreateFPToFPCallLocations(allocator_, invoke);
1936}
1937
1938void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) {
1939  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
1940}
1941
1942void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) {
1943  CreateFPToFPCallLocations(allocator_, invoke);
1944}
1945
1946void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) {
1947  GenFPToFPCall(invoke, codegen_, kQuickCosh);
1948}
1949
1950void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) {
1951  CreateFPToFPCallLocations(allocator_, invoke);
1952}
1953
1954void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) {
1955  GenFPToFPCall(invoke, codegen_, kQuickExp);
1956}
1957
1958void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) {
1959  CreateFPToFPCallLocations(allocator_, invoke);
1960}
1961
1962void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) {
1963  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
1964}
1965
1966void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) {
1967  CreateFPToFPCallLocations(allocator_, invoke);
1968}
1969
1970void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) {
1971  GenFPToFPCall(invoke, codegen_, kQuickLog);
1972}
1973
1974void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) {
1975  CreateFPToFPCallLocations(allocator_, invoke);
1976}
1977
1978void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) {
1979  GenFPToFPCall(invoke, codegen_, kQuickLog10);
1980}
1981
1982void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) {
1983  CreateFPToFPCallLocations(allocator_, invoke);
1984}
1985
1986void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) {
1987  GenFPToFPCall(invoke, codegen_, kQuickSinh);
1988}
1989
1990void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) {
1991  CreateFPToFPCallLocations(allocator_, invoke);
1992}
1993
1994void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) {
1995  GenFPToFPCall(invoke, codegen_, kQuickTan);
1996}
1997
1998void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) {
1999  CreateFPToFPCallLocations(allocator_, invoke);
2000}
2001
2002void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) {
2003  GenFPToFPCall(invoke, codegen_, kQuickTanh);
2004}
2005
2006void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) {
2007  CreateFPFPToFPCallLocations(allocator_, invoke);
2008}
2009
2010void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) {
2011  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
2012}
2013
2014void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) {
2015  CreateFPFPToFPCallLocations(allocator_, invoke);
2016}
2017
2018void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) {
2019  GenFPToFPCall(invoke, codegen_, kQuickPow);
2020}
2021
2022void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) {
2023  CreateFPFPToFPCallLocations(allocator_, invoke);
2024}
2025
2026void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) {
2027  GenFPToFPCall(invoke, codegen_, kQuickHypot);
2028}
2029
2030void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) {
2031  CreateFPFPToFPCallLocations(allocator_, invoke);
2032}
2033
2034void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) {
2035  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
2036}
2037
2038void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2039  LocationSummary* locations =
2040      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2041  locations->SetInAt(0, Location::RequiresRegister());
2042  locations->SetInAt(1, Location::RequiresRegister());
2043  locations->SetInAt(2, Location::RequiresRegister());
2044  locations->SetInAt(3, Location::RequiresRegister());
2045  locations->SetInAt(4, Location::RequiresRegister());
2046
2047  locations->AddTemp(Location::RequiresRegister());
2048  locations->AddTemp(Location::RequiresRegister());
2049  locations->AddTemp(Location::RequiresRegister());
2050}
2051
2052void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2053  MacroAssembler* masm = GetVIXLAssembler();
2054  LocationSummary* locations = invoke->GetLocations();
2055
2056  // Check assumption that sizeof(Char) is 2 (used in scaling below).
2057  const size_t char_size = DataType::Size(DataType::Type::kUint16);
2058  DCHECK_EQ(char_size, 2u);
2059
2060  // Location of data in char array buffer.
2061  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2062
2063  // Location of char array data in string.
2064  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2065
2066  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2067  // Since getChars() calls getCharsNoCheck() - we use registers rather than constants.
2068  Register srcObj = XRegisterFrom(locations->InAt(0));
2069  Register srcBegin = XRegisterFrom(locations->InAt(1));
2070  Register srcEnd = XRegisterFrom(locations->InAt(2));
2071  Register dstObj = XRegisterFrom(locations->InAt(3));
2072  Register dstBegin = XRegisterFrom(locations->InAt(4));
2073
2074  Register src_ptr = XRegisterFrom(locations->GetTemp(0));
2075  Register num_chr = XRegisterFrom(locations->GetTemp(1));
2076  Register tmp1 = XRegisterFrom(locations->GetTemp(2));
2077
2078  UseScratchRegisterScope temps(masm);
2079  Register dst_ptr = temps.AcquireX();
2080  Register tmp2 = temps.AcquireX();
2081
2082  vixl::aarch64::Label done;
2083  vixl::aarch64::Label compressed_string_loop;
2084  __ Sub(num_chr, srcEnd, srcBegin);
2085  // Early out for valid zero-length retrievals.
2086  __ Cbz(num_chr, &done);
2087
2088  // dst address start to copy to.
2089  __ Add(dst_ptr, dstObj, Operand(data_offset));
2090  __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
2091
2092  // src address to copy from.
2093  __ Add(src_ptr, srcObj, Operand(value_offset));
2094  vixl::aarch64::Label compressed_string_preloop;
2095  if (mirror::kUseStringCompression) {
2096    // Location of count in string.
2097    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2098    // String's length.
2099    __ Ldr(tmp2, MemOperand(srcObj, count_offset));
2100    __ Tbz(tmp2, 0, &compressed_string_preloop);
2101  }
2102  __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
2103
2104  // Do the copy.
2105  vixl::aarch64::Label loop;
2106  vixl::aarch64::Label remainder;
2107
2108  // Save repairing the value of num_chr on the < 8 character path.
2109  __ Subs(tmp1, num_chr, 8);
2110  __ B(lt, &remainder);
2111
2112  // Keep the result of the earlier subs, we are going to fetch at least 8 characters.
2113  __ Mov(num_chr, tmp1);
2114
2115  // Main loop used for longer fetches loads and stores 8x16-bit characters at a time.
2116  // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
2117  __ Bind(&loop);
2118  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex));
2119  __ Subs(num_chr, num_chr, 8);
2120  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex));
2121  __ B(ge, &loop);
2122
2123  __ Adds(num_chr, num_chr, 8);
2124  __ B(eq, &done);
2125
2126  // Main loop for < 8 character case and remainder handling. Loads and stores one
2127  // 16-bit Java character at a time.
2128  __ Bind(&remainder);
2129  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex));
2130  __ Subs(num_chr, num_chr, 1);
2131  __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2132  __ B(gt, &remainder);
2133  __ B(&done);
2134
2135  if (mirror::kUseStringCompression) {
2136    const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
2137    DCHECK_EQ(c_char_size, 1u);
2138    __ Bind(&compressed_string_preloop);
2139    __ Add(src_ptr, src_ptr, Operand(srcBegin));
2140    // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2141    __ Bind(&compressed_string_loop);
2142    __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex));
2143    __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex));
2144    __ Subs(num_chr, num_chr, Operand(1));
2145    __ B(gt, &compressed_string_loop);
2146  }
2147
2148  __ Bind(&done);
2149}
2150
2151// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
2152// implementation there for longer copy lengths.
2153static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
2154
2155static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
2156                                               uint32_t at,
2157                                               HInstruction* input) {
2158  HIntConstant* const_input = input->AsIntConstant();
2159  if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) {
2160    locations->SetInAt(at, Location::RequiresRegister());
2161  } else {
2162    locations->SetInAt(at, Location::RegisterOrConstant(input));
2163  }
2164}
2165
2166void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2167  // Check to see if we have known failures that will cause us to have to bail out
2168  // to the runtime, and just generate the runtime call directly.
2169  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2170  HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant();
2171
2172  // The positions must be non-negative.
2173  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2174      (dst_pos != nullptr && dst_pos->GetValue() < 0)) {
2175    // We will have to fail anyways.
2176    return;
2177  }
2178
2179  // The length must be >= 0 and not so long that we would (currently) prefer libcore's
2180  // native implementation.
2181  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2182  if (length != nullptr) {
2183    int32_t len = length->GetValue();
2184    if (len < 0 || len > kSystemArrayCopyCharThreshold) {
2185      // Just call as normal.
2186      return;
2187    }
2188  }
2189
2190  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2191  LocationSummary* locations =
2192      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2193  // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length).
2194  locations->SetInAt(0, Location::RequiresRegister());
2195  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2196  locations->SetInAt(2, Location::RequiresRegister());
2197  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2198  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2199
2200  locations->AddTemp(Location::RequiresRegister());
2201  locations->AddTemp(Location::RequiresRegister());
2202  locations->AddTemp(Location::RequiresRegister());
2203}
2204
2205static void CheckSystemArrayCopyPosition(MacroAssembler* masm,
2206                                         const Location& pos,
2207                                         const Register& input,
2208                                         const Location& length,
2209                                         SlowPathCodeARM64* slow_path,
2210                                         const Register& temp,
2211                                         bool length_is_input_length = false) {
2212  const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
2213  if (pos.IsConstant()) {
2214    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
2215    if (pos_const == 0) {
2216      if (!length_is_input_length) {
2217        // Check that length(input) >= length.
2218        __ Ldr(temp, MemOperand(input, length_offset));
2219        __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2220        __ B(slow_path->GetEntryLabel(), lt);
2221      }
2222    } else {
2223      // Check that length(input) >= pos.
2224      __ Ldr(temp, MemOperand(input, length_offset));
2225      __ Subs(temp, temp, pos_const);
2226      __ B(slow_path->GetEntryLabel(), lt);
2227
2228      // Check that (length(input) - pos) >= length.
2229      __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32));
2230      __ B(slow_path->GetEntryLabel(), lt);
2231    }
2232  } else if (length_is_input_length) {
2233    // The only way the copy can succeed is if pos is zero.
2234    __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel());
2235  } else {
2236    // Check that pos >= 0.
2237    Register pos_reg = WRegisterFrom(pos);
2238    __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel());
2239
2240    // Check that pos <= length(input) && (length(input) - pos) >= length.
2241    __ Ldr(temp, MemOperand(input, length_offset));
2242    __ Subs(temp, temp, pos_reg);
2243    // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt).
2244    __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge);
2245    __ B(slow_path->GetEntryLabel(), lt);
2246  }
2247}
2248
2249// Compute base source address, base destination address, and end
2250// source address for System.arraycopy* intrinsics in `src_base`,
2251// `dst_base` and `src_end` respectively.
2252static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
2253                                        DataType::Type type,
2254                                        const Register& src,
2255                                        const Location& src_pos,
2256                                        const Register& dst,
2257                                        const Location& dst_pos,
2258                                        const Location& copy_length,
2259                                        const Register& src_base,
2260                                        const Register& dst_base,
2261                                        const Register& src_end) {
2262  // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
2263  DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
2264      << "Unexpected element type: " << type;
2265  const int32_t element_size = DataType::Size(type);
2266  const int32_t element_size_shift = DataType::SizeShift(type);
2267  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2268
2269  if (src_pos.IsConstant()) {
2270    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2271    __ Add(src_base, src, element_size * constant + data_offset);
2272  } else {
2273    __ Add(src_base, src, data_offset);
2274    __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift));
2275  }
2276
2277  if (dst_pos.IsConstant()) {
2278    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
2279    __ Add(dst_base, dst, element_size * constant + data_offset);
2280  } else {
2281    __ Add(dst_base, dst, data_offset);
2282    __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
2283  }
2284
2285  if (copy_length.IsConstant()) {
2286    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2287    __ Add(src_end, src_base, element_size * constant);
2288  } else {
2289    __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
2290  }
2291}
2292
2293void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
2294  MacroAssembler* masm = GetVIXLAssembler();
2295  LocationSummary* locations = invoke->GetLocations();
2296  Register src = XRegisterFrom(locations->InAt(0));
2297  Location src_pos = locations->InAt(1);
2298  Register dst = XRegisterFrom(locations->InAt(2));
2299  Location dst_pos = locations->InAt(3);
2300  Location length = locations->InAt(4);
2301
2302  SlowPathCodeARM64* slow_path =
2303      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2304  codegen_->AddSlowPath(slow_path);
2305
2306  // If source and destination are the same, take the slow path. Overlapping copy regions must be
2307  // copied in reverse and we can't know in all cases if it's needed.
2308  __ Cmp(src, dst);
2309  __ B(slow_path->GetEntryLabel(), eq);
2310
2311  // Bail out if the source is null.
2312  __ Cbz(src, slow_path->GetEntryLabel());
2313
2314  // Bail out if the destination is null.
2315  __ Cbz(dst, slow_path->GetEntryLabel());
2316
2317  if (!length.IsConstant()) {
2318    // Merge the following two comparisons into one:
2319    //   If the length is negative, bail out (delegate to libcore's native implementation).
2320    //   If the length > 32 then (currently) prefer libcore's native implementation.
2321    __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
2322    __ B(slow_path->GetEntryLabel(), hi);
2323  } else {
2324    // We have already checked in the LocationsBuilder for the constant case.
2325    DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
2326    DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
2327  }
2328
2329  Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
2330  Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1));
2331  Register src_stop_addr = WRegisterFrom(locations->GetTemp(2));
2332
2333  CheckSystemArrayCopyPosition(masm,
2334                               src_pos,
2335                               src,
2336                               length,
2337                               slow_path,
2338                               src_curr_addr,
2339                               false);
2340
2341  CheckSystemArrayCopyPosition(masm,
2342                               dst_pos,
2343                               dst,
2344                               length,
2345                               slow_path,
2346                               src_curr_addr,
2347                               false);
2348
2349  src_curr_addr = src_curr_addr.X();
2350  dst_curr_addr = dst_curr_addr.X();
2351  src_stop_addr = src_stop_addr.X();
2352
2353  GenSystemArrayCopyAddresses(masm,
2354                              DataType::Type::kUint16,
2355                              src,
2356                              src_pos,
2357                              dst,
2358                              dst_pos,
2359                              length,
2360                              src_curr_addr,
2361                              dst_curr_addr,
2362                              src_stop_addr);
2363
2364  // Iterate over the arrays and do a raw copy of the chars.
2365  const int32_t char_size = DataType::Size(DataType::Type::kUint16);
2366  UseScratchRegisterScope temps(masm);
2367  Register tmp = temps.AcquireW();
2368  vixl::aarch64::Label loop, done;
2369  __ Bind(&loop);
2370  __ Cmp(src_curr_addr, src_stop_addr);
2371  __ B(&done, eq);
2372  __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
2373  __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
2374  __ B(&loop);
2375  __ Bind(&done);
2376
2377  __ Bind(slow_path->GetExitLabel());
2378}
2379
2380// We can choose to use the native implementation there for longer copy lengths.
2381static constexpr int32_t kSystemArrayCopyThreshold = 128;
2382
2383// CodeGenerator::CreateSystemArrayCopyLocationSummary use three temporary registers.
2384// We want to use two temporary registers in order to reduce the register pressure in arm64.
2385// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
2386void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2387  // The only read barrier implementation supporting the
2388  // SystemArrayCopy intrinsic is the Baker-style read barriers.
2389  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2390    return;
2391  }
2392
2393  // Check to see if we have known failures that will cause us to have to bail out
2394  // to the runtime, and just generate the runtime call directly.
2395  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2396  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2397
2398  // The positions must be non-negative.
2399  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
2400      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
2401    // We will have to fail anyways.
2402    return;
2403  }
2404
2405  // The length must be >= 0.
2406  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2407  if (length != nullptr) {
2408    int32_t len = length->GetValue();
2409    if (len < 0 || len >= kSystemArrayCopyThreshold) {
2410      // Just call as normal.
2411      return;
2412    }
2413  }
2414
2415  SystemArrayCopyOptimizations optimizations(invoke);
2416
2417  if (optimizations.GetDestinationIsSource()) {
2418    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
2419      // We only support backward copying if source and destination are the same.
2420      return;
2421    }
2422  }
2423
2424  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
2425    // We currently don't intrinsify primitive copying.
2426    return;
2427  }
2428
2429  ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
2430  LocationSummary* locations =
2431      new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
2432  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
2433  locations->SetInAt(0, Location::RequiresRegister());
2434  SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
2435  locations->SetInAt(2, Location::RequiresRegister());
2436  SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
2437  SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
2438
2439  locations->AddTemp(Location::RequiresRegister());
2440  locations->AddTemp(Location::RequiresRegister());
2441  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2442    // Temporary register IP0, obtained from the VIXL scratch register
2443    // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
2444    // (because that register is clobbered by ReadBarrierMarkRegX
2445    // entry points). It cannot be used in calls to
2446    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
2447    // either. For these reasons, get a third extra temporary register
2448    // from the register allocator.
2449    locations->AddTemp(Location::RequiresRegister());
2450  } else {
2451    // Cases other than Baker read barriers: the third temporary will
2452    // be acquired from the VIXL scratch register pool.
2453  }
2454}
2455
2456void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
2457  // The only read barrier implementation supporting the
2458  // SystemArrayCopy intrinsic is the Baker-style read barriers.
2459  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2460
2461  MacroAssembler* masm = GetVIXLAssembler();
2462  LocationSummary* locations = invoke->GetLocations();
2463
2464  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2465  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2466  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2467  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2468  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2469
2470  Register src = XRegisterFrom(locations->InAt(0));
2471  Location src_pos = locations->InAt(1);
2472  Register dest = XRegisterFrom(locations->InAt(2));
2473  Location dest_pos = locations->InAt(3);
2474  Location length = locations->InAt(4);
2475  Register temp1 = WRegisterFrom(locations->GetTemp(0));
2476  Location temp1_loc = LocationFrom(temp1);
2477  Register temp2 = WRegisterFrom(locations->GetTemp(1));
2478  Location temp2_loc = LocationFrom(temp2);
2479
2480  SlowPathCodeARM64* intrinsic_slow_path =
2481      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
2482  codegen_->AddSlowPath(intrinsic_slow_path);
2483
2484  vixl::aarch64::Label conditions_on_positions_validated;
2485  SystemArrayCopyOptimizations optimizations(invoke);
2486
2487  // If source and destination are the same, we go to slow path if we need to do
2488  // forward copying.
2489  if (src_pos.IsConstant()) {
2490    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2491    if (dest_pos.IsConstant()) {
2492      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2493      if (optimizations.GetDestinationIsSource()) {
2494        // Checked when building locations.
2495        DCHECK_GE(src_pos_constant, dest_pos_constant);
2496      } else if (src_pos_constant < dest_pos_constant) {
2497        __ Cmp(src, dest);
2498        __ B(intrinsic_slow_path->GetEntryLabel(), eq);
2499      }
2500      // Checked when building locations.
2501      DCHECK(!optimizations.GetDestinationIsSource()
2502             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
2503    } else {
2504      if (!optimizations.GetDestinationIsSource()) {
2505        __ Cmp(src, dest);
2506        __ B(&conditions_on_positions_validated, ne);
2507      }
2508      __ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
2509      __ B(intrinsic_slow_path->GetEntryLabel(), gt);
2510    }
2511  } else {
2512    if (!optimizations.GetDestinationIsSource()) {
2513      __ Cmp(src, dest);
2514      __ B(&conditions_on_positions_validated, ne);
2515    }
2516    __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
2517           OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
2518    __ B(intrinsic_slow_path->GetEntryLabel(), lt);
2519  }
2520
2521  __ Bind(&conditions_on_positions_validated);
2522
2523  if (!optimizations.GetSourceIsNotNull()) {
2524    // Bail out if the source is null.
2525    __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
2526  }
2527
2528  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2529    // Bail out if the destination is null.
2530    __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
2531  }
2532
2533  // We have already checked in the LocationsBuilder for the constant case.
2534  if (!length.IsConstant() &&
2535      !optimizations.GetCountIsSourceLength() &&
2536      !optimizations.GetCountIsDestinationLength()) {
2537    // Merge the following two comparisons into one:
2538    //   If the length is negative, bail out (delegate to libcore's native implementation).
2539    //   If the length >= 128 then (currently) prefer native implementation.
2540    __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
2541    __ B(intrinsic_slow_path->GetEntryLabel(), hs);
2542  }
2543  // Validity checks: source.
2544  CheckSystemArrayCopyPosition(masm,
2545                               src_pos,
2546                               src,
2547                               length,
2548                               intrinsic_slow_path,
2549                               temp1,
2550                               optimizations.GetCountIsSourceLength());
2551
2552  // Validity checks: dest.
2553  CheckSystemArrayCopyPosition(masm,
2554                               dest_pos,
2555                               dest,
2556                               length,
2557                               intrinsic_slow_path,
2558                               temp1,
2559                               optimizations.GetCountIsDestinationLength());
2560  {
2561    // We use a block to end the scratch scope before the write barrier, thus
2562    // freeing the temporary registers so they can be used in `MarkGCCard`.
2563    UseScratchRegisterScope temps(masm);
2564    Location temp3_loc;  // Used only for Baker read barrier.
2565    Register temp3;
2566    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2567      temp3_loc = locations->GetTemp(2);
2568      temp3 = WRegisterFrom(temp3_loc);
2569    } else {
2570      temp3 = temps.AcquireW();
2571    }
2572
2573    if (!optimizations.GetDoesNotNeedTypeCheck()) {
2574      // Check whether all elements of the source array are assignable to the component
2575      // type of the destination array. We do two checks: the classes are the same,
2576      // or the destination is Object[]. If none of these checks succeed, we go to the
2577      // slow path.
2578
2579      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2580        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2581          // /* HeapReference<Class> */ temp1 = src->klass_
2582          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2583                                                          temp1_loc,
2584                                                          src.W(),
2585                                                          class_offset,
2586                                                          temp3_loc,
2587                                                          /* needs_null_check */ false,
2588                                                          /* use_load_acquire */ false);
2589          // Bail out if the source is not a non primitive array.
2590          // /* HeapReference<Class> */ temp1 = temp1->component_type_
2591          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2592                                                          temp1_loc,
2593                                                          temp1,
2594                                                          component_offset,
2595                                                          temp3_loc,
2596                                                          /* needs_null_check */ false,
2597                                                          /* use_load_acquire */ false);
2598          __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
2599          // If heap poisoning is enabled, `temp1` has been unpoisoned
2600          // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
2601          // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2602          __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
2603          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2604          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2605        }
2606
2607        // /* HeapReference<Class> */ temp1 = dest->klass_
2608        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2609                                                        temp1_loc,
2610                                                        dest.W(),
2611                                                        class_offset,
2612                                                        temp3_loc,
2613                                                        /* needs_null_check */ false,
2614                                                        /* use_load_acquire */ false);
2615
2616        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2617          // Bail out if the destination is not a non primitive array.
2618          //
2619          // Register `temp1` is not trashed by the read barrier emitted
2620          // by GenerateFieldLoadWithBakerReadBarrier below, as that
2621          // method produces a call to a ReadBarrierMarkRegX entry point,
2622          // which saves all potentially live registers, including
2623          // temporaries such a `temp1`.
2624          // /* HeapReference<Class> */ temp2 = temp1->component_type_
2625          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2626                                                          temp2_loc,
2627                                                          temp1,
2628                                                          component_offset,
2629                                                          temp3_loc,
2630                                                          /* needs_null_check */ false,
2631                                                          /* use_load_acquire */ false);
2632          __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2633          // If heap poisoning is enabled, `temp2` has been unpoisoned
2634          // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
2635          // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2636          __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2637          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2638          __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2639        }
2640
2641        // For the same reason given earlier, `temp1` is not trashed by the
2642        // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2643        // /* HeapReference<Class> */ temp2 = src->klass_
2644        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2645                                                        temp2_loc,
2646                                                        src.W(),
2647                                                        class_offset,
2648                                                        temp3_loc,
2649                                                        /* needs_null_check */ false,
2650                                                        /* use_load_acquire */ false);
2651        // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2652        __ Cmp(temp1, temp2);
2653
2654        if (optimizations.GetDestinationIsTypedObjectArray()) {
2655          vixl::aarch64::Label do_copy;
2656          __ B(&do_copy, eq);
2657          // /* HeapReference<Class> */ temp1 = temp1->component_type_
2658          codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2659                                                          temp1_loc,
2660                                                          temp1,
2661                                                          component_offset,
2662                                                          temp3_loc,
2663                                                          /* needs_null_check */ false,
2664                                                          /* use_load_acquire */ false);
2665          // /* HeapReference<Class> */ temp1 = temp1->super_class_
2666          // We do not need to emit a read barrier for the following
2667          // heap reference load, as `temp1` is only used in a
2668          // comparison with null below, and this reference is not
2669          // kept afterwards.
2670          __ Ldr(temp1, HeapOperand(temp1, super_offset));
2671          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2672          __ Bind(&do_copy);
2673        } else {
2674          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2675        }
2676      } else {
2677        // Non read barrier code.
2678
2679        // /* HeapReference<Class> */ temp1 = dest->klass_
2680        __ Ldr(temp1, MemOperand(dest, class_offset));
2681        // /* HeapReference<Class> */ temp2 = src->klass_
2682        __ Ldr(temp2, MemOperand(src, class_offset));
2683        bool did_unpoison = false;
2684        if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2685            !optimizations.GetSourceIsNonPrimitiveArray()) {
2686          // One or two of the references need to be unpoisoned. Unpoison them
2687          // both to make the identity check valid.
2688          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2689          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2690          did_unpoison = true;
2691        }
2692
2693        if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2694          // Bail out if the destination is not a non primitive array.
2695          // /* HeapReference<Class> */ temp3 = temp1->component_type_
2696          __ Ldr(temp3, HeapOperand(temp1, component_offset));
2697          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2698          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2699          // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2700          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2701          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2702          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2703        }
2704
2705        if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2706          // Bail out if the source is not a non primitive array.
2707          // /* HeapReference<Class> */ temp3 = temp2->component_type_
2708          __ Ldr(temp3, HeapOperand(temp2, component_offset));
2709          __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
2710          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
2711          // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2712          __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
2713          static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2714          __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
2715        }
2716
2717        __ Cmp(temp1, temp2);
2718
2719        if (optimizations.GetDestinationIsTypedObjectArray()) {
2720          vixl::aarch64::Label do_copy;
2721          __ B(&do_copy, eq);
2722          if (!did_unpoison) {
2723            codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2724          }
2725          // /* HeapReference<Class> */ temp1 = temp1->component_type_
2726          __ Ldr(temp1, HeapOperand(temp1, component_offset));
2727          codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2728          // /* HeapReference<Class> */ temp1 = temp1->super_class_
2729          __ Ldr(temp1, HeapOperand(temp1, super_offset));
2730          // No need to unpoison the result, we're comparing against null.
2731          __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
2732          __ Bind(&do_copy);
2733        } else {
2734          __ B(intrinsic_slow_path->GetEntryLabel(), ne);
2735        }
2736      }
2737    } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2738      DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2739      // Bail out if the source is not a non primitive array.
2740      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2741        // /* HeapReference<Class> */ temp1 = src->klass_
2742        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2743                                                        temp1_loc,
2744                                                        src.W(),
2745                                                        class_offset,
2746                                                        temp3_loc,
2747                                                        /* needs_null_check */ false,
2748                                                        /* use_load_acquire */ false);
2749        // /* HeapReference<Class> */ temp2 = temp1->component_type_
2750        codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
2751                                                        temp2_loc,
2752                                                        temp1,
2753                                                        component_offset,
2754                                                        temp3_loc,
2755                                                        /* needs_null_check */ false,
2756                                                        /* use_load_acquire */ false);
2757        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2758        // If heap poisoning is enabled, `temp2` has been unpoisoned
2759        // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
2760      } else {
2761        // /* HeapReference<Class> */ temp1 = src->klass_
2762        __ Ldr(temp1, HeapOperand(src.W(), class_offset));
2763        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
2764        // /* HeapReference<Class> */ temp2 = temp1->component_type_
2765        __ Ldr(temp2, HeapOperand(temp1, component_offset));
2766        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
2767        codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
2768      }
2769      // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2770      __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
2771      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2772      __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
2773    }
2774
2775    if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
2776      // Null constant length: not need to emit the loop code at all.
2777    } else {
2778      Register src_curr_addr = temp1.X();
2779      Register dst_curr_addr = temp2.X();
2780      Register src_stop_addr = temp3.X();
2781      vixl::aarch64::Label done;
2782      const DataType::Type type = DataType::Type::kReference;
2783      const int32_t element_size = DataType::Size(type);
2784
2785      if (length.IsRegister()) {
2786        // Don't enter the copy loop if the length is null.
2787        __ Cbz(WRegisterFrom(length), &done);
2788      }
2789
2790      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2791        // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2792
2793        // SystemArrayCopy implementation for Baker read barriers (see
2794        // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
2795        //
2796        //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2797        //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
2798        //   bool is_gray = (rb_state == ReadBarrier::GrayState());
2799        //   if (is_gray) {
2800        //     // Slow-path copy.
2801        //     do {
2802        //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2803        //     } while (src_ptr != end_ptr)
2804        //   } else {
2805        //     // Fast-path copy.
2806        //     do {
2807        //       *dest_ptr++ = *src_ptr++;
2808        //     } while (src_ptr != end_ptr)
2809        //   }
2810
2811        // Make sure `tmp` is not IP0, as it is clobbered by
2812        // ReadBarrierMarkRegX entry points in
2813        // ReadBarrierSystemArrayCopySlowPathARM64.
2814        DCHECK(temps.IsAvailable(ip0));
2815        temps.Exclude(ip0);
2816        Register tmp = temps.AcquireW();
2817        DCHECK_NE(LocationFrom(tmp).reg(), IP0);
2818        // Put IP0 back in the pool so that VIXL has at least one
2819        // scratch register available to emit macro-instructions (note
2820        // that IP1 is already used for `tmp`). Indeed some
2821        // macro-instructions used in GenSystemArrayCopyAddresses
2822        // (invoked hereunder) may require a scratch register (for
2823        // instance to emit a load with a large constant offset).
2824        temps.Include(ip0);
2825
2826        // /* int32_t */ monitor = src->monitor_
2827        __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
2828        // /* LockWord */ lock_word = LockWord(monitor)
2829        static_assert(sizeof(LockWord) == sizeof(int32_t),
2830                      "art::LockWord and int32_t have different sizes.");
2831
2832        // Introduce a dependency on the lock_word including rb_state,
2833        // to prevent load-load reordering, and without using
2834        // a memory barrier (which would be more expensive).
2835        // `src` is unchanged by this operation, but its value now depends
2836        // on `tmp`.
2837        __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
2838
2839        // Compute base source address, base destination address, and end
2840        // source address for System.arraycopy* intrinsics in `src_base`,
2841        // `dst_base` and `src_end` respectively.
2842        // Note that `src_curr_addr` is computed from from `src` (and
2843        // `src_pos`) here, and thus honors the artificial dependency
2844        // of `src` on `tmp`.
2845        GenSystemArrayCopyAddresses(masm,
2846                                    type,
2847                                    src,
2848                                    src_pos,
2849                                    dest,
2850                                    dest_pos,
2851                                    length,
2852                                    src_curr_addr,
2853                                    dst_curr_addr,
2854                                    src_stop_addr);
2855
2856        // Slow path used to copy array when `src` is gray.
2857        SlowPathCodeARM64* read_barrier_slow_path =
2858            new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(
2859                invoke, LocationFrom(tmp));
2860        codegen_->AddSlowPath(read_barrier_slow_path);
2861
2862        // Given the numeric representation, it's enough to check the low bit of the rb_state.
2863        static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2864        static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2865        __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
2866
2867        // Fast-path copy.
2868        // Iterate over the arrays and do a raw copy of the objects. We don't need to
2869        // poison/unpoison.
2870        vixl::aarch64::Label loop;
2871        __ Bind(&loop);
2872        __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2873        __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2874        __ Cmp(src_curr_addr, src_stop_addr);
2875        __ B(&loop, ne);
2876
2877        __ Bind(read_barrier_slow_path->GetExitLabel());
2878      } else {
2879        // Non read barrier code.
2880        // Compute base source address, base destination address, and end
2881        // source address for System.arraycopy* intrinsics in `src_base`,
2882        // `dst_base` and `src_end` respectively.
2883        GenSystemArrayCopyAddresses(masm,
2884                                    type,
2885                                    src,
2886                                    src_pos,
2887                                    dest,
2888                                    dest_pos,
2889                                    length,
2890                                    src_curr_addr,
2891                                    dst_curr_addr,
2892                                    src_stop_addr);
2893        // Iterate over the arrays and do a raw copy of the objects. We don't need to
2894        // poison/unpoison.
2895        vixl::aarch64::Label loop;
2896        __ Bind(&loop);
2897        {
2898          Register tmp = temps.AcquireW();
2899          __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
2900          __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
2901        }
2902        __ Cmp(src_curr_addr, src_stop_addr);
2903        __ B(&loop, ne);
2904      }
2905      __ Bind(&done);
2906    }
2907  }
2908
2909  // We only need one card marking on the destination array.
2910  codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
2911
2912  __ Bind(intrinsic_slow_path->GetExitLabel());
2913}
2914
2915static void GenIsInfinite(LocationSummary* locations,
2916                          bool is64bit,
2917                          MacroAssembler* masm) {
2918  Operand infinity;
2919  Register out;
2920
2921  if (is64bit) {
2922    infinity = kPositiveInfinityDouble;
2923    out = XRegisterFrom(locations->Out());
2924  } else {
2925    infinity = kPositiveInfinityFloat;
2926    out = WRegisterFrom(locations->Out());
2927  }
2928
2929  const Register zero = vixl::aarch64::Assembler::AppropriateZeroRegFor(out);
2930
2931  MoveFPToInt(locations, is64bit, masm);
2932  __ Eor(out, out, infinity);
2933  // We don't care about the sign bit, so shift left.
2934  __ Cmp(zero, Operand(out, LSL, 1));
2935  __ Cset(out, eq);
2936}
2937
2938void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2939  CreateFPToIntLocations(allocator_, invoke);
2940}
2941
2942void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
2943  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
2944}
2945
2946void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2947  CreateFPToIntLocations(allocator_, invoke);
2948}
2949
2950void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
2951  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
2952}
2953
2954void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
2955  InvokeRuntimeCallingConvention calling_convention;
2956  IntrinsicVisitor::ComputeIntegerValueOfLocations(
2957      invoke,
2958      codegen_,
2959      calling_convention.GetReturnLocation(DataType::Type::kReference),
2960      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2961}
2962
2963void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
2964  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
2965  LocationSummary* locations = invoke->GetLocations();
2966  MacroAssembler* masm = GetVIXLAssembler();
2967
2968  Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
2969  UseScratchRegisterScope temps(masm);
2970  Register temp = temps.AcquireW();
2971  InvokeRuntimeCallingConvention calling_convention;
2972  Register argument = calling_convention.GetRegisterAt(0);
2973  if (invoke->InputAt(0)->IsConstant()) {
2974    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
2975    if (value >= info.low && value <= info.high) {
2976      // Just embed the j.l.Integer in the code.
2977      ScopedObjectAccess soa(Thread::Current());
2978      mirror::Object* boxed = info.cache->Get(value + (-info.low));
2979      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
2980      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
2981      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
2982    } else {
2983      // Allocate and initialize a new j.l.Integer.
2984      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
2985      // JIT object table.
2986      uint32_t address =
2987          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
2988      __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
2989      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
2990      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
2991      __ Mov(temp.W(), value);
2992      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
2993      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
2994      // one.
2995      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2996    }
2997  } else {
2998    Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
2999    // Check bounds of our cache.
3000    __ Add(out.W(), in.W(), -info.low);
3001    __ Cmp(out.W(), info.high - info.low + 1);
3002    vixl::aarch64::Label allocate, done;
3003    __ B(&allocate, hs);
3004    // If the value is within the bounds, load the j.l.Integer directly from the array.
3005    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3006    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3007    __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3008    MemOperand source = HeapOperand(
3009        temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
3010    codegen_->Load(DataType::Type::kReference, out, source);
3011    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
3012    __ B(&done);
3013    __ Bind(&allocate);
3014    // Otherwise allocate and initialize a new j.l.Integer.
3015    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3016    __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
3017    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3018    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3019    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
3020    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3021    // one.
3022    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3023    __ Bind(&done);
3024  }
3025}
3026
3027void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
3028  LocationSummary* locations =
3029      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3030  locations->SetOut(Location::RequiresRegister());
3031}
3032
3033void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
3034  MacroAssembler* masm = GetVIXLAssembler();
3035  Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
3036  UseScratchRegisterScope temps(masm);
3037  Register temp = temps.AcquireX();
3038
3039  __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
3040  __ Ldar(out.W(), MemOperand(temp));
3041
3042  vixl::aarch64::Label done;
3043  __ Cbz(out.W(), &done);
3044  __ Stlr(wzr, MemOperand(temp));
3045  __ Bind(&done);
3046}
3047
3048void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
3049  LocationSummary* locations =
3050      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3051  locations->SetInAt(0, Location::Any());
3052}
3053
3054void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3055
3056UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
3057
3058UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
3059UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
3060UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
3061UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
3062UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
3063UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppend);
3064UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
3065UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
3066
3067// 1.8.
3068UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
3069UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
3070UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
3071UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
3072UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
3073
3074UNREACHABLE_INTRINSICS(ARM64)
3075
3076#undef __
3077
3078}  // namespace arm64
3079}  // namespace art
3080