1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "code_generator_x86_64.h"
18
19#include "art_method.h"
20#include "code_generator_utils.h"
21#include "compiled_method.h"
22#include "entrypoints/quick/quick_entrypoints.h"
23#include "gc/accounting/card_table.h"
24#include "intrinsics.h"
25#include "intrinsics_x86_64.h"
26#include "mirror/array-inl.h"
27#include "mirror/class-inl.h"
28#include "mirror/object_reference.h"
29#include "thread.h"
30#include "utils/assembler.h"
31#include "utils/stack_checks.h"
32#include "utils/x86_64/assembler_x86_64.h"
33#include "utils/x86_64/managed_register_x86_64.h"
34
35namespace art {
36
37template<class MirrorType>
38class GcRoot;
39
40namespace x86_64 {
41
42static constexpr int kCurrentMethodStackOffset = 0;
43static constexpr Register kMethodRegisterArgument = RDI;
44// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
45// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
46// generates less code/data with a small num_entries.
47static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
48
49static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
50static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
51
52static constexpr int kC2ConditionMask = 0x400;
53
54#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
55#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
56
57class NullCheckSlowPathX86_64 : public SlowPathCode {
58 public:
59  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
60
61  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
62    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
63    __ Bind(GetEntryLabel());
64    if (instruction_->CanThrowIntoCatchBlock()) {
65      // Live registers will be restored in the catch block if caught.
66      SaveLiveRegisters(codegen, instruction_->GetLocations());
67    }
68    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
69                                  instruction_,
70                                  instruction_->GetDexPc(),
71                                  this);
72    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
73  }
74
75  bool IsFatal() const OVERRIDE { return true; }
76
77  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
78
79 private:
80  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
81};
82
83class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
84 public:
85  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
86
87  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
88    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
89    __ Bind(GetEntryLabel());
90    if (instruction_->CanThrowIntoCatchBlock()) {
91      // Live registers will be restored in the catch block if caught.
92      SaveLiveRegisters(codegen, instruction_->GetLocations());
93    }
94    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
95                                  instruction_,
96                                  instruction_->GetDexPc(),
97                                  this);
98    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
99  }
100
101  bool IsFatal() const OVERRIDE { return true; }
102
103  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
104
105 private:
106  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
107};
108
109class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
110 public:
111  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
112      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
113
114  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
115    __ Bind(GetEntryLabel());
116    if (type_ == Primitive::kPrimInt) {
117      if (is_div_) {
118        __ negl(cpu_reg_);
119      } else {
120        __ xorl(cpu_reg_, cpu_reg_);
121      }
122
123    } else {
124      DCHECK_EQ(Primitive::kPrimLong, type_);
125      if (is_div_) {
126        __ negq(cpu_reg_);
127      } else {
128        __ xorl(cpu_reg_, cpu_reg_);
129      }
130    }
131    __ jmp(GetExitLabel());
132  }
133
134  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
135
136 private:
137  const CpuRegister cpu_reg_;
138  const Primitive::Type type_;
139  const bool is_div_;
140  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
141};
142
143class SuspendCheckSlowPathX86_64 : public SlowPathCode {
144 public:
145  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
146      : SlowPathCode(instruction), successor_(successor) {}
147
148  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
149    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
150    __ Bind(GetEntryLabel());
151    SaveLiveRegisters(codegen, instruction_->GetLocations());
152    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
153                                  instruction_,
154                                  instruction_->GetDexPc(),
155                                  this);
156    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
157    RestoreLiveRegisters(codegen, instruction_->GetLocations());
158    if (successor_ == nullptr) {
159      __ jmp(GetReturnLabel());
160    } else {
161      __ jmp(x86_64_codegen->GetLabelOf(successor_));
162    }
163  }
164
165  Label* GetReturnLabel() {
166    DCHECK(successor_ == nullptr);
167    return &return_label_;
168  }
169
170  HBasicBlock* GetSuccessor() const {
171    return successor_;
172  }
173
174  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
175
176 private:
177  HBasicBlock* const successor_;
178  Label return_label_;
179
180  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
181};
182
183class BoundsCheckSlowPathX86_64 : public SlowPathCode {
184 public:
185  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
186    : SlowPathCode(instruction) {}
187
188  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
189    LocationSummary* locations = instruction_->GetLocations();
190    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
191    __ Bind(GetEntryLabel());
192    if (instruction_->CanThrowIntoCatchBlock()) {
193      // Live registers will be restored in the catch block if caught.
194      SaveLiveRegisters(codegen, instruction_->GetLocations());
195    }
196    // We're moving two locations to locations that could overlap, so we need a parallel
197    // move resolver.
198    InvokeRuntimeCallingConvention calling_convention;
199    codegen->EmitParallelMoves(
200        locations->InAt(0),
201        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
202        Primitive::kPrimInt,
203        locations->InAt(1),
204        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
205        Primitive::kPrimInt);
206    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
207                                  instruction_,
208                                  instruction_->GetDexPc(),
209                                  this);
210    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
211  }
212
213  bool IsFatal() const OVERRIDE { return true; }
214
215  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
216
217 private:
218  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
219};
220
221class LoadClassSlowPathX86_64 : public SlowPathCode {
222 public:
223  LoadClassSlowPathX86_64(HLoadClass* cls,
224                          HInstruction* at,
225                          uint32_t dex_pc,
226                          bool do_clinit)
227      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
228    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
229  }
230
231  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
232    LocationSummary* locations = at_->GetLocations();
233    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
234    __ Bind(GetEntryLabel());
235
236    SaveLiveRegisters(codegen, locations);
237
238    InvokeRuntimeCallingConvention calling_convention;
239    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
240    x86_64_codegen->InvokeRuntime(do_clinit_ ?
241                                      QUICK_ENTRY_POINT(pInitializeStaticStorage) :
242                                      QUICK_ENTRY_POINT(pInitializeType),
243                                  at_,
244                                  dex_pc_,
245                                  this);
246    if (do_clinit_) {
247      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
248    } else {
249      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
250    }
251
252    Location out = locations->Out();
253    // Move the class to the desired location.
254    if (out.IsValid()) {
255      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
256      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
257    }
258
259    RestoreLiveRegisters(codegen, locations);
260    __ jmp(GetExitLabel());
261  }
262
263  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
264
265 private:
266  // The class this slow path will load.
267  HLoadClass* const cls_;
268
269  // The instruction where this slow path is happening.
270  // (Might be the load class or an initialization check).
271  HInstruction* const at_;
272
273  // The dex PC of `at_`.
274  const uint32_t dex_pc_;
275
276  // Whether to initialize the class.
277  const bool do_clinit_;
278
279  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
280};
281
282class LoadStringSlowPathX86_64 : public SlowPathCode {
283 public:
284  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
285
286  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
287    LocationSummary* locations = instruction_->GetLocations();
288    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
289
290    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
291    __ Bind(GetEntryLabel());
292    SaveLiveRegisters(codegen, locations);
293
294    InvokeRuntimeCallingConvention calling_convention;
295    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
296    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
297    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
298                                  instruction_,
299                                  instruction_->GetDexPc(),
300                                  this);
301    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
302    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
303    RestoreLiveRegisters(codegen, locations);
304    __ jmp(GetExitLabel());
305  }
306
307  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
308
309 private:
310  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
311};
312
313class TypeCheckSlowPathX86_64 : public SlowPathCode {
314 public:
315  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
316      : SlowPathCode(instruction), is_fatal_(is_fatal) {}
317
318  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
319    LocationSummary* locations = instruction_->GetLocations();
320    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
321                                                        : locations->Out();
322    uint32_t dex_pc = instruction_->GetDexPc();
323    DCHECK(instruction_->IsCheckCast()
324           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
325
326    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
327    __ Bind(GetEntryLabel());
328
329    if (!is_fatal_) {
330      SaveLiveRegisters(codegen, locations);
331    }
332
333    // We're moving two locations to locations that could overlap, so we need a parallel
334    // move resolver.
335    InvokeRuntimeCallingConvention calling_convention;
336    codegen->EmitParallelMoves(
337        locations->InAt(1),
338        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
339        Primitive::kPrimNot,
340        object_class,
341        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
342        Primitive::kPrimNot);
343
344    if (instruction_->IsInstanceOf()) {
345      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
346                                    instruction_,
347                                    dex_pc,
348                                    this);
349      CheckEntrypointTypes<
350          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
351    } else {
352      DCHECK(instruction_->IsCheckCast());
353      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
354                                    instruction_,
355                                    dex_pc,
356                                    this);
357      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
358    }
359
360    if (!is_fatal_) {
361      if (instruction_->IsInstanceOf()) {
362        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
363      }
364
365      RestoreLiveRegisters(codegen, locations);
366      __ jmp(GetExitLabel());
367    }
368  }
369
370  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
371
372  bool IsFatal() const OVERRIDE { return is_fatal_; }
373
374 private:
375  const bool is_fatal_;
376
377  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
378};
379
380class DeoptimizationSlowPathX86_64 : public SlowPathCode {
381 public:
382  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
383      : SlowPathCode(instruction) {}
384
385  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
386    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
387    __ Bind(GetEntryLabel());
388    SaveLiveRegisters(codegen, instruction_->GetLocations());
389    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
390                                  instruction_,
391                                  instruction_->GetDexPc(),
392                                  this);
393    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
394  }
395
396  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
397
398 private:
399  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
400};
401
402class ArraySetSlowPathX86_64 : public SlowPathCode {
403 public:
404  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
405
406  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
407    LocationSummary* locations = instruction_->GetLocations();
408    __ Bind(GetEntryLabel());
409    SaveLiveRegisters(codegen, locations);
410
411    InvokeRuntimeCallingConvention calling_convention;
412    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
413    parallel_move.AddMove(
414        locations->InAt(0),
415        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
416        Primitive::kPrimNot,
417        nullptr);
418    parallel_move.AddMove(
419        locations->InAt(1),
420        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
421        Primitive::kPrimInt,
422        nullptr);
423    parallel_move.AddMove(
424        locations->InAt(2),
425        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
426        Primitive::kPrimNot,
427        nullptr);
428    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
429
430    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
431    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
432                                  instruction_,
433                                  instruction_->GetDexPc(),
434                                  this);
435    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
436    RestoreLiveRegisters(codegen, locations);
437    __ jmp(GetExitLabel());
438  }
439
440  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
441
442 private:
443  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
444};
445
446// Slow path marking an object during a read barrier.
447class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
448 public:
449  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
450      : SlowPathCode(instruction), out_(out), obj_(obj) {
451    DCHECK(kEmitCompilerReadBarrier);
452  }
453
454  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
455
456  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
457    LocationSummary* locations = instruction_->GetLocations();
458    Register reg_out = out_.AsRegister<Register>();
459    DCHECK(locations->CanCall());
460    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
461    DCHECK(instruction_->IsInstanceFieldGet() ||
462           instruction_->IsStaticFieldGet() ||
463           instruction_->IsArrayGet() ||
464           instruction_->IsLoadClass() ||
465           instruction_->IsLoadString() ||
466           instruction_->IsInstanceOf() ||
467           instruction_->IsCheckCast())
468        << "Unexpected instruction in read barrier marking slow path: "
469        << instruction_->DebugName();
470
471    __ Bind(GetEntryLabel());
472    SaveLiveRegisters(codegen, locations);
473
474    InvokeRuntimeCallingConvention calling_convention;
475    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
476    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
477    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
478                               instruction_,
479                               instruction_->GetDexPc(),
480                               this);
481    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
482    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
483
484    RestoreLiveRegisters(codegen, locations);
485    __ jmp(GetExitLabel());
486  }
487
488 private:
489  const Location out_;
490  const Location obj_;
491
492  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
493};
494
495// Slow path generating a read barrier for a heap reference.
496class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
497 public:
498  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
499                                            Location out,
500                                            Location ref,
501                                            Location obj,
502                                            uint32_t offset,
503                                            Location index)
504      : SlowPathCode(instruction),
505        out_(out),
506        ref_(ref),
507        obj_(obj),
508        offset_(offset),
509        index_(index) {
510    DCHECK(kEmitCompilerReadBarrier);
511    // If `obj` is equal to `out` or `ref`, it means the initial
512    // object has been overwritten by (or after) the heap object
513    // reference load to be instrumented, e.g.:
514    //
515    //   __ movl(out, Address(out, offset));
516    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
517    //
518    // In that case, we have lost the information about the original
519    // object, and the emitted read barrier cannot work properly.
520    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
521    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
522}
523
524  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
525    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
526    LocationSummary* locations = instruction_->GetLocations();
527    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
528    DCHECK(locations->CanCall());
529    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
530    DCHECK(!instruction_->IsInvoke() ||
531           (instruction_->IsInvokeStaticOrDirect() &&
532            instruction_->GetLocations()->Intrinsified()))
533        << "Unexpected instruction in read barrier for heap reference slow path: "
534        << instruction_->DebugName();
535
536    __ Bind(GetEntryLabel());
537    SaveLiveRegisters(codegen, locations);
538
539    // We may have to change the index's value, but as `index_` is a
540    // constant member (like other "inputs" of this slow path),
541    // introduce a copy of it, `index`.
542    Location index = index_;
543    if (index_.IsValid()) {
544      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
545      if (instruction_->IsArrayGet()) {
546        // Compute real offset and store it in index_.
547        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
548        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
549        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
550          // We are about to change the value of `index_reg` (see the
551          // calls to art::x86_64::X86_64Assembler::shll and
552          // art::x86_64::X86_64Assembler::AddImmediate below), but it
553          // has not been saved by the previous call to
554          // art::SlowPathCode::SaveLiveRegisters, as it is a
555          // callee-save register --
556          // art::SlowPathCode::SaveLiveRegisters does not consider
557          // callee-save registers, as it has been designed with the
558          // assumption that callee-save registers are supposed to be
559          // handled by the called function.  So, as a callee-save
560          // register, `index_reg` _would_ eventually be saved onto
561          // the stack, but it would be too late: we would have
562          // changed its value earlier.  Therefore, we manually save
563          // it here into another freely available register,
564          // `free_reg`, chosen of course among the caller-save
565          // registers (as a callee-save `free_reg` register would
566          // exhibit the same problem).
567          //
568          // Note we could have requested a temporary register from
569          // the register allocator instead; but we prefer not to, as
570          // this is a slow path, and we know we can find a
571          // caller-save register that is available.
572          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
573          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
574          index_reg = free_reg;
575          index = Location::RegisterLocation(index_reg);
576        } else {
577          // The initial register stored in `index_` has already been
578          // saved in the call to art::SlowPathCode::SaveLiveRegisters
579          // (as it is not a callee-save register), so we can freely
580          // use it.
581        }
582        // Shifting the index value contained in `index_reg` by the
583        // scale factor (2) cannot overflow in practice, as the
584        // runtime is unable to allocate object arrays with a size
585        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
586        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
587        static_assert(
588            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
589            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
590        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
591      } else {
592        DCHECK(instruction_->IsInvoke());
593        DCHECK(instruction_->GetLocations()->Intrinsified());
594        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
595               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
596            << instruction_->AsInvoke()->GetIntrinsic();
597        DCHECK_EQ(offset_, 0U);
598        DCHECK(index_.IsRegister());
599      }
600    }
601
602    // We're moving two or three locations to locations that could
603    // overlap, so we need a parallel move resolver.
604    InvokeRuntimeCallingConvention calling_convention;
605    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
606    parallel_move.AddMove(ref_,
607                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
608                          Primitive::kPrimNot,
609                          nullptr);
610    parallel_move.AddMove(obj_,
611                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
612                          Primitive::kPrimNot,
613                          nullptr);
614    if (index.IsValid()) {
615      parallel_move.AddMove(index,
616                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
617                            Primitive::kPrimInt,
618                            nullptr);
619      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
620    } else {
621      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
622      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
623    }
624    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
625                                  instruction_,
626                                  instruction_->GetDexPc(),
627                                  this);
628    CheckEntrypointTypes<
629        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
630    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
631
632    RestoreLiveRegisters(codegen, locations);
633    __ jmp(GetExitLabel());
634  }
635
636  const char* GetDescription() const OVERRIDE {
637    return "ReadBarrierForHeapReferenceSlowPathX86_64";
638  }
639
640 private:
641  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
642    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
643    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
644    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
645      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
646        return static_cast<CpuRegister>(i);
647      }
648    }
649    // We shall never fail to find a free caller-save register, as
650    // there are more than two core caller-save registers on x86-64
651    // (meaning it is possible to find one which is different from
652    // `ref` and `obj`).
653    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
654    LOG(FATAL) << "Could not find a free caller-save register";
655    UNREACHABLE();
656  }
657
658  const Location out_;
659  const Location ref_;
660  const Location obj_;
661  const uint32_t offset_;
662  // An additional location containing an index to an array.
663  // Only used for HArrayGet and the UnsafeGetObject &
664  // UnsafeGetObjectVolatile intrinsics.
665  const Location index_;
666
667  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
668};
669
670// Slow path generating a read barrier for a GC root.
671class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
672 public:
673  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
674      : SlowPathCode(instruction), out_(out), root_(root) {
675    DCHECK(kEmitCompilerReadBarrier);
676  }
677
678  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
679    LocationSummary* locations = instruction_->GetLocations();
680    DCHECK(locations->CanCall());
681    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
682    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
683        << "Unexpected instruction in read barrier for GC root slow path: "
684        << instruction_->DebugName();
685
686    __ Bind(GetEntryLabel());
687    SaveLiveRegisters(codegen, locations);
688
689    InvokeRuntimeCallingConvention calling_convention;
690    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
691    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
692    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
693                                  instruction_,
694                                  instruction_->GetDexPc(),
695                                  this);
696    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
697    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
698
699    RestoreLiveRegisters(codegen, locations);
700    __ jmp(GetExitLabel());
701  }
702
703  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
704
705 private:
706  const Location out_;
707  const Location root_;
708
709  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
710};
711
712#undef __
713#define __ down_cast<X86_64Assembler*>(GetAssembler())->
714
715inline Condition X86_64IntegerCondition(IfCondition cond) {
716  switch (cond) {
717    case kCondEQ: return kEqual;
718    case kCondNE: return kNotEqual;
719    case kCondLT: return kLess;
720    case kCondLE: return kLessEqual;
721    case kCondGT: return kGreater;
722    case kCondGE: return kGreaterEqual;
723    case kCondB:  return kBelow;
724    case kCondBE: return kBelowEqual;
725    case kCondA:  return kAbove;
726    case kCondAE: return kAboveEqual;
727  }
728  LOG(FATAL) << "Unreachable";
729  UNREACHABLE();
730}
731
732// Maps FP condition to x86_64 name.
733inline Condition X86_64FPCondition(IfCondition cond) {
734  switch (cond) {
735    case kCondEQ: return kEqual;
736    case kCondNE: return kNotEqual;
737    case kCondLT: return kBelow;
738    case kCondLE: return kBelowEqual;
739    case kCondGT: return kAbove;
740    case kCondGE: return kAboveEqual;
741    default:      break;  // should not happen
742  };
743  LOG(FATAL) << "Unreachable";
744  UNREACHABLE();
745}
746
747HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
748      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
749      MethodReference target_method ATTRIBUTE_UNUSED) {
750  switch (desired_dispatch_info.code_ptr_location) {
751    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
752    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
753      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
754      return HInvokeStaticOrDirect::DispatchInfo {
755        desired_dispatch_info.method_load_kind,
756        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
757        desired_dispatch_info.method_load_data,
758        0u
759      };
760    default:
761      return desired_dispatch_info;
762  }
763}
764
765void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
766                                                     Location temp) {
767  // All registers are assumed to be correctly set up.
768
769  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
770  switch (invoke->GetMethodLoadKind()) {
771    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
772      // temp = thread->string_init_entrypoint
773      __ gs()->movq(temp.AsRegister<CpuRegister>(),
774                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
775      break;
776    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
777      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
778      break;
779    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
780      __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
781      break;
782    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
783      __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
784      method_patches_.emplace_back(invoke->GetTargetMethod());
785      __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
786      break;
787    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
788      __ movq(temp.AsRegister<CpuRegister>(),
789              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
790      // Bind a new fixup label at the end of the "movl" insn.
791      uint32_t offset = invoke->GetDexCacheArrayOffset();
792      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
793      break;
794    }
795    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
796      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
797      Register method_reg;
798      CpuRegister reg = temp.AsRegister<CpuRegister>();
799      if (current_method.IsRegister()) {
800        method_reg = current_method.AsRegister<Register>();
801      } else {
802        DCHECK(invoke->GetLocations()->Intrinsified());
803        DCHECK(!current_method.IsValid());
804        method_reg = reg.AsRegister();
805        __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
806      }
807      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
808      __ movq(reg,
809              Address(CpuRegister(method_reg),
810                      ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
811      // temp = temp[index_in_cache];
812      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
813      uint32_t index_in_cache = invoke->GetDexMethodIndex();
814      __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
815      break;
816    }
817  }
818
819  switch (invoke->GetCodePtrLocation()) {
820    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
821      __ call(&frame_entry_label_);
822      break;
823    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
824      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
825      Label* label = &relative_call_patches_.back().label;
826      __ call(label);  // Bind to the patch label, override at link time.
827      __ Bind(label);  // Bind the label at the end of the "call" insn.
828      break;
829    }
830    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
831    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
832      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
833      LOG(FATAL) << "Unsupported";
834      UNREACHABLE();
835    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
836      // (callee_method + offset_of_quick_compiled_code)()
837      __ call(Address(callee_method.AsRegister<CpuRegister>(),
838                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
839                          kX86_64WordSize).SizeValue()));
840      break;
841  }
842
843  DCHECK(!IsLeafMethod());
844}
845
846void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
847  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
848  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
849      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
850
851  // Use the calling convention instead of the location of the receiver, as
852  // intrinsics may have put the receiver in a different register. In the intrinsics
853  // slow path, the arguments have been moved to the right place, so here we are
854  // guaranteed that the receiver is the first register of the calling convention.
855  InvokeDexCallingConvention calling_convention;
856  Register receiver = calling_convention.GetRegisterAt(0);
857
858  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
859  // /* HeapReference<Class> */ temp = receiver->klass_
860  __ movl(temp, Address(CpuRegister(receiver), class_offset));
861  MaybeRecordImplicitNullCheck(invoke);
862  // Instead of simply (possibly) unpoisoning `temp` here, we should
863  // emit a read barrier for the previous class reference load.
864  // However this is not required in practice, as this is an
865  // intermediate/temporary reference and because the current
866  // concurrent copying collector keeps the from-space memory
867  // intact/accessible until the end of the marking phase (the
868  // concurrent copying collector may not in the future).
869  __ MaybeUnpoisonHeapReference(temp);
870  // temp = temp->GetMethodAt(method_offset);
871  __ movq(temp, Address(temp, method_offset));
872  // call temp->GetEntryPoint();
873  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
874      kX86_64WordSize).SizeValue()));
875}
876
877void CodeGeneratorX86_64::RecordSimplePatch() {
878  if (GetCompilerOptions().GetIncludePatchInformation()) {
879    simple_patches_.emplace_back();
880    __ Bind(&simple_patches_.back());
881  }
882}
883
884void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
885  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
886  __ Bind(&string_patches_.back().label);
887}
888
889Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
890                                                            uint32_t element_offset) {
891  // Add a patch entry and return the label.
892  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
893  return &pc_relative_dex_cache_patches_.back().label;
894}
895
896void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
897  DCHECK(linker_patches->empty());
898  size_t size =
899      method_patches_.size() +
900      relative_call_patches_.size() +
901      pc_relative_dex_cache_patches_.size() +
902      simple_patches_.size() +
903      string_patches_.size();
904  linker_patches->reserve(size);
905  // The label points to the end of the "movl" insn but the literal offset for method
906  // patch needs to point to the embedded constant which occupies the last 4 bytes.
907  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
908  for (const MethodPatchInfo<Label>& info : method_patches_) {
909    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
910    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
911                                                       info.target_method.dex_file,
912                                                       info.target_method.dex_method_index));
913  }
914  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
915    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
916    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
917                                                             info.target_method.dex_file,
918                                                             info.target_method.dex_method_index));
919  }
920  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
921    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
922    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
923                                                              &info.target_dex_file,
924                                                              info.label.Position(),
925                                                              info.element_offset));
926  }
927  for (const Label& label : simple_patches_) {
928    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
929    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
930  }
931  for (const StringPatchInfo<Label>& info : string_patches_) {
932    // These are always PC-relative, see GetSupportedLoadStringKind().
933    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
934    linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
935                                                               &info.dex_file,
936                                                               info.label.Position(),
937                                                               info.string_index));
938  }
939}
940
941void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
942  stream << Register(reg);
943}
944
945void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
946  stream << FloatRegister(reg);
947}
948
949size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
950  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
951  return kX86_64WordSize;
952}
953
954size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
955  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
956  return kX86_64WordSize;
957}
958
959size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
960  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
961  return kX86_64WordSize;
962}
963
964size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
965  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
966  return kX86_64WordSize;
967}
968
969void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
970                                        HInstruction* instruction,
971                                        uint32_t dex_pc,
972                                        SlowPathCode* slow_path) {
973  InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
974                instruction,
975                dex_pc,
976                slow_path);
977}
978
979void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
980                                        HInstruction* instruction,
981                                        uint32_t dex_pc,
982                                        SlowPathCode* slow_path) {
983  ValidateInvokeRuntime(instruction, slow_path);
984  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
985  RecordPcInfo(instruction, dex_pc, slow_path);
986}
987
988static constexpr int kNumberOfCpuRegisterPairs = 0;
989// Use a fake return address register to mimic Quick.
990static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
991CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
992                                         const X86_64InstructionSetFeatures& isa_features,
993                                         const CompilerOptions& compiler_options,
994                                         OptimizingCompilerStats* stats)
995      : CodeGenerator(graph,
996                      kNumberOfCpuRegisters,
997                      kNumberOfFloatRegisters,
998                      kNumberOfCpuRegisterPairs,
999                      ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1000                                          arraysize(kCoreCalleeSaves))
1001                          | (1 << kFakeReturnRegister),
1002                      ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1003                                          arraysize(kFpuCalleeSaves)),
1004                      compiler_options,
1005                      stats),
1006        block_labels_(nullptr),
1007        location_builder_(graph, this),
1008        instruction_visitor_(graph, this),
1009        move_resolver_(graph->GetArena(), this),
1010        assembler_(graph->GetArena()),
1011        isa_features_(isa_features),
1012        constant_area_start_(0),
1013        method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1014        relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1015        pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1016        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1017        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1018        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1019  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1020}
1021
1022InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1023                                                               CodeGeneratorX86_64* codegen)
1024      : InstructionCodeGenerator(graph, codegen),
1025        assembler_(codegen->GetAssembler()),
1026        codegen_(codegen) {}
1027
1028void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1029  // Stack register is always reserved.
1030  blocked_core_registers_[RSP] = true;
1031
1032  // Block the register used as TMP.
1033  blocked_core_registers_[TMP] = true;
1034}
1035
1036static dwarf::Reg DWARFReg(Register reg) {
1037  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1038}
1039
1040static dwarf::Reg DWARFReg(FloatRegister reg) {
1041  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1042}
1043
1044void CodeGeneratorX86_64::GenerateFrameEntry() {
1045  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
1046  __ Bind(&frame_entry_label_);
1047  bool skip_overflow_check = IsLeafMethod()
1048      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1049  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1050
1051  if (!skip_overflow_check) {
1052    __ testq(CpuRegister(RAX), Address(
1053        CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
1054    RecordPcInfo(nullptr, 0);
1055  }
1056
1057  if (HasEmptyFrame()) {
1058    return;
1059  }
1060
1061  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1062    Register reg = kCoreCalleeSaves[i];
1063    if (allocated_registers_.ContainsCoreRegister(reg)) {
1064      __ pushq(CpuRegister(reg));
1065      __ cfi().AdjustCFAOffset(kX86_64WordSize);
1066      __ cfi().RelOffset(DWARFReg(reg), 0);
1067    }
1068  }
1069
1070  int adjust = GetFrameSize() - GetCoreSpillSize();
1071  __ subq(CpuRegister(RSP), Immediate(adjust));
1072  __ cfi().AdjustCFAOffset(adjust);
1073  uint32_t xmm_spill_location = GetFpuSpillStart();
1074  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1075
1076  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1077    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1078      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1079      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1080      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1081    }
1082  }
1083
1084  __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1085          CpuRegister(kMethodRegisterArgument));
1086}
1087
1088void CodeGeneratorX86_64::GenerateFrameExit() {
1089  __ cfi().RememberState();
1090  if (!HasEmptyFrame()) {
1091    uint32_t xmm_spill_location = GetFpuSpillStart();
1092    size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1093    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1094      if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1095        int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1096        __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1097        __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1098      }
1099    }
1100
1101    int adjust = GetFrameSize() - GetCoreSpillSize();
1102    __ addq(CpuRegister(RSP), Immediate(adjust));
1103    __ cfi().AdjustCFAOffset(-adjust);
1104
1105    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1106      Register reg = kCoreCalleeSaves[i];
1107      if (allocated_registers_.ContainsCoreRegister(reg)) {
1108        __ popq(CpuRegister(reg));
1109        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1110        __ cfi().Restore(DWARFReg(reg));
1111      }
1112    }
1113  }
1114  __ ret();
1115  __ cfi().RestoreState();
1116  __ cfi().DefCFAOffset(GetFrameSize());
1117}
1118
1119void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1120  __ Bind(GetLabelOf(block));
1121}
1122
1123void CodeGeneratorX86_64::Move(Location destination, Location source) {
1124  if (source.Equals(destination)) {
1125    return;
1126  }
1127  if (destination.IsRegister()) {
1128    CpuRegister dest = destination.AsRegister<CpuRegister>();
1129    if (source.IsRegister()) {
1130      __ movq(dest, source.AsRegister<CpuRegister>());
1131    } else if (source.IsFpuRegister()) {
1132      __ movd(dest, source.AsFpuRegister<XmmRegister>());
1133    } else if (source.IsStackSlot()) {
1134      __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1135    } else if (source.IsConstant()) {
1136      HConstant* constant = source.GetConstant();
1137      if (constant->IsLongConstant()) {
1138        Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1139      } else {
1140        Load32BitValue(dest, GetInt32ValueOf(constant));
1141      }
1142    } else {
1143      DCHECK(source.IsDoubleStackSlot());
1144      __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1145    }
1146  } else if (destination.IsFpuRegister()) {
1147    XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1148    if (source.IsRegister()) {
1149      __ movd(dest, source.AsRegister<CpuRegister>());
1150    } else if (source.IsFpuRegister()) {
1151      __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1152    } else if (source.IsConstant()) {
1153      HConstant* constant = source.GetConstant();
1154      int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1155      if (constant->IsFloatConstant()) {
1156        Load32BitValue(dest, static_cast<int32_t>(value));
1157      } else {
1158        Load64BitValue(dest, value);
1159      }
1160    } else if (source.IsStackSlot()) {
1161      __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1162    } else {
1163      DCHECK(source.IsDoubleStackSlot());
1164      __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1165    }
1166  } else if (destination.IsStackSlot()) {
1167    if (source.IsRegister()) {
1168      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1169              source.AsRegister<CpuRegister>());
1170    } else if (source.IsFpuRegister()) {
1171      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1172               source.AsFpuRegister<XmmRegister>());
1173    } else if (source.IsConstant()) {
1174      HConstant* constant = source.GetConstant();
1175      int32_t value = GetInt32ValueOf(constant);
1176      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1177    } else {
1178      DCHECK(source.IsStackSlot()) << source;
1179      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1180      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1181    }
1182  } else {
1183    DCHECK(destination.IsDoubleStackSlot());
1184    if (source.IsRegister()) {
1185      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1186              source.AsRegister<CpuRegister>());
1187    } else if (source.IsFpuRegister()) {
1188      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1189               source.AsFpuRegister<XmmRegister>());
1190    } else if (source.IsConstant()) {
1191      HConstant* constant = source.GetConstant();
1192      int64_t value;
1193      if (constant->IsDoubleConstant()) {
1194        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
1195      } else {
1196        DCHECK(constant->IsLongConstant());
1197        value = constant->AsLongConstant()->GetValue();
1198      }
1199      Store64BitValueToStack(destination, value);
1200    } else {
1201      DCHECK(source.IsDoubleStackSlot());
1202      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1203      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1204    }
1205  }
1206}
1207
1208void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1209  DCHECK(location.IsRegister());
1210  Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1211}
1212
1213void CodeGeneratorX86_64::MoveLocation(
1214    Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1215  Move(dst, src);
1216}
1217
1218void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1219  if (location.IsRegister()) {
1220    locations->AddTemp(location);
1221  } else {
1222    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1223  }
1224}
1225
1226void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1227  DCHECK(!successor->IsExitBlock());
1228
1229  HBasicBlock* block = got->GetBlock();
1230  HInstruction* previous = got->GetPrevious();
1231
1232  HLoopInformation* info = block->GetLoopInformation();
1233  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1234    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1235    return;
1236  }
1237
1238  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1239    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1240  }
1241  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1242    __ jmp(codegen_->GetLabelOf(successor));
1243  }
1244}
1245
1246void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1247  got->SetLocations(nullptr);
1248}
1249
1250void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1251  HandleGoto(got, got->GetSuccessor());
1252}
1253
1254void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1255  try_boundary->SetLocations(nullptr);
1256}
1257
1258void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1259  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1260  if (!successor->IsExitBlock()) {
1261    HandleGoto(try_boundary, successor);
1262  }
1263}
1264
1265void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1266  exit->SetLocations(nullptr);
1267}
1268
1269void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1270}
1271
1272template<class LabelType>
1273void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1274                                                     LabelType* true_label,
1275                                                     LabelType* false_label) {
1276  if (cond->IsFPConditionTrueIfNaN()) {
1277    __ j(kUnordered, true_label);
1278  } else if (cond->IsFPConditionFalseIfNaN()) {
1279    __ j(kUnordered, false_label);
1280  }
1281  __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1282}
1283
1284void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1285  LocationSummary* locations = condition->GetLocations();
1286
1287  Location left = locations->InAt(0);
1288  Location right = locations->InAt(1);
1289  Primitive::Type type = condition->InputAt(0)->GetType();
1290  switch (type) {
1291    case Primitive::kPrimBoolean:
1292    case Primitive::kPrimByte:
1293    case Primitive::kPrimChar:
1294    case Primitive::kPrimShort:
1295    case Primitive::kPrimInt:
1296    case Primitive::kPrimNot: {
1297      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1298      if (right.IsConstant()) {
1299        int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
1300        if (value == 0) {
1301          __ testl(left_reg, left_reg);
1302        } else {
1303          __ cmpl(left_reg, Immediate(value));
1304        }
1305      } else if (right.IsStackSlot()) {
1306        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1307      } else {
1308        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1309      }
1310      break;
1311    }
1312    case Primitive::kPrimLong: {
1313      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1314      if (right.IsConstant()) {
1315        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1316        codegen_->Compare64BitValue(left_reg, value);
1317      } else if (right.IsDoubleStackSlot()) {
1318        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1319      } else {
1320        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1321      }
1322      break;
1323    }
1324    case Primitive::kPrimFloat: {
1325      if (right.IsFpuRegister()) {
1326        __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1327      } else if (right.IsConstant()) {
1328        __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1329                   codegen_->LiteralFloatAddress(
1330                     right.GetConstant()->AsFloatConstant()->GetValue()));
1331      } else {
1332        DCHECK(right.IsStackSlot());
1333        __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1334                   Address(CpuRegister(RSP), right.GetStackIndex()));
1335      }
1336      break;
1337    }
1338    case Primitive::kPrimDouble: {
1339      if (right.IsFpuRegister()) {
1340        __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1341      } else if (right.IsConstant()) {
1342        __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1343                   codegen_->LiteralDoubleAddress(
1344                     right.GetConstant()->AsDoubleConstant()->GetValue()));
1345      } else {
1346        DCHECK(right.IsDoubleStackSlot());
1347        __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1348                   Address(CpuRegister(RSP), right.GetStackIndex()));
1349      }
1350      break;
1351    }
1352    default:
1353      LOG(FATAL) << "Unexpected condition type " << type;
1354  }
1355}
1356
1357template<class LabelType>
1358void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1359                                                                  LabelType* true_target_in,
1360                                                                  LabelType* false_target_in) {
1361  // Generated branching requires both targets to be explicit. If either of the
1362  // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1363  LabelType fallthrough_target;
1364  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1365  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1366
1367  // Generate the comparison to set the CC.
1368  GenerateCompareTest(condition);
1369
1370  // Now generate the correct jump(s).
1371  Primitive::Type type = condition->InputAt(0)->GetType();
1372  switch (type) {
1373    case Primitive::kPrimLong: {
1374      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1375      break;
1376    }
1377    case Primitive::kPrimFloat: {
1378      GenerateFPJumps(condition, true_target, false_target);
1379      break;
1380    }
1381    case Primitive::kPrimDouble: {
1382      GenerateFPJumps(condition, true_target, false_target);
1383      break;
1384    }
1385    default:
1386      LOG(FATAL) << "Unexpected condition type " << type;
1387  }
1388
1389  if (false_target != &fallthrough_target) {
1390    __ jmp(false_target);
1391  }
1392
1393  if (fallthrough_target.IsLinked()) {
1394    __ Bind(&fallthrough_target);
1395  }
1396}
1397
1398static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1399  // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1400  // are set only strictly before `branch`. We can't use the eflags on long
1401  // conditions if they are materialized due to the complex branching.
1402  return cond->IsCondition() &&
1403         cond->GetNext() == branch &&
1404         !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1405}
1406
1407template<class LabelType>
1408void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1409                                                           size_t condition_input_index,
1410                                                           LabelType* true_target,
1411                                                           LabelType* false_target) {
1412  HInstruction* cond = instruction->InputAt(condition_input_index);
1413
1414  if (true_target == nullptr && false_target == nullptr) {
1415    // Nothing to do. The code always falls through.
1416    return;
1417  } else if (cond->IsIntConstant()) {
1418    // Constant condition, statically compared against "true" (integer value 1).
1419    if (cond->AsIntConstant()->IsTrue()) {
1420      if (true_target != nullptr) {
1421        __ jmp(true_target);
1422      }
1423    } else {
1424      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1425      if (false_target != nullptr) {
1426        __ jmp(false_target);
1427      }
1428    }
1429    return;
1430  }
1431
1432  // The following code generates these patterns:
1433  //  (1) true_target == nullptr && false_target != nullptr
1434  //        - opposite condition true => branch to false_target
1435  //  (2) true_target != nullptr && false_target == nullptr
1436  //        - condition true => branch to true_target
1437  //  (3) true_target != nullptr && false_target != nullptr
1438  //        - condition true => branch to true_target
1439  //        - branch to false_target
1440  if (IsBooleanValueOrMaterializedCondition(cond)) {
1441    if (AreEflagsSetFrom(cond, instruction)) {
1442      if (true_target == nullptr) {
1443        __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1444      } else {
1445        __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1446      }
1447    } else {
1448      // Materialized condition, compare against 0.
1449      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1450      if (lhs.IsRegister()) {
1451        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1452      } else {
1453        __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1454      }
1455      if (true_target == nullptr) {
1456        __ j(kEqual, false_target);
1457      } else {
1458        __ j(kNotEqual, true_target);
1459      }
1460    }
1461  } else {
1462    // Condition has not been materialized, use its inputs as the
1463    // comparison and its condition as the branch condition.
1464    HCondition* condition = cond->AsCondition();
1465
1466    // If this is a long or FP comparison that has been folded into
1467    // the HCondition, generate the comparison directly.
1468    Primitive::Type type = condition->InputAt(0)->GetType();
1469    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1470      GenerateCompareTestAndBranch(condition, true_target, false_target);
1471      return;
1472    }
1473
1474    Location lhs = condition->GetLocations()->InAt(0);
1475    Location rhs = condition->GetLocations()->InAt(1);
1476    if (rhs.IsRegister()) {
1477      __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1478    } else if (rhs.IsConstant()) {
1479      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1480      codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1481    } else {
1482      __ cmpl(lhs.AsRegister<CpuRegister>(),
1483              Address(CpuRegister(RSP), rhs.GetStackIndex()));
1484    }
1485      if (true_target == nullptr) {
1486      __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1487    } else {
1488      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1489    }
1490  }
1491
1492  // If neither branch falls through (case 3), the conditional branch to `true_target`
1493  // was already emitted (case 2) and we need to emit a jump to `false_target`.
1494  if (true_target != nullptr && false_target != nullptr) {
1495    __ jmp(false_target);
1496  }
1497}
1498
1499void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1500  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1501  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1502    locations->SetInAt(0, Location::Any());
1503  }
1504}
1505
1506void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1507  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1508  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1509  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1510      nullptr : codegen_->GetLabelOf(true_successor);
1511  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1512      nullptr : codegen_->GetLabelOf(false_successor);
1513  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1514}
1515
1516void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1517  LocationSummary* locations = new (GetGraph()->GetArena())
1518      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1519  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1520    locations->SetInAt(0, Location::Any());
1521  }
1522}
1523
1524void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1525  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1526  GenerateTestAndBranch<Label>(deoptimize,
1527                               /* condition_input_index */ 0,
1528                               slow_path->GetEntryLabel(),
1529                               /* false_target */ nullptr);
1530}
1531
1532static bool SelectCanUseCMOV(HSelect* select) {
1533  // There are no conditional move instructions for XMMs.
1534  if (Primitive::IsFloatingPointType(select->GetType())) {
1535    return false;
1536  }
1537
1538  // A FP condition doesn't generate the single CC that we need.
1539  HInstruction* condition = select->GetCondition();
1540  if (condition->IsCondition() &&
1541      Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1542    return false;
1543  }
1544
1545  // We can generate a CMOV for this Select.
1546  return true;
1547}
1548
1549void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1550  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1551  if (Primitive::IsFloatingPointType(select->GetType())) {
1552    locations->SetInAt(0, Location::RequiresFpuRegister());
1553    locations->SetInAt(1, Location::Any());
1554  } else {
1555    locations->SetInAt(0, Location::RequiresRegister());
1556    if (SelectCanUseCMOV(select)) {
1557      if (select->InputAt(1)->IsConstant()) {
1558        locations->SetInAt(1, Location::RequiresRegister());
1559      } else {
1560        locations->SetInAt(1, Location::Any());
1561      }
1562    } else {
1563      locations->SetInAt(1, Location::Any());
1564    }
1565  }
1566  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1567    locations->SetInAt(2, Location::RequiresRegister());
1568  }
1569  locations->SetOut(Location::SameAsFirstInput());
1570}
1571
1572void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1573  LocationSummary* locations = select->GetLocations();
1574  if (SelectCanUseCMOV(select)) {
1575    // If both the condition and the source types are integer, we can generate
1576    // a CMOV to implement Select.
1577    CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1578    Location value_true_loc = locations->InAt(1);
1579    DCHECK(locations->InAt(0).Equals(locations->Out()));
1580
1581    HInstruction* select_condition = select->GetCondition();
1582    Condition cond = kNotEqual;
1583
1584    // Figure out how to test the 'condition'.
1585    if (select_condition->IsCondition()) {
1586      HCondition* condition = select_condition->AsCondition();
1587      if (!condition->IsEmittedAtUseSite()) {
1588        // This was a previously materialized condition.
1589        // Can we use the existing condition code?
1590        if (AreEflagsSetFrom(condition, select)) {
1591          // Materialization was the previous instruction.  Condition codes are right.
1592          cond = X86_64IntegerCondition(condition->GetCondition());
1593        } else {
1594          // No, we have to recreate the condition code.
1595          CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1596          __ testl(cond_reg, cond_reg);
1597        }
1598      } else {
1599        GenerateCompareTest(condition);
1600        cond = X86_64IntegerCondition(condition->GetCondition());
1601      }
1602    } else {
1603      // Must be a boolean condition, which needs to be compared to 0.
1604      CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1605      __ testl(cond_reg, cond_reg);
1606    }
1607
1608    // If the condition is true, overwrite the output, which already contains false.
1609    // Generate the correct sized CMOV.
1610    bool is_64_bit = Primitive::Is64BitType(select->GetType());
1611    if (value_true_loc.IsRegister()) {
1612      __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1613    } else {
1614      __ cmov(cond,
1615              value_false,
1616              Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1617    }
1618  } else {
1619    NearLabel false_target;
1620    GenerateTestAndBranch<NearLabel>(select,
1621                                     /* condition_input_index */ 2,
1622                                     /* true_target */ nullptr,
1623                                     &false_target);
1624    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1625    __ Bind(&false_target);
1626  }
1627}
1628
1629void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1630  new (GetGraph()->GetArena()) LocationSummary(info);
1631}
1632
1633void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1634  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1635}
1636
1637void CodeGeneratorX86_64::GenerateNop() {
1638  __ nop();
1639}
1640
1641void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1642  LocationSummary* locations =
1643      new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1644  // Handle the long/FP comparisons made in instruction simplification.
1645  switch (cond->InputAt(0)->GetType()) {
1646    case Primitive::kPrimLong:
1647      locations->SetInAt(0, Location::RequiresRegister());
1648      locations->SetInAt(1, Location::Any());
1649      break;
1650    case Primitive::kPrimFloat:
1651    case Primitive::kPrimDouble:
1652      locations->SetInAt(0, Location::RequiresFpuRegister());
1653      locations->SetInAt(1, Location::Any());
1654      break;
1655    default:
1656      locations->SetInAt(0, Location::RequiresRegister());
1657      locations->SetInAt(1, Location::Any());
1658      break;
1659  }
1660  if (!cond->IsEmittedAtUseSite()) {
1661    locations->SetOut(Location::RequiresRegister());
1662  }
1663}
1664
1665void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1666  if (cond->IsEmittedAtUseSite()) {
1667    return;
1668  }
1669
1670  LocationSummary* locations = cond->GetLocations();
1671  Location lhs = locations->InAt(0);
1672  Location rhs = locations->InAt(1);
1673  CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1674  NearLabel true_label, false_label;
1675
1676  switch (cond->InputAt(0)->GetType()) {
1677    default:
1678      // Integer case.
1679
1680      // Clear output register: setcc only sets the low byte.
1681      __ xorl(reg, reg);
1682
1683      if (rhs.IsRegister()) {
1684        __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1685      } else if (rhs.IsConstant()) {
1686        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1687        codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1688      } else {
1689        __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1690      }
1691      __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1692      return;
1693    case Primitive::kPrimLong:
1694      // Clear output register: setcc only sets the low byte.
1695      __ xorl(reg, reg);
1696
1697      if (rhs.IsRegister()) {
1698        __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1699      } else if (rhs.IsConstant()) {
1700        int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
1701        codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
1702      } else {
1703        __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1704      }
1705      __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1706      return;
1707    case Primitive::kPrimFloat: {
1708      XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1709      if (rhs.IsConstant()) {
1710        float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1711        __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1712      } else if (rhs.IsStackSlot()) {
1713        __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1714      } else {
1715        __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1716      }
1717      GenerateFPJumps(cond, &true_label, &false_label);
1718      break;
1719    }
1720    case Primitive::kPrimDouble: {
1721      XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1722      if (rhs.IsConstant()) {
1723        double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1724        __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1725      } else if (rhs.IsDoubleStackSlot()) {
1726        __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1727      } else {
1728        __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1729      }
1730      GenerateFPJumps(cond, &true_label, &false_label);
1731      break;
1732    }
1733  }
1734
1735  // Convert the jumps into the result.
1736  NearLabel done_label;
1737
1738  // False case: result = 0.
1739  __ Bind(&false_label);
1740  __ xorl(reg, reg);
1741  __ jmp(&done_label);
1742
1743  // True case: result = 1.
1744  __ Bind(&true_label);
1745  __ movl(reg, Immediate(1));
1746  __ Bind(&done_label);
1747}
1748
1749void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1750  HandleCondition(comp);
1751}
1752
1753void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1754  HandleCondition(comp);
1755}
1756
1757void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1758  HandleCondition(comp);
1759}
1760
1761void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1762  HandleCondition(comp);
1763}
1764
1765void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1766  HandleCondition(comp);
1767}
1768
1769void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1770  HandleCondition(comp);
1771}
1772
1773void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1774  HandleCondition(comp);
1775}
1776
1777void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1778  HandleCondition(comp);
1779}
1780
1781void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1782  HandleCondition(comp);
1783}
1784
1785void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1786  HandleCondition(comp);
1787}
1788
1789void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1790  HandleCondition(comp);
1791}
1792
1793void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1794  HandleCondition(comp);
1795}
1796
1797void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1798  HandleCondition(comp);
1799}
1800
1801void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
1802  HandleCondition(comp);
1803}
1804
1805void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1806  HandleCondition(comp);
1807}
1808
1809void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1810  HandleCondition(comp);
1811}
1812
1813void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
1814  HandleCondition(comp);
1815}
1816
1817void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
1818  HandleCondition(comp);
1819}
1820
1821void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1822  HandleCondition(comp);
1823}
1824
1825void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1826  HandleCondition(comp);
1827}
1828
1829void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
1830  LocationSummary* locations =
1831      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
1832  switch (compare->InputAt(0)->GetType()) {
1833    case Primitive::kPrimBoolean:
1834    case Primitive::kPrimByte:
1835    case Primitive::kPrimShort:
1836    case Primitive::kPrimChar:
1837    case Primitive::kPrimInt:
1838    case Primitive::kPrimLong: {
1839      locations->SetInAt(0, Location::RequiresRegister());
1840      locations->SetInAt(1, Location::Any());
1841      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1842      break;
1843    }
1844    case Primitive::kPrimFloat:
1845    case Primitive::kPrimDouble: {
1846      locations->SetInAt(0, Location::RequiresFpuRegister());
1847      locations->SetInAt(1, Location::Any());
1848      locations->SetOut(Location::RequiresRegister());
1849      break;
1850    }
1851    default:
1852      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
1853  }
1854}
1855
1856void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
1857  LocationSummary* locations = compare->GetLocations();
1858  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1859  Location left = locations->InAt(0);
1860  Location right = locations->InAt(1);
1861
1862  NearLabel less, greater, done;
1863  Primitive::Type type = compare->InputAt(0)->GetType();
1864  Condition less_cond = kLess;
1865
1866  switch (type) {
1867    case Primitive::kPrimBoolean:
1868    case Primitive::kPrimByte:
1869    case Primitive::kPrimShort:
1870    case Primitive::kPrimChar:
1871    case Primitive::kPrimInt: {
1872      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1873      if (right.IsConstant()) {
1874        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
1875        codegen_->Compare32BitValue(left_reg, value);
1876      } else if (right.IsStackSlot()) {
1877        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1878      } else {
1879        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1880      }
1881      break;
1882    }
1883    case Primitive::kPrimLong: {
1884      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1885      if (right.IsConstant()) {
1886        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1887        codegen_->Compare64BitValue(left_reg, value);
1888      } else if (right.IsDoubleStackSlot()) {
1889        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1890      } else {
1891        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1892      }
1893      break;
1894    }
1895    case Primitive::kPrimFloat: {
1896      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1897      if (right.IsConstant()) {
1898        float value = right.GetConstant()->AsFloatConstant()->GetValue();
1899        __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
1900      } else if (right.IsStackSlot()) {
1901        __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1902      } else {
1903        __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
1904      }
1905      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1906      less_cond = kBelow;  //  ucomis{s,d} sets CF
1907      break;
1908    }
1909    case Primitive::kPrimDouble: {
1910      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1911      if (right.IsConstant()) {
1912        double value = right.GetConstant()->AsDoubleConstant()->GetValue();
1913        __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
1914      } else if (right.IsDoubleStackSlot()) {
1915        __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1916      } else {
1917        __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
1918      }
1919      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1920      less_cond = kBelow;  //  ucomis{s,d} sets CF
1921      break;
1922    }
1923    default:
1924      LOG(FATAL) << "Unexpected compare type " << type;
1925  }
1926
1927  __ movl(out, Immediate(0));
1928  __ j(kEqual, &done);
1929  __ j(less_cond, &less);
1930
1931  __ Bind(&greater);
1932  __ movl(out, Immediate(1));
1933  __ jmp(&done);
1934
1935  __ Bind(&less);
1936  __ movl(out, Immediate(-1));
1937
1938  __ Bind(&done);
1939}
1940
1941void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
1942  LocationSummary* locations =
1943      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1944  locations->SetOut(Location::ConstantLocation(constant));
1945}
1946
1947void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
1948  // Will be generated at use site.
1949}
1950
1951void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
1952  LocationSummary* locations =
1953      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1954  locations->SetOut(Location::ConstantLocation(constant));
1955}
1956
1957void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
1958  // Will be generated at use site.
1959}
1960
1961void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
1962  LocationSummary* locations =
1963      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1964  locations->SetOut(Location::ConstantLocation(constant));
1965}
1966
1967void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
1968  // Will be generated at use site.
1969}
1970
1971void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
1972  LocationSummary* locations =
1973      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1974  locations->SetOut(Location::ConstantLocation(constant));
1975}
1976
1977void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
1978  // Will be generated at use site.
1979}
1980
1981void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
1982  LocationSummary* locations =
1983      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1984  locations->SetOut(Location::ConstantLocation(constant));
1985}
1986
1987void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
1988    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
1989  // Will be generated at use site.
1990}
1991
1992void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1993  memory_barrier->SetLocations(nullptr);
1994}
1995
1996void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1997  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
1998}
1999
2000void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2001  ret->SetLocations(nullptr);
2002}
2003
2004void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2005  codegen_->GenerateFrameExit();
2006}
2007
2008void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2009  LocationSummary* locations =
2010      new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2011  switch (ret->InputAt(0)->GetType()) {
2012    case Primitive::kPrimBoolean:
2013    case Primitive::kPrimByte:
2014    case Primitive::kPrimChar:
2015    case Primitive::kPrimShort:
2016    case Primitive::kPrimInt:
2017    case Primitive::kPrimNot:
2018    case Primitive::kPrimLong:
2019      locations->SetInAt(0, Location::RegisterLocation(RAX));
2020      break;
2021
2022    case Primitive::kPrimFloat:
2023    case Primitive::kPrimDouble:
2024      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2025      break;
2026
2027    default:
2028      LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2029  }
2030}
2031
2032void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2033  if (kIsDebugBuild) {
2034    switch (ret->InputAt(0)->GetType()) {
2035      case Primitive::kPrimBoolean:
2036      case Primitive::kPrimByte:
2037      case Primitive::kPrimChar:
2038      case Primitive::kPrimShort:
2039      case Primitive::kPrimInt:
2040      case Primitive::kPrimNot:
2041      case Primitive::kPrimLong:
2042        DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2043        break;
2044
2045      case Primitive::kPrimFloat:
2046      case Primitive::kPrimDouble:
2047        DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2048                  XMM0);
2049        break;
2050
2051      default:
2052        LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2053    }
2054  }
2055  codegen_->GenerateFrameExit();
2056}
2057
2058Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2059  switch (type) {
2060    case Primitive::kPrimBoolean:
2061    case Primitive::kPrimByte:
2062    case Primitive::kPrimChar:
2063    case Primitive::kPrimShort:
2064    case Primitive::kPrimInt:
2065    case Primitive::kPrimNot:
2066    case Primitive::kPrimLong:
2067      return Location::RegisterLocation(RAX);
2068
2069    case Primitive::kPrimVoid:
2070      return Location::NoLocation();
2071
2072    case Primitive::kPrimDouble:
2073    case Primitive::kPrimFloat:
2074      return Location::FpuRegisterLocation(XMM0);
2075  }
2076
2077  UNREACHABLE();
2078}
2079
2080Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2081  return Location::RegisterLocation(kMethodRegisterArgument);
2082}
2083
2084Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2085  switch (type) {
2086    case Primitive::kPrimBoolean:
2087    case Primitive::kPrimByte:
2088    case Primitive::kPrimChar:
2089    case Primitive::kPrimShort:
2090    case Primitive::kPrimInt:
2091    case Primitive::kPrimNot: {
2092      uint32_t index = gp_index_++;
2093      stack_index_++;
2094      if (index < calling_convention.GetNumberOfRegisters()) {
2095        return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2096      } else {
2097        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2098      }
2099    }
2100
2101    case Primitive::kPrimLong: {
2102      uint32_t index = gp_index_;
2103      stack_index_ += 2;
2104      if (index < calling_convention.GetNumberOfRegisters()) {
2105        gp_index_ += 1;
2106        return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2107      } else {
2108        gp_index_ += 2;
2109        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2110      }
2111    }
2112
2113    case Primitive::kPrimFloat: {
2114      uint32_t index = float_index_++;
2115      stack_index_++;
2116      if (index < calling_convention.GetNumberOfFpuRegisters()) {
2117        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2118      } else {
2119        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2120      }
2121    }
2122
2123    case Primitive::kPrimDouble: {
2124      uint32_t index = float_index_++;
2125      stack_index_ += 2;
2126      if (index < calling_convention.GetNumberOfFpuRegisters()) {
2127        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2128      } else {
2129        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2130      }
2131    }
2132
2133    case Primitive::kPrimVoid:
2134      LOG(FATAL) << "Unexpected parameter type " << type;
2135      break;
2136  }
2137  return Location::NoLocation();
2138}
2139
2140void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2141  // The trampoline uses the same calling convention as dex calling conventions,
2142  // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2143  // the method_idx.
2144  HandleInvoke(invoke);
2145}
2146
2147void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2148  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2149}
2150
2151void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2152  // Explicit clinit checks triggered by static invokes must have been pruned by
2153  // art::PrepareForRegisterAllocation.
2154  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2155
2156  IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2157  if (intrinsic.TryDispatch(invoke)) {
2158    return;
2159  }
2160
2161  HandleInvoke(invoke);
2162}
2163
2164static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2165  if (invoke->GetLocations()->Intrinsified()) {
2166    IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2167    intrinsic.Dispatch(invoke);
2168    return true;
2169  }
2170  return false;
2171}
2172
2173void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2174  // Explicit clinit checks triggered by static invokes must have been pruned by
2175  // art::PrepareForRegisterAllocation.
2176  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2177
2178  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2179    return;
2180  }
2181
2182  LocationSummary* locations = invoke->GetLocations();
2183  codegen_->GenerateStaticOrDirectCall(
2184      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2185  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2186}
2187
2188void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2189  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2190  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2191}
2192
2193void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2194  IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2195  if (intrinsic.TryDispatch(invoke)) {
2196    return;
2197  }
2198
2199  HandleInvoke(invoke);
2200}
2201
2202void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2203  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2204    return;
2205  }
2206
2207  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2208  DCHECK(!codegen_->IsLeafMethod());
2209  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2210}
2211
2212void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2213  HandleInvoke(invoke);
2214  // Add the hidden argument.
2215  invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2216}
2217
2218void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2219  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2220  LocationSummary* locations = invoke->GetLocations();
2221  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2222  CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2223  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
2224      invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
2225  Location receiver = locations->InAt(0);
2226  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2227
2228  // Set the hidden argument. This is safe to do this here, as RAX
2229  // won't be modified thereafter, before the `call` instruction.
2230  DCHECK_EQ(RAX, hidden_reg.AsRegister());
2231  codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2232
2233  if (receiver.IsStackSlot()) {
2234    __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2235    // /* HeapReference<Class> */ temp = temp->klass_
2236    __ movl(temp, Address(temp, class_offset));
2237  } else {
2238    // /* HeapReference<Class> */ temp = receiver->klass_
2239    __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2240  }
2241  codegen_->MaybeRecordImplicitNullCheck(invoke);
2242  // Instead of simply (possibly) unpoisoning `temp` here, we should
2243  // emit a read barrier for the previous class reference load.
2244  // However this is not required in practice, as this is an
2245  // intermediate/temporary reference and because the current
2246  // concurrent copying collector keeps the from-space memory
2247  // intact/accessible until the end of the marking phase (the
2248  // concurrent copying collector may not in the future).
2249  __ MaybeUnpoisonHeapReference(temp);
2250  // temp = temp->GetImtEntryAt(method_offset);
2251  __ movq(temp, Address(temp, method_offset));
2252  // call temp->GetEntryPoint();
2253  __ call(Address(temp,
2254                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
2255
2256  DCHECK(!codegen_->IsLeafMethod());
2257  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2258}
2259
2260void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2261  LocationSummary* locations =
2262      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2263  switch (neg->GetResultType()) {
2264    case Primitive::kPrimInt:
2265    case Primitive::kPrimLong:
2266      locations->SetInAt(0, Location::RequiresRegister());
2267      locations->SetOut(Location::SameAsFirstInput());
2268      break;
2269
2270    case Primitive::kPrimFloat:
2271    case Primitive::kPrimDouble:
2272      locations->SetInAt(0, Location::RequiresFpuRegister());
2273      locations->SetOut(Location::SameAsFirstInput());
2274      locations->AddTemp(Location::RequiresFpuRegister());
2275      break;
2276
2277    default:
2278      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2279  }
2280}
2281
2282void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2283  LocationSummary* locations = neg->GetLocations();
2284  Location out = locations->Out();
2285  Location in = locations->InAt(0);
2286  switch (neg->GetResultType()) {
2287    case Primitive::kPrimInt:
2288      DCHECK(in.IsRegister());
2289      DCHECK(in.Equals(out));
2290      __ negl(out.AsRegister<CpuRegister>());
2291      break;
2292
2293    case Primitive::kPrimLong:
2294      DCHECK(in.IsRegister());
2295      DCHECK(in.Equals(out));
2296      __ negq(out.AsRegister<CpuRegister>());
2297      break;
2298
2299    case Primitive::kPrimFloat: {
2300      DCHECK(in.Equals(out));
2301      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2302      // Implement float negation with an exclusive or with value
2303      // 0x80000000 (mask for bit 31, representing the sign of a
2304      // single-precision floating-point number).
2305      __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2306      __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2307      break;
2308    }
2309
2310    case Primitive::kPrimDouble: {
2311      DCHECK(in.Equals(out));
2312      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2313      // Implement double negation with an exclusive or with value
2314      // 0x8000000000000000 (mask for bit 63, representing the sign of
2315      // a double-precision floating-point number).
2316      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2317      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2318      break;
2319    }
2320
2321    default:
2322      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2323  }
2324}
2325
2326void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2327  LocationSummary* locations =
2328      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2329  Primitive::Type result_type = conversion->GetResultType();
2330  Primitive::Type input_type = conversion->GetInputType();
2331  DCHECK_NE(result_type, input_type);
2332
2333  // The Java language does not allow treating boolean as an integral type but
2334  // our bit representation makes it safe.
2335
2336  switch (result_type) {
2337    case Primitive::kPrimByte:
2338      switch (input_type) {
2339        case Primitive::kPrimLong:
2340          // Type conversion from long to byte is a result of code transformations.
2341        case Primitive::kPrimBoolean:
2342          // Boolean input is a result of code transformations.
2343        case Primitive::kPrimShort:
2344        case Primitive::kPrimInt:
2345        case Primitive::kPrimChar:
2346          // Processing a Dex `int-to-byte' instruction.
2347          locations->SetInAt(0, Location::Any());
2348          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2349          break;
2350
2351        default:
2352          LOG(FATAL) << "Unexpected type conversion from " << input_type
2353                     << " to " << result_type;
2354      }
2355      break;
2356
2357    case Primitive::kPrimShort:
2358      switch (input_type) {
2359        case Primitive::kPrimLong:
2360          // Type conversion from long to short is a result of code transformations.
2361        case Primitive::kPrimBoolean:
2362          // Boolean input is a result of code transformations.
2363        case Primitive::kPrimByte:
2364        case Primitive::kPrimInt:
2365        case Primitive::kPrimChar:
2366          // Processing a Dex `int-to-short' instruction.
2367          locations->SetInAt(0, Location::Any());
2368          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2369          break;
2370
2371        default:
2372          LOG(FATAL) << "Unexpected type conversion from " << input_type
2373                     << " to " << result_type;
2374      }
2375      break;
2376
2377    case Primitive::kPrimInt:
2378      switch (input_type) {
2379        case Primitive::kPrimLong:
2380          // Processing a Dex `long-to-int' instruction.
2381          locations->SetInAt(0, Location::Any());
2382          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2383          break;
2384
2385        case Primitive::kPrimFloat:
2386          // Processing a Dex `float-to-int' instruction.
2387          locations->SetInAt(0, Location::RequiresFpuRegister());
2388          locations->SetOut(Location::RequiresRegister());
2389          break;
2390
2391        case Primitive::kPrimDouble:
2392          // Processing a Dex `double-to-int' instruction.
2393          locations->SetInAt(0, Location::RequiresFpuRegister());
2394          locations->SetOut(Location::RequiresRegister());
2395          break;
2396
2397        default:
2398          LOG(FATAL) << "Unexpected type conversion from " << input_type
2399                     << " to " << result_type;
2400      }
2401      break;
2402
2403    case Primitive::kPrimLong:
2404      switch (input_type) {
2405        case Primitive::kPrimBoolean:
2406          // Boolean input is a result of code transformations.
2407        case Primitive::kPrimByte:
2408        case Primitive::kPrimShort:
2409        case Primitive::kPrimInt:
2410        case Primitive::kPrimChar:
2411          // Processing a Dex `int-to-long' instruction.
2412          // TODO: We would benefit from a (to-be-implemented)
2413          // Location::RegisterOrStackSlot requirement for this input.
2414          locations->SetInAt(0, Location::RequiresRegister());
2415          locations->SetOut(Location::RequiresRegister());
2416          break;
2417
2418        case Primitive::kPrimFloat:
2419          // Processing a Dex `float-to-long' instruction.
2420          locations->SetInAt(0, Location::RequiresFpuRegister());
2421          locations->SetOut(Location::RequiresRegister());
2422          break;
2423
2424        case Primitive::kPrimDouble:
2425          // Processing a Dex `double-to-long' instruction.
2426          locations->SetInAt(0, Location::RequiresFpuRegister());
2427          locations->SetOut(Location::RequiresRegister());
2428          break;
2429
2430        default:
2431          LOG(FATAL) << "Unexpected type conversion from " << input_type
2432                     << " to " << result_type;
2433      }
2434      break;
2435
2436    case Primitive::kPrimChar:
2437      switch (input_type) {
2438        case Primitive::kPrimLong:
2439          // Type conversion from long to char is a result of code transformations.
2440        case Primitive::kPrimBoolean:
2441          // Boolean input is a result of code transformations.
2442        case Primitive::kPrimByte:
2443        case Primitive::kPrimShort:
2444        case Primitive::kPrimInt:
2445          // Processing a Dex `int-to-char' instruction.
2446          locations->SetInAt(0, Location::Any());
2447          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2448          break;
2449
2450        default:
2451          LOG(FATAL) << "Unexpected type conversion from " << input_type
2452                     << " to " << result_type;
2453      }
2454      break;
2455
2456    case Primitive::kPrimFloat:
2457      switch (input_type) {
2458        case Primitive::kPrimBoolean:
2459          // Boolean input is a result of code transformations.
2460        case Primitive::kPrimByte:
2461        case Primitive::kPrimShort:
2462        case Primitive::kPrimInt:
2463        case Primitive::kPrimChar:
2464          // Processing a Dex `int-to-float' instruction.
2465          locations->SetInAt(0, Location::Any());
2466          locations->SetOut(Location::RequiresFpuRegister());
2467          break;
2468
2469        case Primitive::kPrimLong:
2470          // Processing a Dex `long-to-float' instruction.
2471          locations->SetInAt(0, Location::Any());
2472          locations->SetOut(Location::RequiresFpuRegister());
2473          break;
2474
2475        case Primitive::kPrimDouble:
2476          // Processing a Dex `double-to-float' instruction.
2477          locations->SetInAt(0, Location::Any());
2478          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2479          break;
2480
2481        default:
2482          LOG(FATAL) << "Unexpected type conversion from " << input_type
2483                     << " to " << result_type;
2484      };
2485      break;
2486
2487    case Primitive::kPrimDouble:
2488      switch (input_type) {
2489        case Primitive::kPrimBoolean:
2490          // Boolean input is a result of code transformations.
2491        case Primitive::kPrimByte:
2492        case Primitive::kPrimShort:
2493        case Primitive::kPrimInt:
2494        case Primitive::kPrimChar:
2495          // Processing a Dex `int-to-double' instruction.
2496          locations->SetInAt(0, Location::Any());
2497          locations->SetOut(Location::RequiresFpuRegister());
2498          break;
2499
2500        case Primitive::kPrimLong:
2501          // Processing a Dex `long-to-double' instruction.
2502          locations->SetInAt(0, Location::Any());
2503          locations->SetOut(Location::RequiresFpuRegister());
2504          break;
2505
2506        case Primitive::kPrimFloat:
2507          // Processing a Dex `float-to-double' instruction.
2508          locations->SetInAt(0, Location::Any());
2509          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2510          break;
2511
2512        default:
2513          LOG(FATAL) << "Unexpected type conversion from " << input_type
2514                     << " to " << result_type;
2515      }
2516      break;
2517
2518    default:
2519      LOG(FATAL) << "Unexpected type conversion from " << input_type
2520                 << " to " << result_type;
2521  }
2522}
2523
2524void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2525  LocationSummary* locations = conversion->GetLocations();
2526  Location out = locations->Out();
2527  Location in = locations->InAt(0);
2528  Primitive::Type result_type = conversion->GetResultType();
2529  Primitive::Type input_type = conversion->GetInputType();
2530  DCHECK_NE(result_type, input_type);
2531  switch (result_type) {
2532    case Primitive::kPrimByte:
2533      switch (input_type) {
2534        case Primitive::kPrimLong:
2535          // Type conversion from long to byte is a result of code transformations.
2536        case Primitive::kPrimBoolean:
2537          // Boolean input is a result of code transformations.
2538        case Primitive::kPrimShort:
2539        case Primitive::kPrimInt:
2540        case Primitive::kPrimChar:
2541          // Processing a Dex `int-to-byte' instruction.
2542          if (in.IsRegister()) {
2543            __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2544          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2545            __ movsxb(out.AsRegister<CpuRegister>(),
2546                      Address(CpuRegister(RSP), in.GetStackIndex()));
2547          } else {
2548            __ movl(out.AsRegister<CpuRegister>(),
2549                    Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2550          }
2551          break;
2552
2553        default:
2554          LOG(FATAL) << "Unexpected type conversion from " << input_type
2555                     << " to " << result_type;
2556      }
2557      break;
2558
2559    case Primitive::kPrimShort:
2560      switch (input_type) {
2561        case Primitive::kPrimLong:
2562          // Type conversion from long to short is a result of code transformations.
2563        case Primitive::kPrimBoolean:
2564          // Boolean input is a result of code transformations.
2565        case Primitive::kPrimByte:
2566        case Primitive::kPrimInt:
2567        case Primitive::kPrimChar:
2568          // Processing a Dex `int-to-short' instruction.
2569          if (in.IsRegister()) {
2570            __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2571          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2572            __ movsxw(out.AsRegister<CpuRegister>(),
2573                      Address(CpuRegister(RSP), in.GetStackIndex()));
2574          } else {
2575            __ movl(out.AsRegister<CpuRegister>(),
2576                    Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2577          }
2578          break;
2579
2580        default:
2581          LOG(FATAL) << "Unexpected type conversion from " << input_type
2582                     << " to " << result_type;
2583      }
2584      break;
2585
2586    case Primitive::kPrimInt:
2587      switch (input_type) {
2588        case Primitive::kPrimLong:
2589          // Processing a Dex `long-to-int' instruction.
2590          if (in.IsRegister()) {
2591            __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2592          } else if (in.IsDoubleStackSlot()) {
2593            __ movl(out.AsRegister<CpuRegister>(),
2594                    Address(CpuRegister(RSP), in.GetStackIndex()));
2595          } else {
2596            DCHECK(in.IsConstant());
2597            DCHECK(in.GetConstant()->IsLongConstant());
2598            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2599            __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2600          }
2601          break;
2602
2603        case Primitive::kPrimFloat: {
2604          // Processing a Dex `float-to-int' instruction.
2605          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2606          CpuRegister output = out.AsRegister<CpuRegister>();
2607          NearLabel done, nan;
2608
2609          __ movl(output, Immediate(kPrimIntMax));
2610          // if input >= (float)INT_MAX goto done
2611          __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2612          __ j(kAboveEqual, &done);
2613          // if input == NaN goto nan
2614          __ j(kUnordered, &nan);
2615          // output = float-to-int-truncate(input)
2616          __ cvttss2si(output, input, false);
2617          __ jmp(&done);
2618          __ Bind(&nan);
2619          //  output = 0
2620          __ xorl(output, output);
2621          __ Bind(&done);
2622          break;
2623        }
2624
2625        case Primitive::kPrimDouble: {
2626          // Processing a Dex `double-to-int' instruction.
2627          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2628          CpuRegister output = out.AsRegister<CpuRegister>();
2629          NearLabel done, nan;
2630
2631          __ movl(output, Immediate(kPrimIntMax));
2632          // if input >= (double)INT_MAX goto done
2633          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2634          __ j(kAboveEqual, &done);
2635          // if input == NaN goto nan
2636          __ j(kUnordered, &nan);
2637          // output = double-to-int-truncate(input)
2638          __ cvttsd2si(output, input);
2639          __ jmp(&done);
2640          __ Bind(&nan);
2641          //  output = 0
2642          __ xorl(output, output);
2643          __ Bind(&done);
2644          break;
2645        }
2646
2647        default:
2648          LOG(FATAL) << "Unexpected type conversion from " << input_type
2649                     << " to " << result_type;
2650      }
2651      break;
2652
2653    case Primitive::kPrimLong:
2654      switch (input_type) {
2655        DCHECK(out.IsRegister());
2656        case Primitive::kPrimBoolean:
2657          // Boolean input is a result of code transformations.
2658        case Primitive::kPrimByte:
2659        case Primitive::kPrimShort:
2660        case Primitive::kPrimInt:
2661        case Primitive::kPrimChar:
2662          // Processing a Dex `int-to-long' instruction.
2663          DCHECK(in.IsRegister());
2664          __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2665          break;
2666
2667        case Primitive::kPrimFloat: {
2668          // Processing a Dex `float-to-long' instruction.
2669          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2670          CpuRegister output = out.AsRegister<CpuRegister>();
2671          NearLabel done, nan;
2672
2673          codegen_->Load64BitValue(output, kPrimLongMax);
2674          // if input >= (float)LONG_MAX goto done
2675          __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2676          __ j(kAboveEqual, &done);
2677          // if input == NaN goto nan
2678          __ j(kUnordered, &nan);
2679          // output = float-to-long-truncate(input)
2680          __ cvttss2si(output, input, true);
2681          __ jmp(&done);
2682          __ Bind(&nan);
2683          //  output = 0
2684          __ xorl(output, output);
2685          __ Bind(&done);
2686          break;
2687        }
2688
2689        case Primitive::kPrimDouble: {
2690          // Processing a Dex `double-to-long' instruction.
2691          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2692          CpuRegister output = out.AsRegister<CpuRegister>();
2693          NearLabel done, nan;
2694
2695          codegen_->Load64BitValue(output, kPrimLongMax);
2696          // if input >= (double)LONG_MAX goto done
2697          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2698          __ j(kAboveEqual, &done);
2699          // if input == NaN goto nan
2700          __ j(kUnordered, &nan);
2701          // output = double-to-long-truncate(input)
2702          __ cvttsd2si(output, input, true);
2703          __ jmp(&done);
2704          __ Bind(&nan);
2705          //  output = 0
2706          __ xorl(output, output);
2707          __ Bind(&done);
2708          break;
2709        }
2710
2711        default:
2712          LOG(FATAL) << "Unexpected type conversion from " << input_type
2713                     << " to " << result_type;
2714      }
2715      break;
2716
2717    case Primitive::kPrimChar:
2718      switch (input_type) {
2719        case Primitive::kPrimLong:
2720          // Type conversion from long to char is a result of code transformations.
2721        case Primitive::kPrimBoolean:
2722          // Boolean input is a result of code transformations.
2723        case Primitive::kPrimByte:
2724        case Primitive::kPrimShort:
2725        case Primitive::kPrimInt:
2726          // Processing a Dex `int-to-char' instruction.
2727          if (in.IsRegister()) {
2728            __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2729          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2730            __ movzxw(out.AsRegister<CpuRegister>(),
2731                      Address(CpuRegister(RSP), in.GetStackIndex()));
2732          } else {
2733            __ movl(out.AsRegister<CpuRegister>(),
2734                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2735          }
2736          break;
2737
2738        default:
2739          LOG(FATAL) << "Unexpected type conversion from " << input_type
2740                     << " to " << result_type;
2741      }
2742      break;
2743
2744    case Primitive::kPrimFloat:
2745      switch (input_type) {
2746        case Primitive::kPrimBoolean:
2747          // Boolean input is a result of code transformations.
2748        case Primitive::kPrimByte:
2749        case Primitive::kPrimShort:
2750        case Primitive::kPrimInt:
2751        case Primitive::kPrimChar:
2752          // Processing a Dex `int-to-float' instruction.
2753          if (in.IsRegister()) {
2754            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2755          } else if (in.IsConstant()) {
2756            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2757            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2758            codegen_->Load32BitValue(dest, static_cast<float>(v));
2759          } else {
2760            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2761                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
2762          }
2763          break;
2764
2765        case Primitive::kPrimLong:
2766          // Processing a Dex `long-to-float' instruction.
2767          if (in.IsRegister()) {
2768            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2769          } else if (in.IsConstant()) {
2770            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2771            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2772            codegen_->Load32BitValue(dest, static_cast<float>(v));
2773          } else {
2774            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2775                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
2776          }
2777          break;
2778
2779        case Primitive::kPrimDouble:
2780          // Processing a Dex `double-to-float' instruction.
2781          if (in.IsFpuRegister()) {
2782            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2783          } else if (in.IsConstant()) {
2784            double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2785            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2786            codegen_->Load32BitValue(dest, static_cast<float>(v));
2787          } else {
2788            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2789                        Address(CpuRegister(RSP), in.GetStackIndex()));
2790          }
2791          break;
2792
2793        default:
2794          LOG(FATAL) << "Unexpected type conversion from " << input_type
2795                     << " to " << result_type;
2796      };
2797      break;
2798
2799    case Primitive::kPrimDouble:
2800      switch (input_type) {
2801        case Primitive::kPrimBoolean:
2802          // Boolean input is a result of code transformations.
2803        case Primitive::kPrimByte:
2804        case Primitive::kPrimShort:
2805        case Primitive::kPrimInt:
2806        case Primitive::kPrimChar:
2807          // Processing a Dex `int-to-double' instruction.
2808          if (in.IsRegister()) {
2809            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2810          } else if (in.IsConstant()) {
2811            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2812            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2813            codegen_->Load64BitValue(dest, static_cast<double>(v));
2814          } else {
2815            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2816                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
2817          }
2818          break;
2819
2820        case Primitive::kPrimLong:
2821          // Processing a Dex `long-to-double' instruction.
2822          if (in.IsRegister()) {
2823            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2824          } else if (in.IsConstant()) {
2825            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2826            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2827            codegen_->Load64BitValue(dest, static_cast<double>(v));
2828          } else {
2829            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2830                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
2831          }
2832          break;
2833
2834        case Primitive::kPrimFloat:
2835          // Processing a Dex `float-to-double' instruction.
2836          if (in.IsFpuRegister()) {
2837            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2838          } else if (in.IsConstant()) {
2839            float v = in.GetConstant()->AsFloatConstant()->GetValue();
2840            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2841            codegen_->Load64BitValue(dest, static_cast<double>(v));
2842          } else {
2843            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
2844                        Address(CpuRegister(RSP), in.GetStackIndex()));
2845          }
2846          break;
2847
2848        default:
2849          LOG(FATAL) << "Unexpected type conversion from " << input_type
2850                     << " to " << result_type;
2851      };
2852      break;
2853
2854    default:
2855      LOG(FATAL) << "Unexpected type conversion from " << input_type
2856                 << " to " << result_type;
2857  }
2858}
2859
2860void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
2861  LocationSummary* locations =
2862      new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2863  switch (add->GetResultType()) {
2864    case Primitive::kPrimInt: {
2865      locations->SetInAt(0, Location::RequiresRegister());
2866      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2867      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2868      break;
2869    }
2870
2871    case Primitive::kPrimLong: {
2872      locations->SetInAt(0, Location::RequiresRegister());
2873      // We can use a leaq or addq if the constant can fit in an immediate.
2874      locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
2875      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2876      break;
2877    }
2878
2879    case Primitive::kPrimDouble:
2880    case Primitive::kPrimFloat: {
2881      locations->SetInAt(0, Location::RequiresFpuRegister());
2882      locations->SetInAt(1, Location::Any());
2883      locations->SetOut(Location::SameAsFirstInput());
2884      break;
2885    }
2886
2887    default:
2888      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2889  }
2890}
2891
2892void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
2893  LocationSummary* locations = add->GetLocations();
2894  Location first = locations->InAt(0);
2895  Location second = locations->InAt(1);
2896  Location out = locations->Out();
2897
2898  switch (add->GetResultType()) {
2899    case Primitive::kPrimInt: {
2900      if (second.IsRegister()) {
2901        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2902          __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2903        } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2904          __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2905        } else {
2906          __ leal(out.AsRegister<CpuRegister>(), Address(
2907              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2908        }
2909      } else if (second.IsConstant()) {
2910        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2911          __ addl(out.AsRegister<CpuRegister>(),
2912                  Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
2913        } else {
2914          __ leal(out.AsRegister<CpuRegister>(), Address(
2915              first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
2916        }
2917      } else {
2918        DCHECK(first.Equals(locations->Out()));
2919        __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
2920      }
2921      break;
2922    }
2923
2924    case Primitive::kPrimLong: {
2925      if (second.IsRegister()) {
2926        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2927          __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2928        } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2929          __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2930        } else {
2931          __ leaq(out.AsRegister<CpuRegister>(), Address(
2932              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2933        }
2934      } else {
2935        DCHECK(second.IsConstant());
2936        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
2937        int32_t int32_value = Low32Bits(value);
2938        DCHECK_EQ(int32_value, value);
2939        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2940          __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
2941        } else {
2942          __ leaq(out.AsRegister<CpuRegister>(), Address(
2943              first.AsRegister<CpuRegister>(), int32_value));
2944        }
2945      }
2946      break;
2947    }
2948
2949    case Primitive::kPrimFloat: {
2950      if (second.IsFpuRegister()) {
2951        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2952      } else if (second.IsConstant()) {
2953        __ addss(first.AsFpuRegister<XmmRegister>(),
2954                 codegen_->LiteralFloatAddress(
2955                     second.GetConstant()->AsFloatConstant()->GetValue()));
2956      } else {
2957        DCHECK(second.IsStackSlot());
2958        __ addss(first.AsFpuRegister<XmmRegister>(),
2959                 Address(CpuRegister(RSP), second.GetStackIndex()));
2960      }
2961      break;
2962    }
2963
2964    case Primitive::kPrimDouble: {
2965      if (second.IsFpuRegister()) {
2966        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2967      } else if (second.IsConstant()) {
2968        __ addsd(first.AsFpuRegister<XmmRegister>(),
2969                 codegen_->LiteralDoubleAddress(
2970                     second.GetConstant()->AsDoubleConstant()->GetValue()));
2971      } else {
2972        DCHECK(second.IsDoubleStackSlot());
2973        __ addsd(first.AsFpuRegister<XmmRegister>(),
2974                 Address(CpuRegister(RSP), second.GetStackIndex()));
2975      }
2976      break;
2977    }
2978
2979    default:
2980      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2981  }
2982}
2983
2984void LocationsBuilderX86_64::VisitSub(HSub* sub) {
2985  LocationSummary* locations =
2986      new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
2987  switch (sub->GetResultType()) {
2988    case Primitive::kPrimInt: {
2989      locations->SetInAt(0, Location::RequiresRegister());
2990      locations->SetInAt(1, Location::Any());
2991      locations->SetOut(Location::SameAsFirstInput());
2992      break;
2993    }
2994    case Primitive::kPrimLong: {
2995      locations->SetInAt(0, Location::RequiresRegister());
2996      locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
2997      locations->SetOut(Location::SameAsFirstInput());
2998      break;
2999    }
3000    case Primitive::kPrimFloat:
3001    case Primitive::kPrimDouble: {
3002      locations->SetInAt(0, Location::RequiresFpuRegister());
3003      locations->SetInAt(1, Location::Any());
3004      locations->SetOut(Location::SameAsFirstInput());
3005      break;
3006    }
3007    default:
3008      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3009  }
3010}
3011
3012void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3013  LocationSummary* locations = sub->GetLocations();
3014  Location first = locations->InAt(0);
3015  Location second = locations->InAt(1);
3016  DCHECK(first.Equals(locations->Out()));
3017  switch (sub->GetResultType()) {
3018    case Primitive::kPrimInt: {
3019      if (second.IsRegister()) {
3020        __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3021      } else if (second.IsConstant()) {
3022        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3023        __ subl(first.AsRegister<CpuRegister>(), imm);
3024      } else {
3025        __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3026      }
3027      break;
3028    }
3029    case Primitive::kPrimLong: {
3030      if (second.IsConstant()) {
3031        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3032        DCHECK(IsInt<32>(value));
3033        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3034      } else {
3035        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3036      }
3037      break;
3038    }
3039
3040    case Primitive::kPrimFloat: {
3041      if (second.IsFpuRegister()) {
3042        __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3043      } else if (second.IsConstant()) {
3044        __ subss(first.AsFpuRegister<XmmRegister>(),
3045                 codegen_->LiteralFloatAddress(
3046                     second.GetConstant()->AsFloatConstant()->GetValue()));
3047      } else {
3048        DCHECK(second.IsStackSlot());
3049        __ subss(first.AsFpuRegister<XmmRegister>(),
3050                 Address(CpuRegister(RSP), second.GetStackIndex()));
3051      }
3052      break;
3053    }
3054
3055    case Primitive::kPrimDouble: {
3056      if (second.IsFpuRegister()) {
3057        __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3058      } else if (second.IsConstant()) {
3059        __ subsd(first.AsFpuRegister<XmmRegister>(),
3060                 codegen_->LiteralDoubleAddress(
3061                     second.GetConstant()->AsDoubleConstant()->GetValue()));
3062      } else {
3063        DCHECK(second.IsDoubleStackSlot());
3064        __ subsd(first.AsFpuRegister<XmmRegister>(),
3065                 Address(CpuRegister(RSP), second.GetStackIndex()));
3066      }
3067      break;
3068    }
3069
3070    default:
3071      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3072  }
3073}
3074
3075void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3076  LocationSummary* locations =
3077      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3078  switch (mul->GetResultType()) {
3079    case Primitive::kPrimInt: {
3080      locations->SetInAt(0, Location::RequiresRegister());
3081      locations->SetInAt(1, Location::Any());
3082      if (mul->InputAt(1)->IsIntConstant()) {
3083        // Can use 3 operand multiply.
3084        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3085      } else {
3086        locations->SetOut(Location::SameAsFirstInput());
3087      }
3088      break;
3089    }
3090    case Primitive::kPrimLong: {
3091      locations->SetInAt(0, Location::RequiresRegister());
3092      locations->SetInAt(1, Location::Any());
3093      if (mul->InputAt(1)->IsLongConstant() &&
3094          IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3095        // Can use 3 operand multiply.
3096        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3097      } else {
3098        locations->SetOut(Location::SameAsFirstInput());
3099      }
3100      break;
3101    }
3102    case Primitive::kPrimFloat:
3103    case Primitive::kPrimDouble: {
3104      locations->SetInAt(0, Location::RequiresFpuRegister());
3105      locations->SetInAt(1, Location::Any());
3106      locations->SetOut(Location::SameAsFirstInput());
3107      break;
3108    }
3109
3110    default:
3111      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3112  }
3113}
3114
3115void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3116  LocationSummary* locations = mul->GetLocations();
3117  Location first = locations->InAt(0);
3118  Location second = locations->InAt(1);
3119  Location out = locations->Out();
3120  switch (mul->GetResultType()) {
3121    case Primitive::kPrimInt:
3122      // The constant may have ended up in a register, so test explicitly to avoid
3123      // problems where the output may not be the same as the first operand.
3124      if (mul->InputAt(1)->IsIntConstant()) {
3125        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3126        __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3127      } else if (second.IsRegister()) {
3128        DCHECK(first.Equals(out));
3129        __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3130      } else {
3131        DCHECK(first.Equals(out));
3132        DCHECK(second.IsStackSlot());
3133        __ imull(first.AsRegister<CpuRegister>(),
3134                 Address(CpuRegister(RSP), second.GetStackIndex()));
3135      }
3136      break;
3137    case Primitive::kPrimLong: {
3138      // The constant may have ended up in a register, so test explicitly to avoid
3139      // problems where the output may not be the same as the first operand.
3140      if (mul->InputAt(1)->IsLongConstant()) {
3141        int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3142        if (IsInt<32>(value)) {
3143          __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3144                   Immediate(static_cast<int32_t>(value)));
3145        } else {
3146          // Have to use the constant area.
3147          DCHECK(first.Equals(out));
3148          __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3149        }
3150      } else if (second.IsRegister()) {
3151        DCHECK(first.Equals(out));
3152        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3153      } else {
3154        DCHECK(second.IsDoubleStackSlot());
3155        DCHECK(first.Equals(out));
3156        __ imulq(first.AsRegister<CpuRegister>(),
3157                 Address(CpuRegister(RSP), second.GetStackIndex()));
3158      }
3159      break;
3160    }
3161
3162    case Primitive::kPrimFloat: {
3163      DCHECK(first.Equals(out));
3164      if (second.IsFpuRegister()) {
3165        __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3166      } else if (second.IsConstant()) {
3167        __ mulss(first.AsFpuRegister<XmmRegister>(),
3168                 codegen_->LiteralFloatAddress(
3169                     second.GetConstant()->AsFloatConstant()->GetValue()));
3170      } else {
3171        DCHECK(second.IsStackSlot());
3172        __ mulss(first.AsFpuRegister<XmmRegister>(),
3173                 Address(CpuRegister(RSP), second.GetStackIndex()));
3174      }
3175      break;
3176    }
3177
3178    case Primitive::kPrimDouble: {
3179      DCHECK(first.Equals(out));
3180      if (second.IsFpuRegister()) {
3181        __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3182      } else if (second.IsConstant()) {
3183        __ mulsd(first.AsFpuRegister<XmmRegister>(),
3184                 codegen_->LiteralDoubleAddress(
3185                     second.GetConstant()->AsDoubleConstant()->GetValue()));
3186      } else {
3187        DCHECK(second.IsDoubleStackSlot());
3188        __ mulsd(first.AsFpuRegister<XmmRegister>(),
3189                 Address(CpuRegister(RSP), second.GetStackIndex()));
3190      }
3191      break;
3192    }
3193
3194    default:
3195      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3196  }
3197}
3198
3199void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3200                                                     uint32_t stack_adjustment, bool is_float) {
3201  if (source.IsStackSlot()) {
3202    DCHECK(is_float);
3203    __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3204  } else if (source.IsDoubleStackSlot()) {
3205    DCHECK(!is_float);
3206    __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3207  } else {
3208    // Write the value to the temporary location on the stack and load to FP stack.
3209    if (is_float) {
3210      Location stack_temp = Location::StackSlot(temp_offset);
3211      codegen_->Move(stack_temp, source);
3212      __ flds(Address(CpuRegister(RSP), temp_offset));
3213    } else {
3214      Location stack_temp = Location::DoubleStackSlot(temp_offset);
3215      codegen_->Move(stack_temp, source);
3216      __ fldl(Address(CpuRegister(RSP), temp_offset));
3217    }
3218  }
3219}
3220
3221void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3222  Primitive::Type type = rem->GetResultType();
3223  bool is_float = type == Primitive::kPrimFloat;
3224  size_t elem_size = Primitive::ComponentSize(type);
3225  LocationSummary* locations = rem->GetLocations();
3226  Location first = locations->InAt(0);
3227  Location second = locations->InAt(1);
3228  Location out = locations->Out();
3229
3230  // Create stack space for 2 elements.
3231  // TODO: enhance register allocator to ask for stack temporaries.
3232  __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3233
3234  // Load the values to the FP stack in reverse order, using temporaries if needed.
3235  PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3236  PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3237
3238  // Loop doing FPREM until we stabilize.
3239  NearLabel retry;
3240  __ Bind(&retry);
3241  __ fprem();
3242
3243  // Move FP status to AX.
3244  __ fstsw();
3245
3246  // And see if the argument reduction is complete. This is signaled by the
3247  // C2 FPU flag bit set to 0.
3248  __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3249  __ j(kNotEqual, &retry);
3250
3251  // We have settled on the final value. Retrieve it into an XMM register.
3252  // Store FP top of stack to real stack.
3253  if (is_float) {
3254    __ fsts(Address(CpuRegister(RSP), 0));
3255  } else {
3256    __ fstl(Address(CpuRegister(RSP), 0));
3257  }
3258
3259  // Pop the 2 items from the FP stack.
3260  __ fucompp();
3261
3262  // Load the value from the stack into an XMM register.
3263  DCHECK(out.IsFpuRegister()) << out;
3264  if (is_float) {
3265    __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3266  } else {
3267    __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3268  }
3269
3270  // And remove the temporary stack space we allocated.
3271  __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3272}
3273
3274void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3275  DCHECK(instruction->IsDiv() || instruction->IsRem());
3276
3277  LocationSummary* locations = instruction->GetLocations();
3278  Location second = locations->InAt(1);
3279  DCHECK(second.IsConstant());
3280
3281  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3282  CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3283  int64_t imm = Int64FromConstant(second.GetConstant());
3284
3285  DCHECK(imm == 1 || imm == -1);
3286
3287  switch (instruction->GetResultType()) {
3288    case Primitive::kPrimInt: {
3289      if (instruction->IsRem()) {
3290        __ xorl(output_register, output_register);
3291      } else {
3292        __ movl(output_register, input_register);
3293        if (imm == -1) {
3294          __ negl(output_register);
3295        }
3296      }
3297      break;
3298    }
3299
3300    case Primitive::kPrimLong: {
3301      if (instruction->IsRem()) {
3302        __ xorl(output_register, output_register);
3303      } else {
3304        __ movq(output_register, input_register);
3305        if (imm == -1) {
3306          __ negq(output_register);
3307        }
3308      }
3309      break;
3310    }
3311
3312    default:
3313      LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3314  }
3315}
3316
3317void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3318  LocationSummary* locations = instruction->GetLocations();
3319  Location second = locations->InAt(1);
3320
3321  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3322  CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3323
3324  int64_t imm = Int64FromConstant(second.GetConstant());
3325  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3326  uint64_t abs_imm = AbsOrMin(imm);
3327
3328  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3329
3330  if (instruction->GetResultType() == Primitive::kPrimInt) {
3331    __ leal(tmp, Address(numerator, abs_imm - 1));
3332    __ testl(numerator, numerator);
3333    __ cmov(kGreaterEqual, tmp, numerator);
3334    int shift = CTZ(imm);
3335    __ sarl(tmp, Immediate(shift));
3336
3337    if (imm < 0) {
3338      __ negl(tmp);
3339    }
3340
3341    __ movl(output_register, tmp);
3342  } else {
3343    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3344    CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3345
3346    codegen_->Load64BitValue(rdx, abs_imm - 1);
3347    __ addq(rdx, numerator);
3348    __ testq(numerator, numerator);
3349    __ cmov(kGreaterEqual, rdx, numerator);
3350    int shift = CTZ(imm);
3351    __ sarq(rdx, Immediate(shift));
3352
3353    if (imm < 0) {
3354      __ negq(rdx);
3355    }
3356
3357    __ movq(output_register, rdx);
3358  }
3359}
3360
3361void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3362  DCHECK(instruction->IsDiv() || instruction->IsRem());
3363
3364  LocationSummary* locations = instruction->GetLocations();
3365  Location second = locations->InAt(1);
3366
3367  CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3368      : locations->GetTemp(0).AsRegister<CpuRegister>();
3369  CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3370  CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3371      : locations->Out().AsRegister<CpuRegister>();
3372  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3373
3374  DCHECK_EQ(RAX, eax.AsRegister());
3375  DCHECK_EQ(RDX, edx.AsRegister());
3376  if (instruction->IsDiv()) {
3377    DCHECK_EQ(RAX, out.AsRegister());
3378  } else {
3379    DCHECK_EQ(RDX, out.AsRegister());
3380  }
3381
3382  int64_t magic;
3383  int shift;
3384
3385  // TODO: can these branches be written as one?
3386  if (instruction->GetResultType() == Primitive::kPrimInt) {
3387    int imm = second.GetConstant()->AsIntConstant()->GetValue();
3388
3389    CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3390
3391    __ movl(numerator, eax);
3392
3393    NearLabel no_div;
3394    NearLabel end;
3395    __ testl(eax, eax);
3396    __ j(kNotEqual, &no_div);
3397
3398    __ xorl(out, out);
3399    __ jmp(&end);
3400
3401    __ Bind(&no_div);
3402
3403    __ movl(eax, Immediate(magic));
3404    __ imull(numerator);
3405
3406    if (imm > 0 && magic < 0) {
3407      __ addl(edx, numerator);
3408    } else if (imm < 0 && magic > 0) {
3409      __ subl(edx, numerator);
3410    }
3411
3412    if (shift != 0) {
3413      __ sarl(edx, Immediate(shift));
3414    }
3415
3416    __ movl(eax, edx);
3417    __ shrl(edx, Immediate(31));
3418    __ addl(edx, eax);
3419
3420    if (instruction->IsRem()) {
3421      __ movl(eax, numerator);
3422      __ imull(edx, Immediate(imm));
3423      __ subl(eax, edx);
3424      __ movl(edx, eax);
3425    } else {
3426      __ movl(eax, edx);
3427    }
3428    __ Bind(&end);
3429  } else {
3430    int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3431
3432    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3433
3434    CpuRegister rax = eax;
3435    CpuRegister rdx = edx;
3436
3437    CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3438
3439    // Save the numerator.
3440    __ movq(numerator, rax);
3441
3442    // RAX = magic
3443    codegen_->Load64BitValue(rax, magic);
3444
3445    // RDX:RAX = magic * numerator
3446    __ imulq(numerator);
3447
3448    if (imm > 0 && magic < 0) {
3449      // RDX += numerator
3450      __ addq(rdx, numerator);
3451    } else if (imm < 0 && magic > 0) {
3452      // RDX -= numerator
3453      __ subq(rdx, numerator);
3454    }
3455
3456    // Shift if needed.
3457    if (shift != 0) {
3458      __ sarq(rdx, Immediate(shift));
3459    }
3460
3461    // RDX += 1 if RDX < 0
3462    __ movq(rax, rdx);
3463    __ shrq(rdx, Immediate(63));
3464    __ addq(rdx, rax);
3465
3466    if (instruction->IsRem()) {
3467      __ movq(rax, numerator);
3468
3469      if (IsInt<32>(imm)) {
3470        __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3471      } else {
3472        __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3473      }
3474
3475      __ subq(rax, rdx);
3476      __ movq(rdx, rax);
3477    } else {
3478      __ movq(rax, rdx);
3479    }
3480  }
3481}
3482
3483void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3484  DCHECK(instruction->IsDiv() || instruction->IsRem());
3485  Primitive::Type type = instruction->GetResultType();
3486  DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
3487
3488  bool is_div = instruction->IsDiv();
3489  LocationSummary* locations = instruction->GetLocations();
3490
3491  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3492  Location second = locations->InAt(1);
3493
3494  DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3495  DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3496
3497  if (second.IsConstant()) {
3498    int64_t imm = Int64FromConstant(second.GetConstant());
3499
3500    if (imm == 0) {
3501      // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3502    } else if (imm == 1 || imm == -1) {
3503      DivRemOneOrMinusOne(instruction);
3504    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3505      DivByPowerOfTwo(instruction->AsDiv());
3506    } else {
3507      DCHECK(imm <= -2 || imm >= 2);
3508      GenerateDivRemWithAnyConstant(instruction);
3509    }
3510  } else {
3511    SlowPathCode* slow_path =
3512        new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3513            instruction, out.AsRegister(), type, is_div);
3514    codegen_->AddSlowPath(slow_path);
3515
3516    CpuRegister second_reg = second.AsRegister<CpuRegister>();
3517    // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3518    // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
3519    // so it's safe to just use negl instead of more complex comparisons.
3520    if (type == Primitive::kPrimInt) {
3521      __ cmpl(second_reg, Immediate(-1));
3522      __ j(kEqual, slow_path->GetEntryLabel());
3523      // edx:eax <- sign-extended of eax
3524      __ cdq();
3525      // eax = quotient, edx = remainder
3526      __ idivl(second_reg);
3527    } else {
3528      __ cmpq(second_reg, Immediate(-1));
3529      __ j(kEqual, slow_path->GetEntryLabel());
3530      // rdx:rax <- sign-extended of rax
3531      __ cqo();
3532      // rax = quotient, rdx = remainder
3533      __ idivq(second_reg);
3534    }
3535    __ Bind(slow_path->GetExitLabel());
3536  }
3537}
3538
3539void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3540  LocationSummary* locations =
3541      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3542  switch (div->GetResultType()) {
3543    case Primitive::kPrimInt:
3544    case Primitive::kPrimLong: {
3545      locations->SetInAt(0, Location::RegisterLocation(RAX));
3546      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3547      locations->SetOut(Location::SameAsFirstInput());
3548      // Intel uses edx:eax as the dividend.
3549      locations->AddTemp(Location::RegisterLocation(RDX));
3550      // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3551      // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3552      // output and request another temp.
3553      if (div->InputAt(1)->IsConstant()) {
3554        locations->AddTemp(Location::RequiresRegister());
3555      }
3556      break;
3557    }
3558
3559    case Primitive::kPrimFloat:
3560    case Primitive::kPrimDouble: {
3561      locations->SetInAt(0, Location::RequiresFpuRegister());
3562      locations->SetInAt(1, Location::Any());
3563      locations->SetOut(Location::SameAsFirstInput());
3564      break;
3565    }
3566
3567    default:
3568      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3569  }
3570}
3571
3572void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3573  LocationSummary* locations = div->GetLocations();
3574  Location first = locations->InAt(0);
3575  Location second = locations->InAt(1);
3576  DCHECK(first.Equals(locations->Out()));
3577
3578  Primitive::Type type = div->GetResultType();
3579  switch (type) {
3580    case Primitive::kPrimInt:
3581    case Primitive::kPrimLong: {
3582      GenerateDivRemIntegral(div);
3583      break;
3584    }
3585
3586    case Primitive::kPrimFloat: {
3587      if (second.IsFpuRegister()) {
3588        __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3589      } else if (second.IsConstant()) {
3590        __ divss(first.AsFpuRegister<XmmRegister>(),
3591                 codegen_->LiteralFloatAddress(
3592                     second.GetConstant()->AsFloatConstant()->GetValue()));
3593      } else {
3594        DCHECK(second.IsStackSlot());
3595        __ divss(first.AsFpuRegister<XmmRegister>(),
3596                 Address(CpuRegister(RSP), second.GetStackIndex()));
3597      }
3598      break;
3599    }
3600
3601    case Primitive::kPrimDouble: {
3602      if (second.IsFpuRegister()) {
3603        __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3604      } else if (second.IsConstant()) {
3605        __ divsd(first.AsFpuRegister<XmmRegister>(),
3606                 codegen_->LiteralDoubleAddress(
3607                     second.GetConstant()->AsDoubleConstant()->GetValue()));
3608      } else {
3609        DCHECK(second.IsDoubleStackSlot());
3610        __ divsd(first.AsFpuRegister<XmmRegister>(),
3611                 Address(CpuRegister(RSP), second.GetStackIndex()));
3612      }
3613      break;
3614    }
3615
3616    default:
3617      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3618  }
3619}
3620
3621void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3622  Primitive::Type type = rem->GetResultType();
3623  LocationSummary* locations =
3624    new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3625
3626  switch (type) {
3627    case Primitive::kPrimInt:
3628    case Primitive::kPrimLong: {
3629      locations->SetInAt(0, Location::RegisterLocation(RAX));
3630      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3631      // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3632      locations->SetOut(Location::RegisterLocation(RDX));
3633      // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3634      // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3635      // output and request another temp.
3636      if (rem->InputAt(1)->IsConstant()) {
3637        locations->AddTemp(Location::RequiresRegister());
3638      }
3639      break;
3640    }
3641
3642    case Primitive::kPrimFloat:
3643    case Primitive::kPrimDouble: {
3644      locations->SetInAt(0, Location::Any());
3645      locations->SetInAt(1, Location::Any());
3646      locations->SetOut(Location::RequiresFpuRegister());
3647      locations->AddTemp(Location::RegisterLocation(RAX));
3648      break;
3649    }
3650
3651    default:
3652      LOG(FATAL) << "Unexpected rem type " << type;
3653  }
3654}
3655
3656void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3657  Primitive::Type type = rem->GetResultType();
3658  switch (type) {
3659    case Primitive::kPrimInt:
3660    case Primitive::kPrimLong: {
3661      GenerateDivRemIntegral(rem);
3662      break;
3663    }
3664    case Primitive::kPrimFloat:
3665    case Primitive::kPrimDouble: {
3666      GenerateRemFP(rem);
3667      break;
3668    }
3669    default:
3670      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3671  }
3672}
3673
3674void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3675  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
3676      ? LocationSummary::kCallOnSlowPath
3677      : LocationSummary::kNoCall;
3678  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3679  locations->SetInAt(0, Location::Any());
3680  if (instruction->HasUses()) {
3681    locations->SetOut(Location::SameAsFirstInput());
3682  }
3683}
3684
3685void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3686  SlowPathCode* slow_path =
3687      new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3688  codegen_->AddSlowPath(slow_path);
3689
3690  LocationSummary* locations = instruction->GetLocations();
3691  Location value = locations->InAt(0);
3692
3693  switch (instruction->GetType()) {
3694    case Primitive::kPrimBoolean:
3695    case Primitive::kPrimByte:
3696    case Primitive::kPrimChar:
3697    case Primitive::kPrimShort:
3698    case Primitive::kPrimInt: {
3699      if (value.IsRegister()) {
3700        __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3701        __ j(kEqual, slow_path->GetEntryLabel());
3702      } else if (value.IsStackSlot()) {
3703        __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3704        __ j(kEqual, slow_path->GetEntryLabel());
3705      } else {
3706        DCHECK(value.IsConstant()) << value;
3707        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3708        __ jmp(slow_path->GetEntryLabel());
3709        }
3710      }
3711      break;
3712    }
3713    case Primitive::kPrimLong: {
3714      if (value.IsRegister()) {
3715        __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3716        __ j(kEqual, slow_path->GetEntryLabel());
3717      } else if (value.IsDoubleStackSlot()) {
3718        __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3719        __ j(kEqual, slow_path->GetEntryLabel());
3720      } else {
3721        DCHECK(value.IsConstant()) << value;
3722        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3723        __ jmp(slow_path->GetEntryLabel());
3724        }
3725      }
3726      break;
3727    }
3728    default:
3729      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3730  }
3731}
3732
3733void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3734  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3735
3736  LocationSummary* locations =
3737      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3738
3739  switch (op->GetResultType()) {
3740    case Primitive::kPrimInt:
3741    case Primitive::kPrimLong: {
3742      locations->SetInAt(0, Location::RequiresRegister());
3743      // The shift count needs to be in CL.
3744      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3745      locations->SetOut(Location::SameAsFirstInput());
3746      break;
3747    }
3748    default:
3749      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3750  }
3751}
3752
3753void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3754  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3755
3756  LocationSummary* locations = op->GetLocations();
3757  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3758  Location second = locations->InAt(1);
3759
3760  switch (op->GetResultType()) {
3761    case Primitive::kPrimInt: {
3762      if (second.IsRegister()) {
3763        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3764        if (op->IsShl()) {
3765          __ shll(first_reg, second_reg);
3766        } else if (op->IsShr()) {
3767          __ sarl(first_reg, second_reg);
3768        } else {
3769          __ shrl(first_reg, second_reg);
3770        }
3771      } else {
3772        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3773        if (op->IsShl()) {
3774          __ shll(first_reg, imm);
3775        } else if (op->IsShr()) {
3776          __ sarl(first_reg, imm);
3777        } else {
3778          __ shrl(first_reg, imm);
3779        }
3780      }
3781      break;
3782    }
3783    case Primitive::kPrimLong: {
3784      if (second.IsRegister()) {
3785        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3786        if (op->IsShl()) {
3787          __ shlq(first_reg, second_reg);
3788        } else if (op->IsShr()) {
3789          __ sarq(first_reg, second_reg);
3790        } else {
3791          __ shrq(first_reg, second_reg);
3792        }
3793      } else {
3794        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3795        if (op->IsShl()) {
3796          __ shlq(first_reg, imm);
3797        } else if (op->IsShr()) {
3798          __ sarq(first_reg, imm);
3799        } else {
3800          __ shrq(first_reg, imm);
3801        }
3802      }
3803      break;
3804    }
3805    default:
3806      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3807      UNREACHABLE();
3808  }
3809}
3810
3811void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3812  LocationSummary* locations =
3813      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3814
3815  switch (ror->GetResultType()) {
3816    case Primitive::kPrimInt:
3817    case Primitive::kPrimLong: {
3818      locations->SetInAt(0, Location::RequiresRegister());
3819      // The shift count needs to be in CL (unless it is a constant).
3820      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3821      locations->SetOut(Location::SameAsFirstInput());
3822      break;
3823    }
3824    default:
3825      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3826      UNREACHABLE();
3827  }
3828}
3829
3830void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3831  LocationSummary* locations = ror->GetLocations();
3832  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3833  Location second = locations->InAt(1);
3834
3835  switch (ror->GetResultType()) {
3836    case Primitive::kPrimInt:
3837      if (second.IsRegister()) {
3838        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3839        __ rorl(first_reg, second_reg);
3840      } else {
3841        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3842        __ rorl(first_reg, imm);
3843      }
3844      break;
3845    case Primitive::kPrimLong:
3846      if (second.IsRegister()) {
3847        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3848        __ rorq(first_reg, second_reg);
3849      } else {
3850        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3851        __ rorq(first_reg, imm);
3852      }
3853      break;
3854    default:
3855      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3856      UNREACHABLE();
3857  }
3858}
3859
3860void LocationsBuilderX86_64::VisitShl(HShl* shl) {
3861  HandleShift(shl);
3862}
3863
3864void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
3865  HandleShift(shl);
3866}
3867
3868void LocationsBuilderX86_64::VisitShr(HShr* shr) {
3869  HandleShift(shr);
3870}
3871
3872void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
3873  HandleShift(shr);
3874}
3875
3876void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
3877  HandleShift(ushr);
3878}
3879
3880void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
3881  HandleShift(ushr);
3882}
3883
3884void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
3885  LocationSummary* locations =
3886      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3887  InvokeRuntimeCallingConvention calling_convention;
3888  if (instruction->IsStringAlloc()) {
3889    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
3890  } else {
3891    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3892    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3893  }
3894  locations->SetOut(Location::RegisterLocation(RAX));
3895}
3896
3897void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
3898  // Note: if heap poisoning is enabled, the entry point takes cares
3899  // of poisoning the reference.
3900  if (instruction->IsStringAlloc()) {
3901    // String is allocated through StringFactory. Call NewEmptyString entry point.
3902    CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
3903    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
3904    __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
3905    __ call(Address(temp, code_offset.SizeValue()));
3906    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
3907  } else {
3908    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3909                            instruction,
3910                            instruction->GetDexPc(),
3911                            nullptr);
3912    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
3913    DCHECK(!codegen_->IsLeafMethod());
3914  }
3915}
3916
3917void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
3918  LocationSummary* locations =
3919      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3920  InvokeRuntimeCallingConvention calling_convention;
3921  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3922  locations->SetOut(Location::RegisterLocation(RAX));
3923  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3924  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
3925}
3926
3927void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
3928  InvokeRuntimeCallingConvention calling_convention;
3929  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
3930                           instruction->GetTypeIndex());
3931  // Note: if heap poisoning is enabled, the entry point takes cares
3932  // of poisoning the reference.
3933  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3934                          instruction,
3935                          instruction->GetDexPc(),
3936                          nullptr);
3937  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
3938
3939  DCHECK(!codegen_->IsLeafMethod());
3940}
3941
3942void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
3943  LocationSummary* locations =
3944      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3945  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
3946  if (location.IsStackSlot()) {
3947    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3948  } else if (location.IsDoubleStackSlot()) {
3949    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3950  }
3951  locations->SetOut(location);
3952}
3953
3954void InstructionCodeGeneratorX86_64::VisitParameterValue(
3955    HParameterValue* instruction ATTRIBUTE_UNUSED) {
3956  // Nothing to do, the parameter is already at its location.
3957}
3958
3959void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
3960  LocationSummary* locations =
3961      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3962  locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
3963}
3964
3965void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
3966    HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
3967  // Nothing to do, the method is already at its location.
3968}
3969
3970void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3971  LocationSummary* locations =
3972      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3973  locations->SetInAt(0, Location::RequiresRegister());
3974  locations->SetOut(Location::RequiresRegister());
3975}
3976
3977void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3978  LocationSummary* locations = instruction->GetLocations();
3979  uint32_t method_offset = 0;
3980  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
3981    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3982        instruction->GetIndex(), kX86_64PointerSize).SizeValue();
3983  } else {
3984    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
3985        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
3986  }
3987  __ movq(locations->Out().AsRegister<CpuRegister>(),
3988          Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
3989}
3990
3991void LocationsBuilderX86_64::VisitNot(HNot* not_) {
3992  LocationSummary* locations =
3993      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
3994  locations->SetInAt(0, Location::RequiresRegister());
3995  locations->SetOut(Location::SameAsFirstInput());
3996}
3997
3998void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
3999  LocationSummary* locations = not_->GetLocations();
4000  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4001            locations->Out().AsRegister<CpuRegister>().AsRegister());
4002  Location out = locations->Out();
4003  switch (not_->GetResultType()) {
4004    case Primitive::kPrimInt:
4005      __ notl(out.AsRegister<CpuRegister>());
4006      break;
4007
4008    case Primitive::kPrimLong:
4009      __ notq(out.AsRegister<CpuRegister>());
4010      break;
4011
4012    default:
4013      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4014  }
4015}
4016
4017void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4018  LocationSummary* locations =
4019      new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4020  locations->SetInAt(0, Location::RequiresRegister());
4021  locations->SetOut(Location::SameAsFirstInput());
4022}
4023
4024void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4025  LocationSummary* locations = bool_not->GetLocations();
4026  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4027            locations->Out().AsRegister<CpuRegister>().AsRegister());
4028  Location out = locations->Out();
4029  __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4030}
4031
4032void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4033  LocationSummary* locations =
4034      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4035  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
4036    locations->SetInAt(i, Location::Any());
4037  }
4038  locations->SetOut(Location::Any());
4039}
4040
4041void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4042  LOG(FATAL) << "Unimplemented";
4043}
4044
4045void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4046  /*
4047   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4048   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4049   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4050   */
4051  switch (kind) {
4052    case MemBarrierKind::kAnyAny: {
4053      MemoryFence();
4054      break;
4055    }
4056    case MemBarrierKind::kAnyStore:
4057    case MemBarrierKind::kLoadAny:
4058    case MemBarrierKind::kStoreStore: {
4059      // nop
4060      break;
4061    }
4062    default:
4063      LOG(FATAL) << "Unexpected memory barier " << kind;
4064  }
4065}
4066
4067void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4068  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4069
4070  bool object_field_get_with_read_barrier =
4071      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4072  LocationSummary* locations =
4073      new (GetGraph()->GetArena()) LocationSummary(instruction,
4074                                                   object_field_get_with_read_barrier ?
4075                                                       LocationSummary::kCallOnSlowPath :
4076                                                       LocationSummary::kNoCall);
4077  locations->SetInAt(0, Location::RequiresRegister());
4078  if (Primitive::IsFloatingPointType(instruction->GetType())) {
4079    locations->SetOut(Location::RequiresFpuRegister());
4080  } else {
4081    // The output overlaps for an object field get when read barriers
4082    // are enabled: we do not want the move to overwrite the object's
4083    // location, as we need it to emit the read barrier.
4084    locations->SetOut(
4085        Location::RequiresRegister(),
4086        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4087  }
4088  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4089    // We need a temporary register for the read barrier marking slow
4090    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
4091    locations->AddTemp(Location::RequiresRegister());
4092  }
4093}
4094
4095void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4096                                                    const FieldInfo& field_info) {
4097  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4098
4099  LocationSummary* locations = instruction->GetLocations();
4100  Location base_loc = locations->InAt(0);
4101  CpuRegister base = base_loc.AsRegister<CpuRegister>();
4102  Location out = locations->Out();
4103  bool is_volatile = field_info.IsVolatile();
4104  Primitive::Type field_type = field_info.GetFieldType();
4105  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4106
4107  switch (field_type) {
4108    case Primitive::kPrimBoolean: {
4109      __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4110      break;
4111    }
4112
4113    case Primitive::kPrimByte: {
4114      __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4115      break;
4116    }
4117
4118    case Primitive::kPrimShort: {
4119      __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4120      break;
4121    }
4122
4123    case Primitive::kPrimChar: {
4124      __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4125      break;
4126    }
4127
4128    case Primitive::kPrimInt: {
4129      __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4130      break;
4131    }
4132
4133    case Primitive::kPrimNot: {
4134      // /* HeapReference<Object> */ out = *(base + offset)
4135      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4136        Location temp_loc = locations->GetTemp(0);
4137        // Note that a potential implicit null check is handled in this
4138        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
4139        codegen_->GenerateFieldLoadWithBakerReadBarrier(
4140            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
4141        if (is_volatile) {
4142          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4143        }
4144      } else {
4145        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4146        codegen_->MaybeRecordImplicitNullCheck(instruction);
4147        if (is_volatile) {
4148          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4149        }
4150        // If read barriers are enabled, emit read barriers other than
4151        // Baker's using a slow path (and also unpoison the loaded
4152        // reference, if heap poisoning is enabled).
4153        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4154      }
4155      break;
4156    }
4157
4158    case Primitive::kPrimLong: {
4159      __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4160      break;
4161    }
4162
4163    case Primitive::kPrimFloat: {
4164      __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4165      break;
4166    }
4167
4168    case Primitive::kPrimDouble: {
4169      __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4170      break;
4171    }
4172
4173    case Primitive::kPrimVoid:
4174      LOG(FATAL) << "Unreachable type " << field_type;
4175      UNREACHABLE();
4176  }
4177
4178  if (field_type == Primitive::kPrimNot) {
4179    // Potential implicit null checks, in the case of reference
4180    // fields, are handled in the previous switch statement.
4181  } else {
4182    codegen_->MaybeRecordImplicitNullCheck(instruction);
4183  }
4184
4185  if (is_volatile) {
4186    if (field_type == Primitive::kPrimNot) {
4187      // Memory barriers, in the case of references, are also handled
4188      // in the previous switch statement.
4189    } else {
4190      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4191    }
4192  }
4193}
4194
4195void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4196                                            const FieldInfo& field_info) {
4197  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4198
4199  LocationSummary* locations =
4200      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4201  Primitive::Type field_type = field_info.GetFieldType();
4202  bool is_volatile = field_info.IsVolatile();
4203  bool needs_write_barrier =
4204      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4205
4206  locations->SetInAt(0, Location::RequiresRegister());
4207  if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4208    if (is_volatile) {
4209      // In order to satisfy the semantics of volatile, this must be a single instruction store.
4210      locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4211    } else {
4212      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4213    }
4214  } else {
4215    if (is_volatile) {
4216      // In order to satisfy the semantics of volatile, this must be a single instruction store.
4217      locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4218    } else {
4219      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4220    }
4221  }
4222  if (needs_write_barrier) {
4223    // Temporary registers for the write barrier.
4224    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4225    locations->AddTemp(Location::RequiresRegister());
4226  } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4227    // Temporary register for the reference poisoning.
4228    locations->AddTemp(Location::RequiresRegister());
4229  }
4230}
4231
4232void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4233                                                    const FieldInfo& field_info,
4234                                                    bool value_can_be_null) {
4235  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4236
4237  LocationSummary* locations = instruction->GetLocations();
4238  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4239  Location value = locations->InAt(1);
4240  bool is_volatile = field_info.IsVolatile();
4241  Primitive::Type field_type = field_info.GetFieldType();
4242  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4243
4244  if (is_volatile) {
4245    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4246  }
4247
4248  bool maybe_record_implicit_null_check_done = false;
4249
4250  switch (field_type) {
4251    case Primitive::kPrimBoolean:
4252    case Primitive::kPrimByte: {
4253      if (value.IsConstant()) {
4254        int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4255        __ movb(Address(base, offset), Immediate(v));
4256      } else {
4257        __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4258      }
4259      break;
4260    }
4261
4262    case Primitive::kPrimShort:
4263    case Primitive::kPrimChar: {
4264      if (value.IsConstant()) {
4265        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4266        __ movw(Address(base, offset), Immediate(v));
4267      } else {
4268        __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4269      }
4270      break;
4271    }
4272
4273    case Primitive::kPrimInt:
4274    case Primitive::kPrimNot: {
4275      if (value.IsConstant()) {
4276        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4277        // `field_type == Primitive::kPrimNot` implies `v == 0`.
4278        DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4279        // Note: if heap poisoning is enabled, no need to poison
4280        // (negate) `v` if it is a reference, as it would be null.
4281        __ movl(Address(base, offset), Immediate(v));
4282      } else {
4283        if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4284          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4285          __ movl(temp, value.AsRegister<CpuRegister>());
4286          __ PoisonHeapReference(temp);
4287          __ movl(Address(base, offset), temp);
4288        } else {
4289          __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4290        }
4291      }
4292      break;
4293    }
4294
4295    case Primitive::kPrimLong: {
4296      if (value.IsConstant()) {
4297        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4298        codegen_->MoveInt64ToAddress(Address(base, offset),
4299                                     Address(base, offset + sizeof(int32_t)),
4300                                     v,
4301                                     instruction);
4302        maybe_record_implicit_null_check_done = true;
4303      } else {
4304        __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4305      }
4306      break;
4307    }
4308
4309    case Primitive::kPrimFloat: {
4310      if (value.IsConstant()) {
4311        int32_t v =
4312            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4313        __ movl(Address(base, offset), Immediate(v));
4314      } else {
4315        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4316      }
4317      break;
4318    }
4319
4320    case Primitive::kPrimDouble: {
4321      if (value.IsConstant()) {
4322        int64_t v =
4323            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4324        codegen_->MoveInt64ToAddress(Address(base, offset),
4325                                     Address(base, offset + sizeof(int32_t)),
4326                                     v,
4327                                     instruction);
4328        maybe_record_implicit_null_check_done = true;
4329      } else {
4330        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4331      }
4332      break;
4333    }
4334
4335    case Primitive::kPrimVoid:
4336      LOG(FATAL) << "Unreachable type " << field_type;
4337      UNREACHABLE();
4338  }
4339
4340  if (!maybe_record_implicit_null_check_done) {
4341    codegen_->MaybeRecordImplicitNullCheck(instruction);
4342  }
4343
4344  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4345    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4346    CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4347    codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4348  }
4349
4350  if (is_volatile) {
4351    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4352  }
4353}
4354
4355void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4356  HandleFieldSet(instruction, instruction->GetFieldInfo());
4357}
4358
4359void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4360  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4361}
4362
4363void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4364  HandleFieldGet(instruction);
4365}
4366
4367void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4368  HandleFieldGet(instruction, instruction->GetFieldInfo());
4369}
4370
4371void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4372  HandleFieldGet(instruction);
4373}
4374
4375void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4376  HandleFieldGet(instruction, instruction->GetFieldInfo());
4377}
4378
4379void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4380  HandleFieldSet(instruction, instruction->GetFieldInfo());
4381}
4382
4383void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4384  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4385}
4386
4387void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4388    HUnresolvedInstanceFieldGet* instruction) {
4389  FieldAccessCallingConventionX86_64 calling_convention;
4390  codegen_->CreateUnresolvedFieldLocationSummary(
4391      instruction, instruction->GetFieldType(), calling_convention);
4392}
4393
4394void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4395    HUnresolvedInstanceFieldGet* instruction) {
4396  FieldAccessCallingConventionX86_64 calling_convention;
4397  codegen_->GenerateUnresolvedFieldAccess(instruction,
4398                                          instruction->GetFieldType(),
4399                                          instruction->GetFieldIndex(),
4400                                          instruction->GetDexPc(),
4401                                          calling_convention);
4402}
4403
4404void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4405    HUnresolvedInstanceFieldSet* instruction) {
4406  FieldAccessCallingConventionX86_64 calling_convention;
4407  codegen_->CreateUnresolvedFieldLocationSummary(
4408      instruction, instruction->GetFieldType(), calling_convention);
4409}
4410
4411void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4412    HUnresolvedInstanceFieldSet* instruction) {
4413  FieldAccessCallingConventionX86_64 calling_convention;
4414  codegen_->GenerateUnresolvedFieldAccess(instruction,
4415                                          instruction->GetFieldType(),
4416                                          instruction->GetFieldIndex(),
4417                                          instruction->GetDexPc(),
4418                                          calling_convention);
4419}
4420
4421void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4422    HUnresolvedStaticFieldGet* instruction) {
4423  FieldAccessCallingConventionX86_64 calling_convention;
4424  codegen_->CreateUnresolvedFieldLocationSummary(
4425      instruction, instruction->GetFieldType(), calling_convention);
4426}
4427
4428void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4429    HUnresolvedStaticFieldGet* instruction) {
4430  FieldAccessCallingConventionX86_64 calling_convention;
4431  codegen_->GenerateUnresolvedFieldAccess(instruction,
4432                                          instruction->GetFieldType(),
4433                                          instruction->GetFieldIndex(),
4434                                          instruction->GetDexPc(),
4435                                          calling_convention);
4436}
4437
4438void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4439    HUnresolvedStaticFieldSet* instruction) {
4440  FieldAccessCallingConventionX86_64 calling_convention;
4441  codegen_->CreateUnresolvedFieldLocationSummary(
4442      instruction, instruction->GetFieldType(), calling_convention);
4443}
4444
4445void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4446    HUnresolvedStaticFieldSet* instruction) {
4447  FieldAccessCallingConventionX86_64 calling_convention;
4448  codegen_->GenerateUnresolvedFieldAccess(instruction,
4449                                          instruction->GetFieldType(),
4450                                          instruction->GetFieldIndex(),
4451                                          instruction->GetDexPc(),
4452                                          calling_convention);
4453}
4454
4455void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4456  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4457      ? LocationSummary::kCallOnSlowPath
4458      : LocationSummary::kNoCall;
4459  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4460  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
4461      ? Location::RequiresRegister()
4462      : Location::Any();
4463  locations->SetInAt(0, loc);
4464  if (instruction->HasUses()) {
4465    locations->SetOut(Location::SameAsFirstInput());
4466  }
4467}
4468
4469void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4470  if (CanMoveNullCheckToUser(instruction)) {
4471    return;
4472  }
4473  LocationSummary* locations = instruction->GetLocations();
4474  Location obj = locations->InAt(0);
4475
4476  __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4477  RecordPcInfo(instruction, instruction->GetDexPc());
4478}
4479
4480void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4481  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4482  AddSlowPath(slow_path);
4483
4484  LocationSummary* locations = instruction->GetLocations();
4485  Location obj = locations->InAt(0);
4486
4487  if (obj.IsRegister()) {
4488    __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4489  } else if (obj.IsStackSlot()) {
4490    __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4491  } else {
4492    DCHECK(obj.IsConstant()) << obj;
4493    DCHECK(obj.GetConstant()->IsNullConstant());
4494    __ jmp(slow_path->GetEntryLabel());
4495    return;
4496  }
4497  __ j(kEqual, slow_path->GetEntryLabel());
4498}
4499
4500void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4501  codegen_->GenerateNullCheck(instruction);
4502}
4503
4504void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4505  bool object_array_get_with_read_barrier =
4506      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4507  LocationSummary* locations =
4508      new (GetGraph()->GetArena()) LocationSummary(instruction,
4509                                                   object_array_get_with_read_barrier ?
4510                                                       LocationSummary::kCallOnSlowPath :
4511                                                       LocationSummary::kNoCall);
4512  locations->SetInAt(0, Location::RequiresRegister());
4513  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4514  if (Primitive::IsFloatingPointType(instruction->GetType())) {
4515    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4516  } else {
4517    // The output overlaps for an object array get when read barriers
4518    // are enabled: we do not want the move to overwrite the array's
4519    // location, as we need it to emit the read barrier.
4520    locations->SetOut(
4521        Location::RequiresRegister(),
4522        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4523  }
4524  // We need a temporary register for the read barrier marking slow
4525  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
4526  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4527    locations->AddTemp(Location::RequiresRegister());
4528  }
4529}
4530
4531void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4532  LocationSummary* locations = instruction->GetLocations();
4533  Location obj_loc = locations->InAt(0);
4534  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4535  Location index = locations->InAt(1);
4536  Location out_loc = locations->Out();
4537
4538  Primitive::Type type = instruction->GetType();
4539  switch (type) {
4540    case Primitive::kPrimBoolean: {
4541      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4542      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4543      if (index.IsConstant()) {
4544        __ movzxb(out, Address(obj,
4545            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4546      } else {
4547        __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4548      }
4549      break;
4550    }
4551
4552    case Primitive::kPrimByte: {
4553      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
4554      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4555      if (index.IsConstant()) {
4556        __ movsxb(out, Address(obj,
4557            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4558      } else {
4559        __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4560      }
4561      break;
4562    }
4563
4564    case Primitive::kPrimShort: {
4565      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
4566      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4567      if (index.IsConstant()) {
4568        __ movsxw(out, Address(obj,
4569            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4570      } else {
4571        __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4572      }
4573      break;
4574    }
4575
4576    case Primitive::kPrimChar: {
4577      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4578      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4579      if (index.IsConstant()) {
4580        __ movzxw(out, Address(obj,
4581            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4582      } else {
4583        __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4584      }
4585      break;
4586    }
4587
4588    case Primitive::kPrimInt: {
4589      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4590      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4591      if (index.IsConstant()) {
4592        __ movl(out, Address(obj,
4593            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4594      } else {
4595        __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4596      }
4597      break;
4598    }
4599
4600    case Primitive::kPrimNot: {
4601      static_assert(
4602          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4603          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4604      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4605      // /* HeapReference<Object> */ out =
4606      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
4607      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4608        Location temp = locations->GetTemp(0);
4609        // Note that a potential implicit null check is handled in this
4610        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
4611        codegen_->GenerateArrayLoadWithBakerReadBarrier(
4612            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
4613      } else {
4614        CpuRegister out = out_loc.AsRegister<CpuRegister>();
4615        if (index.IsConstant()) {
4616          uint32_t offset =
4617              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4618          __ movl(out, Address(obj, offset));
4619          codegen_->MaybeRecordImplicitNullCheck(instruction);
4620          // If read barriers are enabled, emit read barriers other than
4621          // Baker's using a slow path (and also unpoison the loaded
4622          // reference, if heap poisoning is enabled).
4623          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4624        } else {
4625          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4626          codegen_->MaybeRecordImplicitNullCheck(instruction);
4627          // If read barriers are enabled, emit read barriers other than
4628          // Baker's using a slow path (and also unpoison the loaded
4629          // reference, if heap poisoning is enabled).
4630          codegen_->MaybeGenerateReadBarrierSlow(
4631              instruction, out_loc, out_loc, obj_loc, data_offset, index);
4632        }
4633      }
4634      break;
4635    }
4636
4637    case Primitive::kPrimLong: {
4638      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4639      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4640      if (index.IsConstant()) {
4641        __ movq(out, Address(obj,
4642            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4643      } else {
4644        __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4645      }
4646      break;
4647    }
4648
4649    case Primitive::kPrimFloat: {
4650      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4651      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4652      if (index.IsConstant()) {
4653        __ movss(out, Address(obj,
4654            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4655      } else {
4656        __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4657      }
4658      break;
4659    }
4660
4661    case Primitive::kPrimDouble: {
4662      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4663      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4664      if (index.IsConstant()) {
4665        __ movsd(out, Address(obj,
4666            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4667      } else {
4668        __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4669      }
4670      break;
4671    }
4672
4673    case Primitive::kPrimVoid:
4674      LOG(FATAL) << "Unreachable type " << type;
4675      UNREACHABLE();
4676  }
4677
4678  if (type == Primitive::kPrimNot) {
4679    // Potential implicit null checks, in the case of reference
4680    // arrays, are handled in the previous switch statement.
4681  } else {
4682    codegen_->MaybeRecordImplicitNullCheck(instruction);
4683  }
4684}
4685
4686void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4687  Primitive::Type value_type = instruction->GetComponentType();
4688
4689  bool needs_write_barrier =
4690      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4691  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4692  bool object_array_set_with_read_barrier =
4693      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
4694
4695  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4696      instruction,
4697      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
4698          LocationSummary::kCallOnSlowPath :
4699          LocationSummary::kNoCall);
4700
4701  locations->SetInAt(0, Location::RequiresRegister());
4702  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4703  if (Primitive::IsFloatingPointType(value_type)) {
4704    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4705  } else {
4706    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4707  }
4708
4709  if (needs_write_barrier) {
4710    // Temporary registers for the write barrier.
4711
4712    // This first temporary register is possibly used for heap
4713    // reference poisoning and/or read barrier emission too.
4714    locations->AddTemp(Location::RequiresRegister());
4715    locations->AddTemp(Location::RequiresRegister());
4716  }
4717}
4718
4719void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4720  LocationSummary* locations = instruction->GetLocations();
4721  Location array_loc = locations->InAt(0);
4722  CpuRegister array = array_loc.AsRegister<CpuRegister>();
4723  Location index = locations->InAt(1);
4724  Location value = locations->InAt(2);
4725  Primitive::Type value_type = instruction->GetComponentType();
4726  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4727  bool needs_write_barrier =
4728      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4729  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4730  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4731  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4732
4733  switch (value_type) {
4734    case Primitive::kPrimBoolean:
4735    case Primitive::kPrimByte: {
4736      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4737      Address address = index.IsConstant()
4738          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
4739          : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
4740      if (value.IsRegister()) {
4741        __ movb(address, value.AsRegister<CpuRegister>());
4742      } else {
4743        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4744      }
4745      codegen_->MaybeRecordImplicitNullCheck(instruction);
4746      break;
4747    }
4748
4749    case Primitive::kPrimShort:
4750    case Primitive::kPrimChar: {
4751      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4752      Address address = index.IsConstant()
4753          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
4754          : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
4755      if (value.IsRegister()) {
4756        __ movw(address, value.AsRegister<CpuRegister>());
4757      } else {
4758        DCHECK(value.IsConstant()) << value;
4759        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4760      }
4761      codegen_->MaybeRecordImplicitNullCheck(instruction);
4762      break;
4763    }
4764
4765    case Primitive::kPrimNot: {
4766      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4767      Address address = index.IsConstant()
4768          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4769          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4770
4771      if (!value.IsRegister()) {
4772        // Just setting null.
4773        DCHECK(instruction->InputAt(2)->IsNullConstant());
4774        DCHECK(value.IsConstant()) << value;
4775        __ movl(address, Immediate(0));
4776        codegen_->MaybeRecordImplicitNullCheck(instruction);
4777        DCHECK(!needs_write_barrier);
4778        DCHECK(!may_need_runtime_call_for_type_check);
4779        break;
4780      }
4781
4782      DCHECK(needs_write_barrier);
4783      CpuRegister register_value = value.AsRegister<CpuRegister>();
4784      NearLabel done, not_null, do_put;
4785      SlowPathCode* slow_path = nullptr;
4786      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4787      if (may_need_runtime_call_for_type_check) {
4788        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4789        codegen_->AddSlowPath(slow_path);
4790        if (instruction->GetValueCanBeNull()) {
4791          __ testl(register_value, register_value);
4792          __ j(kNotEqual, &not_null);
4793          __ movl(address, Immediate(0));
4794          codegen_->MaybeRecordImplicitNullCheck(instruction);
4795          __ jmp(&done);
4796          __ Bind(&not_null);
4797        }
4798
4799        if (kEmitCompilerReadBarrier) {
4800          // When read barriers are enabled, the type checking
4801          // instrumentation requires two read barriers:
4802          //
4803          //   __ movl(temp2, temp);
4804          //   // /* HeapReference<Class> */ temp = temp->component_type_
4805          //   __ movl(temp, Address(temp, component_offset));
4806          //   codegen_->GenerateReadBarrierSlow(
4807          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
4808          //
4809          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
4810          //   __ movl(temp2, Address(register_value, class_offset));
4811          //   codegen_->GenerateReadBarrierSlow(
4812          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
4813          //
4814          //   __ cmpl(temp, temp2);
4815          //
4816          // However, the second read barrier may trash `temp`, as it
4817          // is a temporary register, and as such would not be saved
4818          // along with live registers before calling the runtime (nor
4819          // restored afterwards).  So in this case, we bail out and
4820          // delegate the work to the array set slow path.
4821          //
4822          // TODO: Extend the register allocator to support a new
4823          // "(locally) live temp" location so as to avoid always
4824          // going into the slow path when read barriers are enabled.
4825          __ jmp(slow_path->GetEntryLabel());
4826        } else {
4827          // /* HeapReference<Class> */ temp = array->klass_
4828          __ movl(temp, Address(array, class_offset));
4829          codegen_->MaybeRecordImplicitNullCheck(instruction);
4830          __ MaybeUnpoisonHeapReference(temp);
4831
4832          // /* HeapReference<Class> */ temp = temp->component_type_
4833          __ movl(temp, Address(temp, component_offset));
4834          // If heap poisoning is enabled, no need to unpoison `temp`
4835          // nor the object reference in `register_value->klass`, as
4836          // we are comparing two poisoned references.
4837          __ cmpl(temp, Address(register_value, class_offset));
4838
4839          if (instruction->StaticTypeOfArrayIsObjectArray()) {
4840            __ j(kEqual, &do_put);
4841            // If heap poisoning is enabled, the `temp` reference has
4842            // not been unpoisoned yet; unpoison it now.
4843            __ MaybeUnpoisonHeapReference(temp);
4844
4845            // /* HeapReference<Class> */ temp = temp->super_class_
4846            __ movl(temp, Address(temp, super_offset));
4847            // If heap poisoning is enabled, no need to unpoison
4848            // `temp`, as we are comparing against null below.
4849            __ testl(temp, temp);
4850            __ j(kNotEqual, slow_path->GetEntryLabel());
4851            __ Bind(&do_put);
4852          } else {
4853            __ j(kNotEqual, slow_path->GetEntryLabel());
4854          }
4855        }
4856      }
4857
4858      if (kPoisonHeapReferences) {
4859        __ movl(temp, register_value);
4860        __ PoisonHeapReference(temp);
4861        __ movl(address, temp);
4862      } else {
4863        __ movl(address, register_value);
4864      }
4865      if (!may_need_runtime_call_for_type_check) {
4866        codegen_->MaybeRecordImplicitNullCheck(instruction);
4867      }
4868
4869      CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4870      codegen_->MarkGCCard(
4871          temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4872      __ Bind(&done);
4873
4874      if (slow_path != nullptr) {
4875        __ Bind(slow_path->GetExitLabel());
4876      }
4877
4878      break;
4879    }
4880
4881    case Primitive::kPrimInt: {
4882      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4883      Address address = index.IsConstant()
4884          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4885          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4886      if (value.IsRegister()) {
4887        __ movl(address, value.AsRegister<CpuRegister>());
4888      } else {
4889        DCHECK(value.IsConstant()) << value;
4890        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4891        __ movl(address, Immediate(v));
4892      }
4893      codegen_->MaybeRecordImplicitNullCheck(instruction);
4894      break;
4895    }
4896
4897    case Primitive::kPrimLong: {
4898      uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4899      Address address = index.IsConstant()
4900          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4901          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4902      if (value.IsRegister()) {
4903        __ movq(address, value.AsRegister<CpuRegister>());
4904        codegen_->MaybeRecordImplicitNullCheck(instruction);
4905      } else {
4906        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4907        Address address_high = index.IsConstant()
4908            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4909                offset + sizeof(int32_t))
4910            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4911        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4912      }
4913      break;
4914    }
4915
4916    case Primitive::kPrimFloat: {
4917      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4918      Address address = index.IsConstant()
4919          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4920          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4921      if (value.IsFpuRegister()) {
4922        __ movss(address, value.AsFpuRegister<XmmRegister>());
4923      } else {
4924        DCHECK(value.IsConstant());
4925        int32_t v =
4926            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4927        __ movl(address, Immediate(v));
4928      }
4929      codegen_->MaybeRecordImplicitNullCheck(instruction);
4930      break;
4931    }
4932
4933    case Primitive::kPrimDouble: {
4934      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4935      Address address = index.IsConstant()
4936          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4937          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4938      if (value.IsFpuRegister()) {
4939        __ movsd(address, value.AsFpuRegister<XmmRegister>());
4940        codegen_->MaybeRecordImplicitNullCheck(instruction);
4941      } else {
4942        int64_t v =
4943            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4944        Address address_high = index.IsConstant()
4945            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4946                offset + sizeof(int32_t))
4947            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4948        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4949      }
4950      break;
4951    }
4952
4953    case Primitive::kPrimVoid:
4954      LOG(FATAL) << "Unreachable type " << instruction->GetType();
4955      UNREACHABLE();
4956  }
4957}
4958
4959void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
4960  LocationSummary* locations =
4961      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4962  locations->SetInAt(0, Location::RequiresRegister());
4963  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4964}
4965
4966void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
4967  LocationSummary* locations = instruction->GetLocations();
4968  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
4969  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
4970  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4971  __ movl(out, Address(obj, offset));
4972  codegen_->MaybeRecordImplicitNullCheck(instruction);
4973}
4974
4975void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4976  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4977      ? LocationSummary::kCallOnSlowPath
4978      : LocationSummary::kNoCall;
4979  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4980  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4981  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4982  if (instruction->HasUses()) {
4983    locations->SetOut(Location::SameAsFirstInput());
4984  }
4985}
4986
4987void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4988  LocationSummary* locations = instruction->GetLocations();
4989  Location index_loc = locations->InAt(0);
4990  Location length_loc = locations->InAt(1);
4991  SlowPathCode* slow_path =
4992      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
4993
4994  if (length_loc.IsConstant()) {
4995    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
4996    if (index_loc.IsConstant()) {
4997      // BCE will remove the bounds check if we are guarenteed to pass.
4998      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
4999      if (index < 0 || index >= length) {
5000        codegen_->AddSlowPath(slow_path);
5001        __ jmp(slow_path->GetEntryLabel());
5002      } else {
5003        // Some optimization after BCE may have generated this, and we should not
5004        // generate a bounds check if it is a valid range.
5005      }
5006      return;
5007    }
5008
5009    // We have to reverse the jump condition because the length is the constant.
5010    CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5011    __ cmpl(index_reg, Immediate(length));
5012    codegen_->AddSlowPath(slow_path);
5013    __ j(kAboveEqual, slow_path->GetEntryLabel());
5014  } else {
5015    CpuRegister length = length_loc.AsRegister<CpuRegister>();
5016    if (index_loc.IsConstant()) {
5017      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5018      __ cmpl(length, Immediate(value));
5019    } else {
5020      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
5021    }
5022    codegen_->AddSlowPath(slow_path);
5023    __ j(kBelowEqual, slow_path->GetEntryLabel());
5024  }
5025}
5026
5027void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5028                                     CpuRegister card,
5029                                     CpuRegister object,
5030                                     CpuRegister value,
5031                                     bool value_can_be_null) {
5032  NearLabel is_null;
5033  if (value_can_be_null) {
5034    __ testl(value, value);
5035    __ j(kEqual, &is_null);
5036  }
5037  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
5038                                        /* no_rip */ true));
5039  __ movq(temp, object);
5040  __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5041  __ movb(Address(temp, card, TIMES_1, 0), card);
5042  if (value_can_be_null) {
5043    __ Bind(&is_null);
5044  }
5045}
5046
5047void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5048  LOG(FATAL) << "Unimplemented";
5049}
5050
5051void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5052  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5053}
5054
5055void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5056  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5057}
5058
5059void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5060  HBasicBlock* block = instruction->GetBlock();
5061  if (block->GetLoopInformation() != nullptr) {
5062    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5063    // The back edge will generate the suspend check.
5064    return;
5065  }
5066  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5067    // The goto will generate the suspend check.
5068    return;
5069  }
5070  GenerateSuspendCheck(instruction, nullptr);
5071}
5072
5073void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5074                                                          HBasicBlock* successor) {
5075  SuspendCheckSlowPathX86_64* slow_path =
5076      down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5077  if (slow_path == nullptr) {
5078    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5079    instruction->SetSlowPath(slow_path);
5080    codegen_->AddSlowPath(slow_path);
5081    if (successor != nullptr) {
5082      DCHECK(successor->IsLoopHeader());
5083      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5084    }
5085  } else {
5086    DCHECK_EQ(slow_path->GetSuccessor(), successor);
5087  }
5088
5089  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
5090                                  /* no_rip */ true),
5091                Immediate(0));
5092  if (successor == nullptr) {
5093    __ j(kNotEqual, slow_path->GetEntryLabel());
5094    __ Bind(slow_path->GetReturnLabel());
5095  } else {
5096    __ j(kEqual, codegen_->GetLabelOf(successor));
5097    __ jmp(slow_path->GetEntryLabel());
5098  }
5099}
5100
5101X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5102  return codegen_->GetAssembler();
5103}
5104
5105void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5106  MoveOperands* move = moves_[index];
5107  Location source = move->GetSource();
5108  Location destination = move->GetDestination();
5109
5110  if (source.IsRegister()) {
5111    if (destination.IsRegister()) {
5112      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5113    } else if (destination.IsStackSlot()) {
5114      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5115              source.AsRegister<CpuRegister>());
5116    } else {
5117      DCHECK(destination.IsDoubleStackSlot());
5118      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5119              source.AsRegister<CpuRegister>());
5120    }
5121  } else if (source.IsStackSlot()) {
5122    if (destination.IsRegister()) {
5123      __ movl(destination.AsRegister<CpuRegister>(),
5124              Address(CpuRegister(RSP), source.GetStackIndex()));
5125    } else if (destination.IsFpuRegister()) {
5126      __ movss(destination.AsFpuRegister<XmmRegister>(),
5127              Address(CpuRegister(RSP), source.GetStackIndex()));
5128    } else {
5129      DCHECK(destination.IsStackSlot());
5130      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5131      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5132    }
5133  } else if (source.IsDoubleStackSlot()) {
5134    if (destination.IsRegister()) {
5135      __ movq(destination.AsRegister<CpuRegister>(),
5136              Address(CpuRegister(RSP), source.GetStackIndex()));
5137    } else if (destination.IsFpuRegister()) {
5138      __ movsd(destination.AsFpuRegister<XmmRegister>(),
5139               Address(CpuRegister(RSP), source.GetStackIndex()));
5140    } else {
5141      DCHECK(destination.IsDoubleStackSlot()) << destination;
5142      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5143      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5144    }
5145  } else if (source.IsConstant()) {
5146    HConstant* constant = source.GetConstant();
5147    if (constant->IsIntConstant() || constant->IsNullConstant()) {
5148      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5149      if (destination.IsRegister()) {
5150        if (value == 0) {
5151          __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5152        } else {
5153          __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5154        }
5155      } else {
5156        DCHECK(destination.IsStackSlot()) << destination;
5157        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5158      }
5159    } else if (constant->IsLongConstant()) {
5160      int64_t value = constant->AsLongConstant()->GetValue();
5161      if (destination.IsRegister()) {
5162        codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5163      } else {
5164        DCHECK(destination.IsDoubleStackSlot()) << destination;
5165        codegen_->Store64BitValueToStack(destination, value);
5166      }
5167    } else if (constant->IsFloatConstant()) {
5168      float fp_value = constant->AsFloatConstant()->GetValue();
5169      if (destination.IsFpuRegister()) {
5170        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5171        codegen_->Load32BitValue(dest, fp_value);
5172      } else {
5173        DCHECK(destination.IsStackSlot()) << destination;
5174        Immediate imm(bit_cast<int32_t, float>(fp_value));
5175        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5176      }
5177    } else {
5178      DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5179      double fp_value =  constant->AsDoubleConstant()->GetValue();
5180      int64_t value = bit_cast<int64_t, double>(fp_value);
5181      if (destination.IsFpuRegister()) {
5182        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5183        codegen_->Load64BitValue(dest, fp_value);
5184      } else {
5185        DCHECK(destination.IsDoubleStackSlot()) << destination;
5186        codegen_->Store64BitValueToStack(destination, value);
5187      }
5188    }
5189  } else if (source.IsFpuRegister()) {
5190    if (destination.IsFpuRegister()) {
5191      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5192    } else if (destination.IsStackSlot()) {
5193      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5194               source.AsFpuRegister<XmmRegister>());
5195    } else {
5196      DCHECK(destination.IsDoubleStackSlot()) << destination;
5197      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5198               source.AsFpuRegister<XmmRegister>());
5199    }
5200  }
5201}
5202
5203void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5204  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5205  __ movl(Address(CpuRegister(RSP), mem), reg);
5206  __ movl(reg, CpuRegister(TMP));
5207}
5208
5209void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5210  ScratchRegisterScope ensure_scratch(
5211      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5212
5213  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5214  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5215  __ movl(CpuRegister(ensure_scratch.GetRegister()),
5216          Address(CpuRegister(RSP), mem2 + stack_offset));
5217  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5218  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5219          CpuRegister(ensure_scratch.GetRegister()));
5220}
5221
5222void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5223  __ movq(CpuRegister(TMP), reg1);
5224  __ movq(reg1, reg2);
5225  __ movq(reg2, CpuRegister(TMP));
5226}
5227
5228void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5229  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5230  __ movq(Address(CpuRegister(RSP), mem), reg);
5231  __ movq(reg, CpuRegister(TMP));
5232}
5233
5234void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5235  ScratchRegisterScope ensure_scratch(
5236      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5237
5238  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5239  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5240  __ movq(CpuRegister(ensure_scratch.GetRegister()),
5241          Address(CpuRegister(RSP), mem2 + stack_offset));
5242  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5243  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5244          CpuRegister(ensure_scratch.GetRegister()));
5245}
5246
5247void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5248  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5249  __ movss(Address(CpuRegister(RSP), mem), reg);
5250  __ movd(reg, CpuRegister(TMP));
5251}
5252
5253void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5254  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5255  __ movsd(Address(CpuRegister(RSP), mem), reg);
5256  __ movd(reg, CpuRegister(TMP));
5257}
5258
5259void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5260  MoveOperands* move = moves_[index];
5261  Location source = move->GetSource();
5262  Location destination = move->GetDestination();
5263
5264  if (source.IsRegister() && destination.IsRegister()) {
5265    Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5266  } else if (source.IsRegister() && destination.IsStackSlot()) {
5267    Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5268  } else if (source.IsStackSlot() && destination.IsRegister()) {
5269    Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5270  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5271    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5272  } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5273    Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5274  } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5275    Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5276  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5277    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5278  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5279    __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5280    __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5281    __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5282  } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5283    Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5284  } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5285    Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5286  } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5287    Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5288  } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5289    Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5290  } else {
5291    LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5292  }
5293}
5294
5295
5296void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5297  __ pushq(CpuRegister(reg));
5298}
5299
5300
5301void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5302  __ popq(CpuRegister(reg));
5303}
5304
5305void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5306    SlowPathCode* slow_path, CpuRegister class_reg) {
5307  __ cmpl(Address(class_reg,  mirror::Class::StatusOffset().Int32Value()),
5308          Immediate(mirror::Class::kStatusInitialized));
5309  __ j(kLess, slow_path->GetEntryLabel());
5310  __ Bind(slow_path->GetExitLabel());
5311  // No need for memory fence, thanks to the x86-64 memory model.
5312}
5313
5314void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5315  InvokeRuntimeCallingConvention calling_convention;
5316  CodeGenerator::CreateLoadClassLocationSummary(
5317      cls,
5318      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
5319      Location::RegisterLocation(RAX),
5320      /* code_generator_supports_read_barrier */ true);
5321}
5322
5323void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
5324  LocationSummary* locations = cls->GetLocations();
5325  if (cls->NeedsAccessCheck()) {
5326    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
5327    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
5328                            cls,
5329                            cls->GetDexPc(),
5330                            nullptr);
5331    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
5332    return;
5333  }
5334
5335  Location out_loc = locations->Out();
5336  CpuRegister out = out_loc.AsRegister<CpuRegister>();
5337  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5338
5339  if (cls->IsReferrersClass()) {
5340    DCHECK(!cls->CanCallRuntime());
5341    DCHECK(!cls->MustGenerateClinitCheck());
5342    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5343    GenerateGcRootFieldLoad(
5344        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5345  } else {
5346    // /* GcRoot<mirror::Class>[] */ out =
5347    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
5348    __ movq(out, Address(current_method,
5349                         ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
5350    // /* GcRoot<mirror::Class> */ out = out[type_index]
5351    GenerateGcRootFieldLoad(
5352        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
5353
5354    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
5355      DCHECK(cls->CanCallRuntime());
5356      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5357          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5358      codegen_->AddSlowPath(slow_path);
5359      if (!cls->IsInDexCache()) {
5360        __ testl(out, out);
5361        __ j(kEqual, slow_path->GetEntryLabel());
5362      }
5363      if (cls->MustGenerateClinitCheck()) {
5364        GenerateClassInitializationCheck(slow_path, out);
5365      } else {
5366        __ Bind(slow_path->GetExitLabel());
5367      }
5368    }
5369  }
5370}
5371
5372void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5373  LocationSummary* locations =
5374      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5375  locations->SetInAt(0, Location::RequiresRegister());
5376  if (check->HasUses()) {
5377    locations->SetOut(Location::SameAsFirstInput());
5378  }
5379}
5380
5381void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5382  // We assume the class to not be null.
5383  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5384      check->GetLoadClass(), check, check->GetDexPc(), true);
5385  codegen_->AddSlowPath(slow_path);
5386  GenerateClassInitializationCheck(slow_path,
5387                                   check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5388}
5389
5390HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5391    HLoadString::LoadKind desired_string_load_kind) {
5392  if (kEmitCompilerReadBarrier) {
5393    switch (desired_string_load_kind) {
5394      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5395      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5396      case HLoadString::LoadKind::kBootImageAddress:
5397        // TODO: Implement for read barrier.
5398        return HLoadString::LoadKind::kDexCacheViaMethod;
5399      default:
5400        break;
5401    }
5402  }
5403  switch (desired_string_load_kind) {
5404    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5405      DCHECK(!GetCompilerOptions().GetCompilePic());
5406      // We prefer the always-available RIP-relative address for the x86-64 boot image.
5407      return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5408    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5409      DCHECK(GetCompilerOptions().GetCompilePic());
5410      break;
5411    case HLoadString::LoadKind::kBootImageAddress:
5412      break;
5413    case HLoadString::LoadKind::kDexCacheAddress:
5414      DCHECK(Runtime::Current()->UseJitCompilation());
5415      break;
5416    case HLoadString::LoadKind::kDexCachePcRelative:
5417      DCHECK(!Runtime::Current()->UseJitCompilation());
5418      break;
5419    case HLoadString::LoadKind::kDexCacheViaMethod:
5420      break;
5421  }
5422  return desired_string_load_kind;
5423}
5424
5425void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5426  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
5427      ? LocationSummary::kCallOnSlowPath
5428      : LocationSummary::kNoCall;
5429  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5430  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5431    locations->SetInAt(0, Location::RequiresRegister());
5432  }
5433  locations->SetOut(Location::RequiresRegister());
5434}
5435
5436void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
5437  LocationSummary* locations = load->GetLocations();
5438  Location out_loc = locations->Out();
5439  CpuRegister out = out_loc.AsRegister<CpuRegister>();
5440
5441  switch (load->GetLoadKind()) {
5442    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5443      DCHECK(!kEmitCompilerReadBarrier);
5444      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5445      codegen_->RecordStringPatch(load);
5446      return;  // No dex cache slow path.
5447    }
5448    case HLoadString::LoadKind::kBootImageAddress: {
5449      DCHECK(!kEmitCompilerReadBarrier);
5450      DCHECK_NE(load->GetAddress(), 0u);
5451      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
5452      __ movl(out, Immediate(address));  // Zero-extended.
5453      codegen_->RecordSimplePatch();
5454      return;  // No dex cache slow path.
5455    }
5456    case HLoadString::LoadKind::kDexCacheAddress: {
5457      DCHECK_NE(load->GetAddress(), 0u);
5458      if (IsUint<32>(load->GetAddress())) {
5459        Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
5460        GenerateGcRootFieldLoad(load, out_loc, address);
5461      } else {
5462        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
5463        __ movq(out, Immediate(load->GetAddress()));
5464        GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
5465      }
5466      break;
5467    }
5468    case HLoadString::LoadKind::kDexCachePcRelative: {
5469      uint32_t offset = load->GetDexCacheElementOffset();
5470      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
5471      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5472                                          /* no_rip */ false);
5473      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
5474      break;
5475    }
5476    case HLoadString::LoadKind::kDexCacheViaMethod: {
5477      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5478
5479      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5480      GenerateGcRootFieldLoad(
5481          load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5482      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
5483      __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
5484      // /* GcRoot<mirror::String> */ out = out[string_index]
5485      GenerateGcRootFieldLoad(
5486          load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
5487      break;
5488    }
5489    default:
5490      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
5491      UNREACHABLE();
5492  }
5493
5494  if (!load->IsInDexCache()) {
5495    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5496    codegen_->AddSlowPath(slow_path);
5497    __ testl(out, out);
5498    __ j(kEqual, slow_path->GetEntryLabel());
5499    __ Bind(slow_path->GetExitLabel());
5500  }
5501}
5502
5503static Address GetExceptionTlsAddress() {
5504  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
5505                           /* no_rip */ true);
5506}
5507
5508void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5509  LocationSummary* locations =
5510      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5511  locations->SetOut(Location::RequiresRegister());
5512}
5513
5514void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5515  __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5516}
5517
5518void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5519  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5520}
5521
5522void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5523  __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5524}
5525
5526void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5527  LocationSummary* locations =
5528      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
5529  InvokeRuntimeCallingConvention calling_convention;
5530  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5531}
5532
5533void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5534  codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
5535                          instruction,
5536                          instruction->GetDexPc(),
5537                          nullptr);
5538  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5539}
5540
5541static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5542  return kEmitCompilerReadBarrier &&
5543      (kUseBakerReadBarrier ||
5544       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5545       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5546       type_check_kind == TypeCheckKind::kArrayObjectCheck);
5547}
5548
5549void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5550  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5551  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5552  switch (type_check_kind) {
5553    case TypeCheckKind::kExactCheck:
5554    case TypeCheckKind::kAbstractClassCheck:
5555    case TypeCheckKind::kClassHierarchyCheck:
5556    case TypeCheckKind::kArrayObjectCheck:
5557      call_kind =
5558          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5559      break;
5560    case TypeCheckKind::kArrayCheck:
5561    case TypeCheckKind::kUnresolvedCheck:
5562    case TypeCheckKind::kInterfaceCheck:
5563      call_kind = LocationSummary::kCallOnSlowPath;
5564      break;
5565  }
5566
5567  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5568  locations->SetInAt(0, Location::RequiresRegister());
5569  locations->SetInAt(1, Location::Any());
5570  // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5571  locations->SetOut(Location::RequiresRegister());
5572  // When read barriers are enabled, we need a temporary register for
5573  // some cases.
5574  if (TypeCheckNeedsATemporary(type_check_kind)) {
5575    locations->AddTemp(Location::RequiresRegister());
5576  }
5577}
5578
5579void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5580  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5581  LocationSummary* locations = instruction->GetLocations();
5582  Location obj_loc = locations->InAt(0);
5583  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5584  Location cls = locations->InAt(1);
5585  Location out_loc =  locations->Out();
5586  CpuRegister out = out_loc.AsRegister<CpuRegister>();
5587  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5588      locations->GetTemp(0) :
5589      Location::NoLocation();
5590  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5591  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5592  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5593  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5594  SlowPathCode* slow_path = nullptr;
5595  NearLabel done, zero;
5596
5597  // Return 0 if `obj` is null.
5598  // Avoid null check if we know obj is not null.
5599  if (instruction->MustDoNullCheck()) {
5600    __ testl(obj, obj);
5601    __ j(kEqual, &zero);
5602  }
5603
5604  // /* HeapReference<Class> */ out = obj->klass_
5605  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
5606
5607  switch (type_check_kind) {
5608    case TypeCheckKind::kExactCheck: {
5609      if (cls.IsRegister()) {
5610        __ cmpl(out, cls.AsRegister<CpuRegister>());
5611      } else {
5612        DCHECK(cls.IsStackSlot()) << cls;
5613        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5614      }
5615      if (zero.IsLinked()) {
5616        // Classes must be equal for the instanceof to succeed.
5617        __ j(kNotEqual, &zero);
5618        __ movl(out, Immediate(1));
5619        __ jmp(&done);
5620      } else {
5621        __ setcc(kEqual, out);
5622        // setcc only sets the low byte.
5623        __ andl(out, Immediate(1));
5624      }
5625      break;
5626    }
5627
5628    case TypeCheckKind::kAbstractClassCheck: {
5629      // If the class is abstract, we eagerly fetch the super class of the
5630      // object to avoid doing a comparison we know will fail.
5631      NearLabel loop, success;
5632      __ Bind(&loop);
5633      // /* HeapReference<Class> */ out = out->super_class_
5634      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5635      __ testl(out, out);
5636      // If `out` is null, we use it for the result, and jump to `done`.
5637      __ j(kEqual, &done);
5638      if (cls.IsRegister()) {
5639        __ cmpl(out, cls.AsRegister<CpuRegister>());
5640      } else {
5641        DCHECK(cls.IsStackSlot()) << cls;
5642        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5643      }
5644      __ j(kNotEqual, &loop);
5645      __ movl(out, Immediate(1));
5646      if (zero.IsLinked()) {
5647        __ jmp(&done);
5648      }
5649      break;
5650    }
5651
5652    case TypeCheckKind::kClassHierarchyCheck: {
5653      // Walk over the class hierarchy to find a match.
5654      NearLabel loop, success;
5655      __ Bind(&loop);
5656      if (cls.IsRegister()) {
5657        __ cmpl(out, cls.AsRegister<CpuRegister>());
5658      } else {
5659        DCHECK(cls.IsStackSlot()) << cls;
5660        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5661      }
5662      __ j(kEqual, &success);
5663      // /* HeapReference<Class> */ out = out->super_class_
5664      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5665      __ testl(out, out);
5666      __ j(kNotEqual, &loop);
5667      // If `out` is null, we use it for the result, and jump to `done`.
5668      __ jmp(&done);
5669      __ Bind(&success);
5670      __ movl(out, Immediate(1));
5671      if (zero.IsLinked()) {
5672        __ jmp(&done);
5673      }
5674      break;
5675    }
5676
5677    case TypeCheckKind::kArrayObjectCheck: {
5678      // Do an exact check.
5679      NearLabel exact_check;
5680      if (cls.IsRegister()) {
5681        __ cmpl(out, cls.AsRegister<CpuRegister>());
5682      } else {
5683        DCHECK(cls.IsStackSlot()) << cls;
5684        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5685      }
5686      __ j(kEqual, &exact_check);
5687      // Otherwise, we need to check that the object's class is a non-primitive array.
5688      // /* HeapReference<Class> */ out = out->component_type_
5689      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
5690      __ testl(out, out);
5691      // If `out` is null, we use it for the result, and jump to `done`.
5692      __ j(kEqual, &done);
5693      __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5694      __ j(kNotEqual, &zero);
5695      __ Bind(&exact_check);
5696      __ movl(out, Immediate(1));
5697      __ jmp(&done);
5698      break;
5699    }
5700
5701    case TypeCheckKind::kArrayCheck: {
5702      if (cls.IsRegister()) {
5703        __ cmpl(out, cls.AsRegister<CpuRegister>());
5704      } else {
5705        DCHECK(cls.IsStackSlot()) << cls;
5706        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5707      }
5708      DCHECK(locations->OnlyCallsOnSlowPath());
5709      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5710                                                                       /* is_fatal */ false);
5711      codegen_->AddSlowPath(slow_path);
5712      __ j(kNotEqual, slow_path->GetEntryLabel());
5713      __ movl(out, Immediate(1));
5714      if (zero.IsLinked()) {
5715        __ jmp(&done);
5716      }
5717      break;
5718    }
5719
5720    case TypeCheckKind::kUnresolvedCheck:
5721    case TypeCheckKind::kInterfaceCheck: {
5722      // Note that we indeed only call on slow path, but we always go
5723      // into the slow path for the unresolved and interface check
5724      // cases.
5725      //
5726      // We cannot directly call the InstanceofNonTrivial runtime
5727      // entry point without resorting to a type checking slow path
5728      // here (i.e. by calling InvokeRuntime directly), as it would
5729      // require to assign fixed registers for the inputs of this
5730      // HInstanceOf instruction (following the runtime calling
5731      // convention), which might be cluttered by the potential first
5732      // read barrier emission at the beginning of this method.
5733      //
5734      // TODO: Introduce a new runtime entry point taking the object
5735      // to test (instead of its class) as argument, and let it deal
5736      // with the read barrier issues. This will let us refactor this
5737      // case of the `switch` code as it was previously (with a direct
5738      // call to the runtime not using a type checking slow path).
5739      // This should also be beneficial for the other cases above.
5740      DCHECK(locations->OnlyCallsOnSlowPath());
5741      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5742                                                                       /* is_fatal */ false);
5743      codegen_->AddSlowPath(slow_path);
5744      __ jmp(slow_path->GetEntryLabel());
5745      if (zero.IsLinked()) {
5746        __ jmp(&done);
5747      }
5748      break;
5749    }
5750  }
5751
5752  if (zero.IsLinked()) {
5753    __ Bind(&zero);
5754    __ xorl(out, out);
5755  }
5756
5757  if (done.IsLinked()) {
5758    __ Bind(&done);
5759  }
5760
5761  if (slow_path != nullptr) {
5762    __ Bind(slow_path->GetExitLabel());
5763  }
5764}
5765
5766void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
5767  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5768  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
5769  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5770  switch (type_check_kind) {
5771    case TypeCheckKind::kExactCheck:
5772    case TypeCheckKind::kAbstractClassCheck:
5773    case TypeCheckKind::kClassHierarchyCheck:
5774    case TypeCheckKind::kArrayObjectCheck:
5775      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
5776          LocationSummary::kCallOnSlowPath :
5777          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
5778      break;
5779    case TypeCheckKind::kArrayCheck:
5780    case TypeCheckKind::kUnresolvedCheck:
5781    case TypeCheckKind::kInterfaceCheck:
5782      call_kind = LocationSummary::kCallOnSlowPath;
5783      break;
5784  }
5785  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5786  locations->SetInAt(0, Location::RequiresRegister());
5787  locations->SetInAt(1, Location::Any());
5788  // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
5789  locations->AddTemp(Location::RequiresRegister());
5790  // When read barriers are enabled, we need an additional temporary
5791  // register for some cases.
5792  if (TypeCheckNeedsATemporary(type_check_kind)) {
5793    locations->AddTemp(Location::RequiresRegister());
5794  }
5795}
5796
5797void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
5798  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5799  LocationSummary* locations = instruction->GetLocations();
5800  Location obj_loc = locations->InAt(0);
5801  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5802  Location cls = locations->InAt(1);
5803  Location temp_loc = locations->GetTemp(0);
5804  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5805  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5806      locations->GetTemp(1) :
5807      Location::NoLocation();
5808  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5809  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5810  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5811  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5812
5813  bool is_type_check_slow_path_fatal =
5814      (type_check_kind == TypeCheckKind::kExactCheck ||
5815       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5816       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5817       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
5818      !instruction->CanThrowIntoCatchBlock();
5819  SlowPathCode* type_check_slow_path =
5820      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5821                                                           is_type_check_slow_path_fatal);
5822  codegen_->AddSlowPath(type_check_slow_path);
5823
5824  switch (type_check_kind) {
5825    case TypeCheckKind::kExactCheck:
5826    case TypeCheckKind::kArrayCheck: {
5827      NearLabel done;
5828      // Avoid null check if we know obj is not null.
5829      if (instruction->MustDoNullCheck()) {
5830        __ testl(obj, obj);
5831        __ j(kEqual, &done);
5832      }
5833
5834      // /* HeapReference<Class> */ temp = obj->klass_
5835      GenerateReferenceLoadTwoRegisters(
5836          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5837
5838      if (cls.IsRegister()) {
5839        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5840      } else {
5841        DCHECK(cls.IsStackSlot()) << cls;
5842        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5843      }
5844      // Jump to slow path for throwing the exception or doing a
5845      // more involved array check.
5846      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
5847      __ Bind(&done);
5848      break;
5849    }
5850
5851    case TypeCheckKind::kAbstractClassCheck: {
5852      NearLabel done;
5853      // Avoid null check if we know obj is not null.
5854      if (instruction->MustDoNullCheck()) {
5855        __ testl(obj, obj);
5856        __ j(kEqual, &done);
5857      }
5858
5859      // /* HeapReference<Class> */ temp = obj->klass_
5860      GenerateReferenceLoadTwoRegisters(
5861          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5862
5863      // If the class is abstract, we eagerly fetch the super class of the
5864      // object to avoid doing a comparison we know will fail.
5865      NearLabel loop, compare_classes;
5866      __ Bind(&loop);
5867      // /* HeapReference<Class> */ temp = temp->super_class_
5868      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5869
5870      // If the class reference currently in `temp` is not null, jump
5871      // to the `compare_classes` label to compare it with the checked
5872      // class.
5873      __ testl(temp, temp);
5874      __ j(kNotEqual, &compare_classes);
5875      // Otherwise, jump to the slow path to throw the exception.
5876      //
5877      // But before, move back the object's class into `temp` before
5878      // going into the slow path, as it has been overwritten in the
5879      // meantime.
5880      // /* HeapReference<Class> */ temp = obj->klass_
5881      GenerateReferenceLoadTwoRegisters(
5882          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5883      __ jmp(type_check_slow_path->GetEntryLabel());
5884
5885      __ Bind(&compare_classes);
5886      if (cls.IsRegister()) {
5887        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5888      } else {
5889        DCHECK(cls.IsStackSlot()) << cls;
5890        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5891      }
5892      __ j(kNotEqual, &loop);
5893      __ Bind(&done);
5894      break;
5895    }
5896
5897    case TypeCheckKind::kClassHierarchyCheck: {
5898      NearLabel done;
5899      // Avoid null check if we know obj is not null.
5900      if (instruction->MustDoNullCheck()) {
5901        __ testl(obj, obj);
5902        __ j(kEqual, &done);
5903      }
5904
5905      // /* HeapReference<Class> */ temp = obj->klass_
5906      GenerateReferenceLoadTwoRegisters(
5907          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5908
5909      // Walk over the class hierarchy to find a match.
5910      NearLabel loop;
5911      __ Bind(&loop);
5912      if (cls.IsRegister()) {
5913        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5914      } else {
5915        DCHECK(cls.IsStackSlot()) << cls;
5916        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5917      }
5918      __ j(kEqual, &done);
5919
5920      // /* HeapReference<Class> */ temp = temp->super_class_
5921      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5922
5923      // If the class reference currently in `temp` is not null, jump
5924      // back at the beginning of the loop.
5925      __ testl(temp, temp);
5926      __ j(kNotEqual, &loop);
5927      // Otherwise, jump to the slow path to throw the exception.
5928      //
5929      // But before, move back the object's class into `temp` before
5930      // going into the slow path, as it has been overwritten in the
5931      // meantime.
5932      // /* HeapReference<Class> */ temp = obj->klass_
5933      GenerateReferenceLoadTwoRegisters(
5934          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5935      __ jmp(type_check_slow_path->GetEntryLabel());
5936      __ Bind(&done);
5937      break;
5938    }
5939
5940    case TypeCheckKind::kArrayObjectCheck: {
5941      // We cannot use a NearLabel here, as its range might be too
5942      // short in some cases when read barriers are enabled.  This has
5943      // been observed for instance when the code emitted for this
5944      // case uses high x86-64 registers (R8-R15).
5945      Label done;
5946      // Avoid null check if we know obj is not null.
5947      if (instruction->MustDoNullCheck()) {
5948        __ testl(obj, obj);
5949        __ j(kEqual, &done);
5950      }
5951
5952      // /* HeapReference<Class> */ temp = obj->klass_
5953      GenerateReferenceLoadTwoRegisters(
5954          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5955
5956      // Do an exact check.
5957      NearLabel check_non_primitive_component_type;
5958      if (cls.IsRegister()) {
5959        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5960      } else {
5961        DCHECK(cls.IsStackSlot()) << cls;
5962        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5963      }
5964      __ j(kEqual, &done);
5965
5966      // Otherwise, we need to check that the object's class is a non-primitive array.
5967      // /* HeapReference<Class> */ temp = temp->component_type_
5968      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
5969
5970      // If the component type is not null (i.e. the object is indeed
5971      // an array), jump to label `check_non_primitive_component_type`
5972      // to further check that this component type is not a primitive
5973      // type.
5974      __ testl(temp, temp);
5975      __ j(kNotEqual, &check_non_primitive_component_type);
5976      // Otherwise, jump to the slow path to throw the exception.
5977      //
5978      // But before, move back the object's class into `temp` before
5979      // going into the slow path, as it has been overwritten in the
5980      // meantime.
5981      // /* HeapReference<Class> */ temp = obj->klass_
5982      GenerateReferenceLoadTwoRegisters(
5983          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5984      __ jmp(type_check_slow_path->GetEntryLabel());
5985
5986      __ Bind(&check_non_primitive_component_type);
5987      __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
5988      __ j(kEqual, &done);
5989      // Same comment as above regarding `temp` and the slow path.
5990      // /* HeapReference<Class> */ temp = obj->klass_
5991      GenerateReferenceLoadTwoRegisters(
5992          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5993      __ jmp(type_check_slow_path->GetEntryLabel());
5994      __ Bind(&done);
5995      break;
5996    }
5997
5998    case TypeCheckKind::kUnresolvedCheck:
5999    case TypeCheckKind::kInterfaceCheck:
6000      NearLabel done;
6001      // Avoid null check if we know obj is not null.
6002      if (instruction->MustDoNullCheck()) {
6003        __ testl(obj, obj);
6004        __ j(kEqual, &done);
6005      }
6006
6007      // /* HeapReference<Class> */ temp = obj->klass_
6008      GenerateReferenceLoadTwoRegisters(
6009          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6010
6011      // We always go into the type check slow path for the unresolved
6012      // and interface check cases.
6013      //
6014      // We cannot directly call the CheckCast runtime entry point
6015      // without resorting to a type checking slow path here (i.e. by
6016      // calling InvokeRuntime directly), as it would require to
6017      // assign fixed registers for the inputs of this HInstanceOf
6018      // instruction (following the runtime calling convention), which
6019      // might be cluttered by the potential first read barrier
6020      // emission at the beginning of this method.
6021      //
6022      // TODO: Introduce a new runtime entry point taking the object
6023      // to test (instead of its class) as argument, and let it deal
6024      // with the read barrier issues. This will let us refactor this
6025      // case of the `switch` code as it was previously (with a direct
6026      // call to the runtime not using a type checking slow path).
6027      // This should also be beneficial for the other cases above.
6028      __ jmp(type_check_slow_path->GetEntryLabel());
6029      __ Bind(&done);
6030      break;
6031  }
6032
6033  __ Bind(type_check_slow_path->GetExitLabel());
6034}
6035
6036void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6037  LocationSummary* locations =
6038      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
6039  InvokeRuntimeCallingConvention calling_convention;
6040  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6041}
6042
6043void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6044  codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
6045                                                 : QUICK_ENTRY_POINT(pUnlockObject),
6046                          instruction,
6047                          instruction->GetDexPc(),
6048                          nullptr);
6049  if (instruction->IsEnter()) {
6050    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6051  } else {
6052    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6053  }
6054}
6055
6056void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6057void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6058void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6059
6060void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6061  LocationSummary* locations =
6062      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6063  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6064         || instruction->GetResultType() == Primitive::kPrimLong);
6065  locations->SetInAt(0, Location::RequiresRegister());
6066  locations->SetInAt(1, Location::Any());
6067  locations->SetOut(Location::SameAsFirstInput());
6068}
6069
6070void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6071  HandleBitwiseOperation(instruction);
6072}
6073
6074void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6075  HandleBitwiseOperation(instruction);
6076}
6077
6078void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6079  HandleBitwiseOperation(instruction);
6080}
6081
6082void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6083  LocationSummary* locations = instruction->GetLocations();
6084  Location first = locations->InAt(0);
6085  Location second = locations->InAt(1);
6086  DCHECK(first.Equals(locations->Out()));
6087
6088  if (instruction->GetResultType() == Primitive::kPrimInt) {
6089    if (second.IsRegister()) {
6090      if (instruction->IsAnd()) {
6091        __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6092      } else if (instruction->IsOr()) {
6093        __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6094      } else {
6095        DCHECK(instruction->IsXor());
6096        __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6097      }
6098    } else if (second.IsConstant()) {
6099      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6100      if (instruction->IsAnd()) {
6101        __ andl(first.AsRegister<CpuRegister>(), imm);
6102      } else if (instruction->IsOr()) {
6103        __ orl(first.AsRegister<CpuRegister>(), imm);
6104      } else {
6105        DCHECK(instruction->IsXor());
6106        __ xorl(first.AsRegister<CpuRegister>(), imm);
6107      }
6108    } else {
6109      Address address(CpuRegister(RSP), second.GetStackIndex());
6110      if (instruction->IsAnd()) {
6111        __ andl(first.AsRegister<CpuRegister>(), address);
6112      } else if (instruction->IsOr()) {
6113        __ orl(first.AsRegister<CpuRegister>(), address);
6114      } else {
6115        DCHECK(instruction->IsXor());
6116        __ xorl(first.AsRegister<CpuRegister>(), address);
6117      }
6118    }
6119  } else {
6120    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6121    CpuRegister first_reg = first.AsRegister<CpuRegister>();
6122    bool second_is_constant = false;
6123    int64_t value = 0;
6124    if (second.IsConstant()) {
6125      second_is_constant = true;
6126      value = second.GetConstant()->AsLongConstant()->GetValue();
6127    }
6128    bool is_int32_value = IsInt<32>(value);
6129
6130    if (instruction->IsAnd()) {
6131      if (second_is_constant) {
6132        if (is_int32_value) {
6133          __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6134        } else {
6135          __ andq(first_reg, codegen_->LiteralInt64Address(value));
6136        }
6137      } else if (second.IsDoubleStackSlot()) {
6138        __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6139      } else {
6140        __ andq(first_reg, second.AsRegister<CpuRegister>());
6141      }
6142    } else if (instruction->IsOr()) {
6143      if (second_is_constant) {
6144        if (is_int32_value) {
6145          __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6146        } else {
6147          __ orq(first_reg, codegen_->LiteralInt64Address(value));
6148        }
6149      } else if (second.IsDoubleStackSlot()) {
6150        __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6151      } else {
6152        __ orq(first_reg, second.AsRegister<CpuRegister>());
6153      }
6154    } else {
6155      DCHECK(instruction->IsXor());
6156      if (second_is_constant) {
6157        if (is_int32_value) {
6158          __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6159        } else {
6160          __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6161        }
6162      } else if (second.IsDoubleStackSlot()) {
6163        __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6164      } else {
6165        __ xorq(first_reg, second.AsRegister<CpuRegister>());
6166      }
6167    }
6168  }
6169}
6170
6171void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
6172                                                                      Location out,
6173                                                                      uint32_t offset,
6174                                                                      Location maybe_temp) {
6175  CpuRegister out_reg = out.AsRegister<CpuRegister>();
6176  if (kEmitCompilerReadBarrier) {
6177    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6178    if (kUseBakerReadBarrier) {
6179      // Load with fast path based Baker's read barrier.
6180      // /* HeapReference<Object> */ out = *(out + offset)
6181      codegen_->GenerateFieldLoadWithBakerReadBarrier(
6182          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
6183    } else {
6184      // Load with slow path based read barrier.
6185      // Save the value of `out` into `maybe_temp` before overwriting it
6186      // in the following move operation, as we will need it for the
6187      // read barrier below.
6188      __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6189      // /* HeapReference<Object> */ out = *(out + offset)
6190      __ movl(out_reg, Address(out_reg, offset));
6191      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6192    }
6193  } else {
6194    // Plain load with no read barrier.
6195    // /* HeapReference<Object> */ out = *(out + offset)
6196    __ movl(out_reg, Address(out_reg, offset));
6197    __ MaybeUnpoisonHeapReference(out_reg);
6198  }
6199}
6200
6201void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
6202                                                                       Location out,
6203                                                                       Location obj,
6204                                                                       uint32_t offset,
6205                                                                       Location maybe_temp) {
6206  CpuRegister out_reg = out.AsRegister<CpuRegister>();
6207  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6208  if (kEmitCompilerReadBarrier) {
6209    if (kUseBakerReadBarrier) {
6210      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6211      // Load with fast path based Baker's read barrier.
6212      // /* HeapReference<Object> */ out = *(obj + offset)
6213      codegen_->GenerateFieldLoadWithBakerReadBarrier(
6214          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
6215    } else {
6216      // Load with slow path based read barrier.
6217      // /* HeapReference<Object> */ out = *(obj + offset)
6218      __ movl(out_reg, Address(obj_reg, offset));
6219      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6220    }
6221  } else {
6222    // Plain load with no read barrier.
6223    // /* HeapReference<Object> */ out = *(obj + offset)
6224    __ movl(out_reg, Address(obj_reg, offset));
6225    __ MaybeUnpoisonHeapReference(out_reg);
6226  }
6227}
6228
6229void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
6230                                                             Location root,
6231                                                             const Address& address,
6232                                                             Label* fixup_label) {
6233  CpuRegister root_reg = root.AsRegister<CpuRegister>();
6234  if (kEmitCompilerReadBarrier) {
6235    if (kUseBakerReadBarrier) {
6236      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6237      // Baker's read barrier are used:
6238      //
6239      //   root = *address;
6240      //   if (Thread::Current()->GetIsGcMarking()) {
6241      //     root = ReadBarrier::Mark(root)
6242      //   }
6243
6244      // /* GcRoot<mirror::Object> */ root = *address
6245      __ movl(root_reg, address);
6246      if (fixup_label != nullptr) {
6247        __ Bind(fixup_label);
6248      }
6249      static_assert(
6250          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6251          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6252          "have different sizes.");
6253      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6254                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
6255                    "have different sizes.");
6256
6257      // Slow path used to mark the GC root `root`.
6258      SlowPathCode* slow_path =
6259          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
6260      codegen_->AddSlowPath(slow_path);
6261
6262      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
6263                                      /* no_rip */ true),
6264                    Immediate(0));
6265      __ j(kNotEqual, slow_path->GetEntryLabel());
6266      __ Bind(slow_path->GetExitLabel());
6267    } else {
6268      // GC root loaded through a slow path for read barriers other
6269      // than Baker's.
6270      // /* GcRoot<mirror::Object>* */ root = address
6271      __ leaq(root_reg, address);
6272      if (fixup_label != nullptr) {
6273        __ Bind(fixup_label);
6274      }
6275      // /* mirror::Object* */ root = root->Read()
6276      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6277    }
6278  } else {
6279    // Plain GC root load with no read barrier.
6280    // /* GcRoot<mirror::Object> */ root = *address
6281    __ movl(root_reg, address);
6282    if (fixup_label != nullptr) {
6283      __ Bind(fixup_label);
6284    }
6285    // Note that GC roots are not affected by heap poisoning, thus we
6286    // do not have to unpoison `root_reg` here.
6287  }
6288}
6289
6290void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6291                                                                Location ref,
6292                                                                CpuRegister obj,
6293                                                                uint32_t offset,
6294                                                                Location temp,
6295                                                                bool needs_null_check) {
6296  DCHECK(kEmitCompilerReadBarrier);
6297  DCHECK(kUseBakerReadBarrier);
6298
6299  // /* HeapReference<Object> */ ref = *(obj + offset)
6300  Address src(obj, offset);
6301  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6302}
6303
6304void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6305                                                                Location ref,
6306                                                                CpuRegister obj,
6307                                                                uint32_t data_offset,
6308                                                                Location index,
6309                                                                Location temp,
6310                                                                bool needs_null_check) {
6311  DCHECK(kEmitCompilerReadBarrier);
6312  DCHECK(kUseBakerReadBarrier);
6313
6314  // /* HeapReference<Object> */ ref =
6315  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6316  Address src = index.IsConstant() ?
6317      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
6318      Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
6319  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6320}
6321
6322void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6323                                                                    Location ref,
6324                                                                    CpuRegister obj,
6325                                                                    const Address& src,
6326                                                                    Location temp,
6327                                                                    bool needs_null_check) {
6328  DCHECK(kEmitCompilerReadBarrier);
6329  DCHECK(kUseBakerReadBarrier);
6330
6331  // In slow path based read barriers, the read barrier call is
6332  // inserted after the original load. However, in fast path based
6333  // Baker's read barriers, we need to perform the load of
6334  // mirror::Object::monitor_ *before* the original reference load.
6335  // This load-load ordering is required by the read barrier.
6336  // The fast path/slow path (for Baker's algorithm) should look like:
6337  //
6338  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6339  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6340  //   HeapReference<Object> ref = *src;  // Original reference load.
6341  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
6342  //   if (is_gray) {
6343  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
6344  //   }
6345  //
6346  // Note: the original implementation in ReadBarrier::Barrier is
6347  // slightly more complex as:
6348  // - it implements the load-load fence using a data dependency on
6349  //   the high-bits of rb_state, which are expected to be all zeroes
6350  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6351  //   here, which is a no-op thanks to the x86-64 memory model);
6352  // - it performs additional checks that we do not do here for
6353  //   performance reasons.
6354
6355  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6356  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
6357  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6358
6359  // /* int32_t */ monitor = obj->monitor_
6360  __ movl(temp_reg, Address(obj, monitor_offset));
6361  if (needs_null_check) {
6362    MaybeRecordImplicitNullCheck(instruction);
6363  }
6364  // /* LockWord */ lock_word = LockWord(monitor)
6365  static_assert(sizeof(LockWord) == sizeof(int32_t),
6366                "art::LockWord and int32_t have different sizes.");
6367  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
6368  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
6369  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
6370  static_assert(
6371      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
6372      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
6373
6374  // Load fence to prevent load-load reordering.
6375  // Note that this is a no-op, thanks to the x86-64 memory model.
6376  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6377
6378  // The actual reference load.
6379  // /* HeapReference<Object> */ ref = *src
6380  __ movl(ref_reg, src);
6381
6382  // Object* ref = ref_addr->AsMirrorPtr()
6383  __ MaybeUnpoisonHeapReference(ref_reg);
6384
6385  // Slow path used to mark the object `ref` when it is gray.
6386  SlowPathCode* slow_path =
6387      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
6388  AddSlowPath(slow_path);
6389
6390  // if (rb_state == ReadBarrier::gray_ptr_)
6391  //   ref = ReadBarrier::Mark(ref);
6392  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
6393  __ j(kEqual, slow_path->GetEntryLabel());
6394  __ Bind(slow_path->GetExitLabel());
6395}
6396
6397void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6398                                                  Location out,
6399                                                  Location ref,
6400                                                  Location obj,
6401                                                  uint32_t offset,
6402                                                  Location index) {
6403  DCHECK(kEmitCompilerReadBarrier);
6404
6405  // Insert a slow path based read barrier *after* the reference load.
6406  //
6407  // If heap poisoning is enabled, the unpoisoning of the loaded
6408  // reference will be carried out by the runtime within the slow
6409  // path.
6410  //
6411  // Note that `ref` currently does not get unpoisoned (when heap
6412  // poisoning is enabled), which is alright as the `ref` argument is
6413  // not used by the artReadBarrierSlow entry point.
6414  //
6415  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6416  SlowPathCode* slow_path = new (GetGraph()->GetArena())
6417      ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6418  AddSlowPath(slow_path);
6419
6420  __ jmp(slow_path->GetEntryLabel());
6421  __ Bind(slow_path->GetExitLabel());
6422}
6423
6424void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6425                                                       Location out,
6426                                                       Location ref,
6427                                                       Location obj,
6428                                                       uint32_t offset,
6429                                                       Location index) {
6430  if (kEmitCompilerReadBarrier) {
6431    // Baker's read barriers shall be handled by the fast path
6432    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6433    DCHECK(!kUseBakerReadBarrier);
6434    // If heap poisoning is enabled, unpoisoning will be taken care of
6435    // by the runtime within the slow path.
6436    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6437  } else if (kPoisonHeapReferences) {
6438    __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6439  }
6440}
6441
6442void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6443                                                         Location out,
6444                                                         Location root) {
6445  DCHECK(kEmitCompilerReadBarrier);
6446
6447  // Insert a slow path based read barrier *after* the GC root load.
6448  //
6449  // Note that GC roots are not affected by heap poisoning, so we do
6450  // not need to do anything special for this here.
6451  SlowPathCode* slow_path =
6452      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6453  AddSlowPath(slow_path);
6454
6455  __ jmp(slow_path->GetEntryLabel());
6456  __ Bind(slow_path->GetExitLabel());
6457}
6458
6459void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6460  // Nothing to do, this should be removed during prepare for register allocator.
6461  LOG(FATAL) << "Unreachable";
6462}
6463
6464void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6465  // Nothing to do, this should be removed during prepare for register allocator.
6466  LOG(FATAL) << "Unreachable";
6467}
6468
6469// Simple implementation of packed switch - generate cascaded compare/jumps.
6470void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6471  LocationSummary* locations =
6472      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6473  locations->SetInAt(0, Location::RequiresRegister());
6474  locations->AddTemp(Location::RequiresRegister());
6475  locations->AddTemp(Location::RequiresRegister());
6476}
6477
6478void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6479  int32_t lower_bound = switch_instr->GetStartValue();
6480  uint32_t num_entries = switch_instr->GetNumEntries();
6481  LocationSummary* locations = switch_instr->GetLocations();
6482  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6483  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6484  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6485  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6486
6487  // Should we generate smaller inline compare/jumps?
6488  if (num_entries <= kPackedSwitchJumpTableThreshold) {
6489    // Figure out the correct compare values and jump conditions.
6490    // Handle the first compare/branch as a special case because it might
6491    // jump to the default case.
6492    DCHECK_GT(num_entries, 2u);
6493    Condition first_condition;
6494    uint32_t index;
6495    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6496    if (lower_bound != 0) {
6497      first_condition = kLess;
6498      __ cmpl(value_reg_in, Immediate(lower_bound));
6499      __ j(first_condition, codegen_->GetLabelOf(default_block));
6500      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6501
6502      index = 1;
6503    } else {
6504      // Handle all the compare/jumps below.
6505      first_condition = kBelow;
6506      index = 0;
6507    }
6508
6509    // Handle the rest of the compare/jumps.
6510    for (; index + 1 < num_entries; index += 2) {
6511      int32_t compare_to_value = lower_bound + index + 1;
6512      __ cmpl(value_reg_in, Immediate(compare_to_value));
6513      // Jump to successors[index] if value < case_value[index].
6514      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6515      // Jump to successors[index + 1] if value == case_value[index + 1].
6516      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6517    }
6518
6519    if (index != num_entries) {
6520      // There are an odd number of entries. Handle the last one.
6521      DCHECK_EQ(index + 1, num_entries);
6522      __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6523      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6524    }
6525
6526    // And the default for any other value.
6527    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6528      __ jmp(codegen_->GetLabelOf(default_block));
6529    }
6530    return;
6531  }
6532
6533  // Remove the bias, if needed.
6534  Register value_reg_out = value_reg_in.AsRegister();
6535  if (lower_bound != 0) {
6536    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6537    value_reg_out = temp_reg.AsRegister();
6538  }
6539  CpuRegister value_reg(value_reg_out);
6540
6541  // Is the value in range?
6542  __ cmpl(value_reg, Immediate(num_entries - 1));
6543  __ j(kAbove, codegen_->GetLabelOf(default_block));
6544
6545  // We are in the range of the table.
6546  // Load the address of the jump table in the constant area.
6547  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6548
6549  // Load the (signed) offset from the jump table.
6550  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6551
6552  // Add the offset to the address of the table base.
6553  __ addq(temp_reg, base_reg);
6554
6555  // And jump.
6556  __ jmp(temp_reg);
6557}
6558
6559void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6560  if (value == 0) {
6561    __ xorl(dest, dest);
6562  } else {
6563    __ movl(dest, Immediate(value));
6564  }
6565}
6566
6567void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6568  if (value == 0) {
6569    // Clears upper bits too.
6570    __ xorl(dest, dest);
6571  } else if (IsUint<32>(value)) {
6572    // We can use a 32 bit move, as it will zero-extend and is shorter.
6573    __ movl(dest, Immediate(static_cast<int32_t>(value)));
6574  } else {
6575    __ movq(dest, Immediate(value));
6576  }
6577}
6578
6579void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6580  if (value == 0) {
6581    __ xorps(dest, dest);
6582  } else {
6583    __ movss(dest, LiteralInt32Address(value));
6584  }
6585}
6586
6587void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6588  if (value == 0) {
6589    __ xorpd(dest, dest);
6590  } else {
6591    __ movsd(dest, LiteralInt64Address(value));
6592  }
6593}
6594
6595void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6596  Load32BitValue(dest, bit_cast<int32_t, float>(value));
6597}
6598
6599void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6600  Load64BitValue(dest, bit_cast<int64_t, double>(value));
6601}
6602
6603void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6604  if (value == 0) {
6605    __ testl(dest, dest);
6606  } else {
6607    __ cmpl(dest, Immediate(value));
6608  }
6609}
6610
6611void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6612  if (IsInt<32>(value)) {
6613    if (value == 0) {
6614      __ testq(dest, dest);
6615    } else {
6616      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6617    }
6618  } else {
6619    // Value won't fit in an int.
6620    __ cmpq(dest, LiteralInt64Address(value));
6621  }
6622}
6623
6624void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6625  DCHECK(dest.IsDoubleStackSlot());
6626  if (IsInt<32>(value)) {
6627    // Can move directly as an int32 constant.
6628    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6629            Immediate(static_cast<int32_t>(value)));
6630  } else {
6631    Load64BitValue(CpuRegister(TMP), value);
6632    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6633  }
6634}
6635
6636/**
6637 * Class to handle late fixup of offsets into constant area.
6638 */
6639class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6640 public:
6641  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6642      : codegen_(&codegen), offset_into_constant_area_(offset) {}
6643
6644 protected:
6645  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6646
6647  CodeGeneratorX86_64* codegen_;
6648
6649 private:
6650  void Process(const MemoryRegion& region, int pos) OVERRIDE {
6651    // Patch the correct offset for the instruction.  We use the address of the
6652    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6653    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6654    int32_t relative_position = constant_offset - pos;
6655
6656    // Patch in the right value.
6657    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
6658  }
6659
6660  // Location in constant area that the fixup refers to.
6661  size_t offset_into_constant_area_;
6662};
6663
6664/**
6665 t * Class to handle late fixup of offsets to a jump table that will be created in the
6666 * constant area.
6667 */
6668class JumpTableRIPFixup : public RIPFixup {
6669 public:
6670  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
6671      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
6672
6673  void CreateJumpTable() {
6674    X86_64Assembler* assembler = codegen_->GetAssembler();
6675
6676    // Ensure that the reference to the jump table has the correct offset.
6677    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
6678    SetOffset(offset_in_constant_table);
6679
6680    // Compute the offset from the start of the function to this jump table.
6681    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
6682
6683    // Populate the jump table with the correct values for the jump table.
6684    int32_t num_entries = switch_instr_->GetNumEntries();
6685    HBasicBlock* block = switch_instr_->GetBlock();
6686    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
6687    // The value that we want is the target offset - the position of the table.
6688    for (int32_t i = 0; i < num_entries; i++) {
6689      HBasicBlock* b = successors[i];
6690      Label* l = codegen_->GetLabelOf(b);
6691      DCHECK(l->IsBound());
6692      int32_t offset_to_block = l->Position() - current_table_offset;
6693      assembler->AppendInt32(offset_to_block);
6694    }
6695  }
6696
6697 private:
6698  const HPackedSwitch* switch_instr_;
6699};
6700
6701void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
6702  // Generate the constant area if needed.
6703  X86_64Assembler* assembler = GetAssembler();
6704  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
6705    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
6706    assembler->Align(4, 0);
6707    constant_area_start_ = assembler->CodeSize();
6708
6709    // Populate any jump tables.
6710    for (auto jump_table : fixups_to_jump_tables_) {
6711      jump_table->CreateJumpTable();
6712    }
6713
6714    // And now add the constant area to the generated code.
6715    assembler->AddConstantArea();
6716  }
6717
6718  // And finish up.
6719  CodeGenerator::Finalize(allocator);
6720}
6721
6722Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
6723  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
6724  return Address::RIP(fixup);
6725}
6726
6727Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
6728  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
6729  return Address::RIP(fixup);
6730}
6731
6732Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
6733  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
6734  return Address::RIP(fixup);
6735}
6736
6737Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
6738  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
6739  return Address::RIP(fixup);
6740}
6741
6742// TODO: trg as memory.
6743void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
6744  if (!trg.IsValid()) {
6745    DCHECK_EQ(type, Primitive::kPrimVoid);
6746    return;
6747  }
6748
6749  DCHECK_NE(type, Primitive::kPrimVoid);
6750
6751  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
6752  if (trg.Equals(return_loc)) {
6753    return;
6754  }
6755
6756  // Let the parallel move resolver take care of all of this.
6757  HParallelMove parallel_move(GetGraph()->GetArena());
6758  parallel_move.AddMove(return_loc, trg, type, nullptr);
6759  GetMoveResolver()->EmitNativeCode(&parallel_move);
6760}
6761
6762Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
6763  // Create a fixup to be used to create and address the jump table.
6764  JumpTableRIPFixup* table_fixup =
6765      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
6766
6767  // We have to populate the jump tables.
6768  fixups_to_jump_tables_.push_back(table_fixup);
6769  return Address::RIP(table_fixup);
6770}
6771
6772void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
6773                                             const Address& addr_high,
6774                                             int64_t v,
6775                                             HInstruction* instruction) {
6776  if (IsInt<32>(v)) {
6777    int32_t v_32 = v;
6778    __ movq(addr_low, Immediate(v_32));
6779    MaybeRecordImplicitNullCheck(instruction);
6780  } else {
6781    // Didn't fit in a register.  Do it in pieces.
6782    int32_t low_v = Low32Bits(v);
6783    int32_t high_v = High32Bits(v);
6784    __ movl(addr_low, Immediate(low_v));
6785    MaybeRecordImplicitNullCheck(instruction);
6786    __ movl(addr_high, Immediate(high_v));
6787  }
6788}
6789
6790#undef __
6791
6792}  // namespace x86_64
6793}  // namespace art
6794