code_generator_x86_64.cc revision b3cd84a2fbd4875c605cfa5a4a362864b570f1e6
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions, while a jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data when num_entries is small.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

static constexpr int kC2ConditionMask = 0x400;
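// kC2ConditionMask selects bit 10 (the C2 condition flag) of the x87 FPU status word; C2 stays
// set while fprem still needs further iterations, which is how the x87-based floating-point
// remainder code decides whether to loop.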

#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, x).Int32Value()
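// The `__` shorthand routes the assembler-like statements below through the codegen's
// X86_64Assembler. QUICK_ENTRY_POINT(x) expands to the byte offset of quick entrypoint `x`
// inside the Thread object; combined with the gs-relative addressing used by InvokeRuntime
// (gs points at the current Thread on x86-64), it lets runtime calls be dispatched without
// loading the Thread pointer into a general-purpose register.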

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    __ Bind(GetEntryLabel());
    if (type_ == Primitive::kPrimInt) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(Primitive::kPrimLong, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const Primitive::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};
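// Why the slow path above exists: on x86-64, idiv/idivq raises a #DE fault when dividing the
// most negative integer by -1, whereas Java defines that result (the dividend itself for the
// quotient, 0 for the remainder). The division code therefore branches here whenever the divisor
// is -1; negating the dividend (or zeroing the output for a remainder) yields the correct result
// for every dividend, including the overflow case.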

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, instruction_->GetLocations());
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
    : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimInt,
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const OVERRIDE { return true; }

  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls,
                          HInstruction* at,
                          uint32_t dex_pc,
                          bool do_clinit)
      : SlowPathCode(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = at_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
    x86_64_codegen->InvokeRuntime(do_clinit_ ?
                                      QUICK_ENTRY_POINT(pInitializeStaticStorage) :
                                      QUICK_ENTRY_POINT(pInitializeType),
                                  at_,
                                  dex_pc_,
                                  this);
    if (do_clinit_) {
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
    } else {
      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
    }

    Location out = locations->Out();
    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  // The instruction where this slow path is happening.
  // (Might be the load class or an initialization check).
  HInstruction* const at_;

  // The dex PC of `at_`.
  const uint32_t dex_pc_;

  // Whether to initialize the class.
  const bool do_clinit_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
                                                        : locations->Out();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (!is_fatal_) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        object_class,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimNot);

    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
                                    instruction_,
                                    dex_pc,
                                    this);
      CheckEntrypointTypes<
          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                                    instruction_,
                                    dex_pc,
                                    this);
      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const OVERRIDE { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
  }

  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        Primitive::kPrimNot,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        Primitive::kPrimInt,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        Primitive::kPrimNot,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object during a read barrier.
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location out, Location obj)
      : SlowPathCode(instruction), out_(out), obj_(obj) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    Register reg_out = out_.AsRegister<Register>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast())
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
                               instruction_,
                               instruction_->GetDexPc(),
                               this);
    CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

 private:
  const Location out_;
  const Location obj_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(!instruction_->IsInvoke() ||
           (instruction_->IsInvokeStaticOrDirect() &&
            instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function.  So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier.  Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        DCHECK(instruction_->IsInvoke());
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          Primitive::kPrimNot,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          Primitive::kPrimNot,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            Primitive::kPrimInt,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
#define __ down_cast<X86_64Assembler*>(GetAssembler())->

inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default:      break;  // should not happen
  };
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}
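// X86_64FPCondition deliberately maps the ordered comparisons onto the unsigned condition codes
// (below/above): ucomiss and ucomisd report their result only through CF and ZF, the same flags
// an unsigned integer compare sets, so the signed codes (less/greater), which look at SF and OF,
// would not reflect the comparison. A NaN operand produces the "unordered" result (CF, ZF and PF
// all set), which callers handle first via GenerateFPJumps() before testing the mapped condition.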

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method ATTRIBUTE_UNUSED) {
  switch (desired_dispatch_info.code_ptr_location) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
      return HInvokeStaticOrDirect::DispatchInfo {
        desired_dispatch_info.method_load_kind,
        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
        desired_dispatch_info.method_load_data,
        0u
      };
    default:
      return desired_dispatch_info;
  }
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
                                                     Location temp) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
      // temp = thread->string_init_entrypoint
      __ gs()->movq(temp.AsRegister<CpuRegister>(),
                    Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
      __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
      method_patches_.emplace_back(invoke->GetTargetMethod());
      __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
      // Bind a new fixup label at the end of the "movq" insn.
      uint32_t offset = invoke->GetDexCacheArrayOffset();
      __ Bind(NewPcRelativeDexCacheArrayPatch(*invoke->GetTargetMethod().dex_file, offset));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      Register method_reg;
      CpuRegister reg = temp.AsRegister<CpuRegister>();
      if (current_method.IsRegister()) {
        method_reg = current_method.AsRegister<Register>();
      } else {
        DCHECK(invoke->GetLocations()->Intrinsified());
        DCHECK(!current_method.IsValid());
        method_reg = reg.AsRegister();
        __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
      }
      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
      __ movq(reg,
              Address(CpuRegister(method_reg),
                      ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
      // temp = temp[index_in_cache];
      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
      uint32_t index_in_cache = invoke->GetDexMethodIndex();
      __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
      break;
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
      Label* label = &relative_call_patches_.back().label;
      __ call(label);  // Bind to the patch label, override at link time.
      __ Bind(label);  // Bind the label at the end of the "call" insn.
      break;
    }
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
      // Filtered out by GetSupportedInvokeStaticOrDirectDispatch().
      LOG(FATAL) << "Unsupported";
      UNREACHABLE();
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64WordSize).SizeValue()));
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  __ MaybeUnpoisonHeapReference(temp);
  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64WordSize).SizeValue()));
}

void CodeGeneratorX86_64::RecordSimplePatch() {
  if (GetCompilerOptions().GetIncludePatchInformation()) {
    simple_patches_.emplace_back();
    __ Bind(&simple_patches_.back());
  }
}

void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
  __ Bind(&string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                            uint32_t element_offset) {
  // Add a patch entry and return the label.
  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
  return &pc_relative_dex_cache_patches_.back().label;
}

void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      method_patches_.size() +
      relative_call_patches_.size() +
      pc_relative_dex_cache_patches_.size() +
      simple_patches_.size() +
      string_patches_.size();
  linker_patches->reserve(size);
  // The label points to the end of the "movl" insn but the literal offset for method
  // patch needs to point to the embedded constant which occupies the last 4 bytes.
  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
  for (const MethodPatchInfo<Label>& info : method_patches_) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
                                                       info.target_method.dex_file,
                                                       info.target_method.dex_method_index));
  }
  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
                                                             info.target_method.dex_file,
                                                             info.target_method.dex_method_index));
  }
  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
                                                              &info.target_dex_file,
                                                              info.label.Position(),
                                                              info.element_offset));
  }
  for (const Label& label : simple_patches_) {
    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
  }
  for (const StringPatchInfo<Label>& info : string_patches_) {
    // These are always PC-relative, see GetSupportedLoadStringKind().
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
                                                               &info.dex_file,
                                                               info.label.Position(),
                                                               info.string_index));
  }
}

void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << FloatRegister(reg);
}

size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  InvokeRuntime(GetThreadOffset<kX86_64WordSize>(entrypoint).Int32Value(),
                instruction,
                dex_pc,
                slow_path);
}

void CodeGeneratorX86_64::InvokeRuntime(int32_t entry_point_offset,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  ValidateInvokeRuntime(instruction, slow_path);
  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
  RecordPcInfo(instruction, dex_pc, slow_path);
}
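// Note on the gs-relative call above: on x86-64 the runtime keeps the current Thread* in the gs
// segment base, so Address::Absolute(entry_point_offset, /* no_rip */ true) with the gs prefix
// dereferences a field of the current Thread (here one of the quick entrypoint pointers) without
// needing the Thread pointer in a general-purpose register.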

static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
                                         const X86_64InstructionSetFeatures& isa_features,
                                         const CompilerOptions& compiler_options,
                                         OptimizingCompilerStats* stats)
      : CodeGenerator(graph,
                      kNumberOfCpuRegisters,
                      kNumberOfFloatRegisters,
                      kNumberOfCpuRegisterPairs,
                      ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                          arraysize(kCoreCalleeSaves))
                          | (1 << kFakeReturnRegister),
                      ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
                                          arraysize(kFpuCalleeSaves)),
                      compiler_options,
                      stats),
        block_labels_(nullptr),
        location_builder_(graph, this),
        instruction_visitor_(graph, this),
        move_resolver_(graph->GetArena(), this),
        assembler_(graph->GetArena()),
        isa_features_(isa_features),
        constant_area_start_(0),
        method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                               CodeGeneratorX86_64* codegen)
      : InstructionCodeGenerator(graph, codegen),
        assembler_(codegen->GetAssembler()),
        codegen_(codegen) {}

void CodeGeneratorX86_64::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[RSP] = true;

  // Block the register used as TMP.
  blocked_core_registers_[TMP] = true;
}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}

static dwarf::Reg DWARFReg(FloatRegister reg) {
  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}

void CodeGeneratorX86_64::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check = IsLeafMethod()
      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

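  // With implicit stack overflow checks (asserted above), the testq below is only a probe: it
  // reads the address GetStackOverflowReservedBytes(kX86_64) bytes below RSP, so if the stack
  // cannot grow that far the access faults in the protected region and the runtime's fault
  // handler converts the signal into a StackOverflowError. RAX's contents are irrelevant; the
  // instruction matters only for its memory operand, and RecordPcInfo(nullptr, 0) records a map
  // entry so the faulting PC can be attributed to this method.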
  if (!skip_overflow_check) {
    __ testq(CpuRegister(RAX), Address(
        CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
    RecordPcInfo(nullptr, 0);
  }

  if (HasEmptyFrame()) {
    return;
  }

  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
    Register reg = kCoreCalleeSaves[i];
    if (allocated_registers_.ContainsCoreRegister(reg)) {
      __ pushq(CpuRegister(reg));
      __ cfi().AdjustCFAOffset(kX86_64WordSize);
      __ cfi().RelOffset(DWARFReg(reg), 0);
    }
  }

  int adjust = GetFrameSize() - GetCoreSpillSize();
  __ subq(CpuRegister(RSP), Immediate(adjust));
  __ cfi().AdjustCFAOffset(adjust);
  uint32_t xmm_spill_location = GetFpuSpillStart();
  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();

  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
    }
  }

  __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
          CpuRegister(kMethodRegisterArgument));
}

void CodeGeneratorX86_64::GenerateFrameExit() {
  __ cfi().RememberState();
  if (!HasEmptyFrame()) {
    uint32_t xmm_spill_location = GetFpuSpillStart();
    size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
      if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
        int offset = xmm_spill_location + (xmm_spill_slot_size * i);
        __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
        __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
      }
    }

    int adjust = GetFrameSize() - GetCoreSpillSize();
    __ addq(CpuRegister(RSP), Immediate(adjust));
    __ cfi().AdjustCFAOffset(-adjust);

    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ popq(CpuRegister(reg));
        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
        __ cfi().Restore(DWARFReg(reg));
      }
    }
  }
  __ ret();
  __ cfi().RestoreState();
  __ cfi().DefCFAOffset(GetFrameSize());
}

void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

void CodeGeneratorX86_64::Move(Location destination, Location source) {
  if (source.Equals(destination)) {
    return;
  }
  if (destination.IsRegister()) {
    CpuRegister dest = destination.AsRegister<CpuRegister>();
    if (source.IsRegister()) {
      __ movq(dest, source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movd(dest, source.AsFpuRegister<XmmRegister>());
    } else if (source.IsStackSlot()) {
      __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      if (constant->IsLongConstant()) {
        Load64BitValue(dest, constant->AsLongConstant()->GetValue());
      } else {
        Load32BitValue(dest, GetInt32ValueOf(constant));
      }
    } else {
      DCHECK(source.IsDoubleStackSlot());
      __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    }
  } else if (destination.IsFpuRegister()) {
    XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
    if (source.IsRegister()) {
      __ movd(dest, source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movaps(dest, source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      int64_t value = CodeGenerator::GetInt64ValueOf(constant);
      if (constant->IsFloatConstant()) {
        Load32BitValue(dest, static_cast<int32_t>(value));
      } else {
        Load64BitValue(dest, value);
      }
    } else if (source.IsStackSlot()) {
      __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    } else {
      DCHECK(source.IsDoubleStackSlot());
      __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
    }
  } else if (destination.IsStackSlot()) {
    if (source.IsRegister()) {
      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
              source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      int32_t value = GetInt32ValueOf(constant);
      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
    } else {
      DCHECK(source.IsStackSlot()) << source;
      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
    }
  } else {
    DCHECK(destination.IsDoubleStackSlot());
    if (source.IsRegister()) {
      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
              source.AsRegister<CpuRegister>());
    } else if (source.IsFpuRegister()) {
      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsFpuRegister<XmmRegister>());
    } else if (source.IsConstant()) {
      HConstant* constant = source.GetConstant();
      int64_t value;
      if (constant->IsDoubleConstant()) {
        value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
      } else {
        DCHECK(constant->IsLongConstant());
        value = constant->AsLongConstant()->GetValue();
      }
      Store64BitValueToStack(destination, value);
    } else {
      DCHECK(source.IsDoubleStackSlot());
      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
    }
  }
}

void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
}

void CodeGeneratorX86_64::MoveLocation(
    Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
  Move(dst, src);
}

void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
  if (location.IsRegister()) {
    locations->AddTemp(location);
  } else {
    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
  }
}

void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
  DCHECK(!successor->IsExitBlock());

  HBasicBlock* block = got->GetBlock();
  HInstruction* previous = got->GetPrevious();

  HLoopInformation* info = block->GetLoopInformation();
  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
    return;
  }

  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
  }
  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
    __ jmp(codegen_->GetLabelOf(successor));
  }
}
1245
1246void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1247  got->SetLocations(nullptr);
1248}
1249
1250void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1251  HandleGoto(got, got->GetSuccessor());
1252}
1253
1254void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1255  try_boundary->SetLocations(nullptr);
1256}
1257
1258void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1259  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1260  if (!successor->IsExitBlock()) {
1261    HandleGoto(try_boundary, successor);
1262  }
1263}
1264
1265void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1266  exit->SetLocations(nullptr);
1267}
1268
1269void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1270}
1271
1272template<class LabelType>
1273void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1274                                                     LabelType* true_label,
1275                                                     LabelType* false_label) {
1276  if (cond->IsFPConditionTrueIfNaN()) {
1277    __ j(kUnordered, true_label);
1278  } else if (cond->IsFPConditionFalseIfNaN()) {
1279    __ j(kUnordered, false_label);
1280  }
1281  __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1282}
1283
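// Emits the compare or test that sets EFLAGS for `condition`. The caller is
// responsible for the conditional jumps (or setcc) that consume the flags.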
1284void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1285  LocationSummary* locations = condition->GetLocations();
1286
1287  Location left = locations->InAt(0);
1288  Location right = locations->InAt(1);
1289  Primitive::Type type = condition->InputAt(0)->GetType();
1290  switch (type) {
1291    case Primitive::kPrimBoolean:
1292    case Primitive::kPrimByte:
1293    case Primitive::kPrimChar:
1294    case Primitive::kPrimShort:
1295    case Primitive::kPrimInt:
1296    case Primitive::kPrimNot: {
1297      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1298      if (right.IsConstant()) {
1299        int32_t value = CodeGenerator::GetInt32ValueOf(right.GetConstant());
1300        if (value == 0) {
1301          __ testl(left_reg, left_reg);
1302        } else {
1303          __ cmpl(left_reg, Immediate(value));
1304        }
1305      } else if (right.IsStackSlot()) {
1306        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1307      } else {
1308        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1309      }
1310      break;
1311    }
1312    case Primitive::kPrimLong: {
1313      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1314      if (right.IsConstant()) {
1315        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1316        codegen_->Compare64BitValue(left_reg, value);
1317      } else if (right.IsDoubleStackSlot()) {
1318        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1319      } else {
1320        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1321      }
1322      break;
1323    }
1324    case Primitive::kPrimFloat: {
1325      if (right.IsFpuRegister()) {
1326        __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1327      } else if (right.IsConstant()) {
1328        __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1329                   codegen_->LiteralFloatAddress(
1330                     right.GetConstant()->AsFloatConstant()->GetValue()));
1331      } else {
1332        DCHECK(right.IsStackSlot());
1333        __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1334                   Address(CpuRegister(RSP), right.GetStackIndex()));
1335      }
1336      break;
1337    }
1338    case Primitive::kPrimDouble: {
1339      if (right.IsFpuRegister()) {
1340        __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1341      } else if (right.IsConstant()) {
1342        __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1343                   codegen_->LiteralDoubleAddress(
1344                     right.GetConstant()->AsDoubleConstant()->GetValue()));
1345      } else {
1346        DCHECK(right.IsDoubleStackSlot());
1347        __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1348                   Address(CpuRegister(RSP), right.GetStackIndex()));
1349      }
1350      break;
1351    }
1352    default:
1353      LOG(FATAL) << "Unexpected condition type " << type;
1354  }
1355}
1356
1357template<class LabelType>
1358void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1359                                                                  LabelType* true_target_in,
1360                                                                  LabelType* false_target_in) {
1361  // Generated branching requires both targets to be explicit. If either of the
1362  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
1363  LabelType fallthrough_target;
1364  LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1365  LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1366
1367  // Generate the comparison to set the CC.
1368  GenerateCompareTest(condition);
1369
1370  // Now generate the correct jump(s).
1371  Primitive::Type type = condition->InputAt(0)->GetType();
1372  switch (type) {
1373    case Primitive::kPrimLong: {
1374      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1375      break;
1376    }
1377    case Primitive::kPrimFloat: {
1378      GenerateFPJumps(condition, true_target, false_target);
1379      break;
1380    }
1381    case Primitive::kPrimDouble: {
1382      GenerateFPJumps(condition, true_target, false_target);
1383      break;
1384    }
1385    default:
1386      LOG(FATAL) << "Unexpected condition type " << type;
1387  }
1388
1389  if (false_target != &fallthrough_target) {
1390    __ jmp(false_target);
1391  }
1392
1393  if (fallthrough_target.IsLinked()) {
1394    __ Bind(&fallthrough_target);
1395  }
1396}
1397
1398static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1399  // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1400  // are set only strictly before `branch`. We can't use the eflags on FP
1401  // conditions if they are materialized, as materializing them requires branching.
1402  return cond->IsCondition() &&
1403         cond->GetNext() == branch &&
1404         !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1405}
1406
1407template<class LabelType>
1408void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1409                                                           size_t condition_input_index,
1410                                                           LabelType* true_target,
1411                                                           LabelType* false_target) {
1412  HInstruction* cond = instruction->InputAt(condition_input_index);
1413
1414  if (true_target == nullptr && false_target == nullptr) {
1415    // Nothing to do. The code always falls through.
1416    return;
1417  } else if (cond->IsIntConstant()) {
1418    // Constant condition, statically compared against "true" (integer value 1).
1419    if (cond->AsIntConstant()->IsTrue()) {
1420      if (true_target != nullptr) {
1421        __ jmp(true_target);
1422      }
1423    } else {
1424      DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1425      if (false_target != nullptr) {
1426        __ jmp(false_target);
1427      }
1428    }
1429    return;
1430  }
1431
1432  // The following code generates these patterns:
1433  //  (1) true_target == nullptr && false_target != nullptr
1434  //        - opposite condition true => branch to false_target
1435  //  (2) true_target != nullptr && false_target == nullptr
1436  //        - condition true => branch to true_target
1437  //  (3) true_target != nullptr && false_target != nullptr
1438  //        - condition true => branch to true_target
1439  //        - branch to false_target
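  // In case (1) the condition is inverted so that the true successor remains the
  // fall-through path and no extra jump is needed.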
1440  if (IsBooleanValueOrMaterializedCondition(cond)) {
1441    if (AreEflagsSetFrom(cond, instruction)) {
1442      if (true_target == nullptr) {
1443        __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1444      } else {
1445        __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1446      }
1447    } else {
1448      // Materialized condition, compare against 0.
1449      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1450      if (lhs.IsRegister()) {
1451        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1452      } else {
1453        __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1454      }
1455      if (true_target == nullptr) {
1456        __ j(kEqual, false_target);
1457      } else {
1458        __ j(kNotEqual, true_target);
1459      }
1460    }
1461  } else {
1462    // Condition has not been materialized, use its inputs as the
1463    // comparison and its condition as the branch condition.
1464    HCondition* condition = cond->AsCondition();
1465
1466    // If this is a long or FP comparison that has been folded into
1467    // the HCondition, generate the comparison directly.
1468    Primitive::Type type = condition->InputAt(0)->GetType();
1469    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1470      GenerateCompareTestAndBranch(condition, true_target, false_target);
1471      return;
1472    }
1473
1474    Location lhs = condition->GetLocations()->InAt(0);
1475    Location rhs = condition->GetLocations()->InAt(1);
1476    if (rhs.IsRegister()) {
1477      __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1478    } else if (rhs.IsConstant()) {
1479      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1480      codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1481    } else {
1482      __ cmpl(lhs.AsRegister<CpuRegister>(),
1483              Address(CpuRegister(RSP), rhs.GetStackIndex()));
1484    }
1485    if (true_target == nullptr) {
1486      __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1487    } else {
1488      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1489    }
1490  }
1491
1492  // If neither branch falls through (case 3), the conditional branch to `true_target`
1493  // was already emitted (case 2) and we need to emit a jump to `false_target`.
1494  if (true_target != nullptr && false_target != nullptr) {
1495    __ jmp(false_target);
1496  }
1497}
1498
1499void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1500  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1501  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1502    locations->SetInAt(0, Location::Any());
1503  }
1504}
1505
1506void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1507  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1508  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
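  // Pass a null target when the successor is the immediate fall-through block,
  // so that no redundant jump is emitted.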
1509  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1510      nullptr : codegen_->GetLabelOf(true_successor);
1511  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1512      nullptr : codegen_->GetLabelOf(false_successor);
1513  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1514}
1515
1516void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1517  LocationSummary* locations = new (GetGraph()->GetArena())
1518      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1519  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1520    locations->SetInAt(0, Location::Any());
1521  }
1522}
1523
1524void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1525  SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1526  GenerateTestAndBranch<Label>(deoptimize,
1527                               /* condition_input_index */ 0,
1528                               slow_path->GetEntryLabel(),
1529                               /* false_target */ nullptr);
1530}
1531
1532static bool SelectCanUseCMOV(HSelect* select) {
1533  // There are no conditional move instructions for XMMs.
1534  if (Primitive::IsFloatingPointType(select->GetType())) {
1535    return false;
1536  }
1537
1538  // An FP condition doesn't generate the single CC that we need.
1539  HInstruction* condition = select->GetCondition();
1540  if (condition->IsCondition() &&
1541      Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1542    return false;
1543  }
1544
1545  // We can generate a CMOV for this Select.
1546  return true;
1547}
1548
1549void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1550  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1551  if (Primitive::IsFloatingPointType(select->GetType())) {
1552    locations->SetInAt(0, Location::RequiresFpuRegister());
1553    locations->SetInAt(1, Location::Any());
1554  } else {
1555    locations->SetInAt(0, Location::RequiresRegister());
1556    if (SelectCanUseCMOV(select)) {
1557      if (select->InputAt(1)->IsConstant()) {
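        // cmov has no immediate operand form, so a constant true value must be
        // materialized in a register.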
1558        locations->SetInAt(1, Location::RequiresRegister());
1559      } else {
1560        locations->SetInAt(1, Location::Any());
1561      }
1562    } else {
1563      locations->SetInAt(1, Location::Any());
1564    }
1565  }
1566  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1567    locations->SetInAt(2, Location::RequiresRegister());
1568  }
1569  locations->SetOut(Location::SameAsFirstInput());
1570}
1571
1572void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1573  LocationSummary* locations = select->GetLocations();
1574  if (SelectCanUseCMOV(select)) {
1575    // If both the condition and the source types are integer, we can generate
1576    // a CMOV to implement Select.
1577    CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1578    Location value_true_loc = locations->InAt(1);
1579    DCHECK(locations->InAt(0).Equals(locations->Out()));
1580
1581    HInstruction* select_condition = select->GetCondition();
1582    Condition cond = kNotEqual;
1583
1584    // Figure out how to test the 'condition'.
1585    if (select_condition->IsCondition()) {
1586      HCondition* condition = select_condition->AsCondition();
1587      if (!condition->IsEmittedAtUseSite()) {
1588        // This was a previously materialized condition.
1589        // Can we use the existing condition code?
1590        if (AreEflagsSetFrom(condition, select)) {
1591          // Materialization was the previous instruction.  Condition codes are right.
1592          cond = X86_64IntegerCondition(condition->GetCondition());
1593        } else {
1594          // No, we have to recreate the condition code.
1595          CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1596          __ testl(cond_reg, cond_reg);
1597        }
1598      } else {
1599        GenerateCompareTest(condition);
1600        cond = X86_64IntegerCondition(condition->GetCondition());
1601      }
1602    } else {
1603      // Must be a boolean condition, which needs to be compared to 0.
1604      CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1605      __ testl(cond_reg, cond_reg);
1606    }
1607
1608    // If the condition is true, overwrite the output, which already contains the false value.
1609    // Generate the correctly sized CMOV.
1610    bool is_64_bit = Primitive::Is64BitType(select->GetType());
1611    if (value_true_loc.IsRegister()) {
1612      __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1613    } else {
1614      __ cmov(cond,
1615              value_false,
1616              Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1617    }
1618  } else {
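    // Without CMOV, the output already holds the false value (it is the same as
    // the first input), so only branch around the move of the true value.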
1619    NearLabel false_target;
1620    GenerateTestAndBranch<NearLabel>(select,
1621                                     /* condition_input_index */ 2,
1622                                     /* true_target */ nullptr,
1623                                     &false_target);
1624    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1625    __ Bind(&false_target);
1626  }
1627}
1628
1629void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1630  new (GetGraph()->GetArena()) LocationSummary(info);
1631}
1632
1633void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1634  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1635}
1636
1637void CodeGeneratorX86_64::GenerateNop() {
1638  __ nop();
1639}
1640
1641void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1642  LocationSummary* locations =
1643      new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1644  // Handle the long/FP comparisons made in instruction simplification.
1645  switch (cond->InputAt(0)->GetType()) {
1646    case Primitive::kPrimLong:
1647      locations->SetInAt(0, Location::RequiresRegister());
1648      locations->SetInAt(1, Location::Any());
1649      break;
1650    case Primitive::kPrimFloat:
1651    case Primitive::kPrimDouble:
1652      locations->SetInAt(0, Location::RequiresFpuRegister());
1653      locations->SetInAt(1, Location::Any());
1654      break;
1655    default:
1656      locations->SetInAt(0, Location::RequiresRegister());
1657      locations->SetInAt(1, Location::Any());
1658      break;
1659  }
1660  if (!cond->IsEmittedAtUseSite()) {
1661    locations->SetOut(Location::RequiresRegister());
1662  }
1663}
1664
1665void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1666  if (cond->IsEmittedAtUseSite()) {
1667    return;
1668  }
1669
1670  LocationSummary* locations = cond->GetLocations();
1671  Location lhs = locations->InAt(0);
1672  Location rhs = locations->InAt(1);
1673  CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1674  NearLabel true_label, false_label;
1675
1676  switch (cond->InputAt(0)->GetType()) {
1677    default:
1678      // Integer case.
1679
1680      // Clear output register: setcc only sets the low byte.
1681      __ xorl(reg, reg);
1682
1683      if (rhs.IsRegister()) {
1684        __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1685      } else if (rhs.IsConstant()) {
1686        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
1687        codegen_->Compare32BitValue(lhs.AsRegister<CpuRegister>(), constant);
1688      } else {
1689        __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1690      }
1691      __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1692      return;
1693    case Primitive::kPrimLong:
1694      // Clear output register: setcc only sets the low byte.
1695      __ xorl(reg, reg);
1696
1697      if (rhs.IsRegister()) {
1698        __ cmpq(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
1699      } else if (rhs.IsConstant()) {
1700        int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
1701        codegen_->Compare64BitValue(lhs.AsRegister<CpuRegister>(), value);
1702      } else {
1703        __ cmpq(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
1704      }
1705      __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1706      return;
1707    case Primitive::kPrimFloat: {
1708      XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1709      if (rhs.IsConstant()) {
1710        float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1711        __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1712      } else if (rhs.IsStackSlot()) {
1713        __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1714      } else {
1715        __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1716      }
1717      GenerateFPJumps(cond, &true_label, &false_label);
1718      break;
1719    }
1720    case Primitive::kPrimDouble: {
1721      XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1722      if (rhs.IsConstant()) {
1723        double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1724        __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1725      } else if (rhs.IsDoubleStackSlot()) {
1726        __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1727      } else {
1728        __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1729      }
1730      GenerateFPJumps(cond, &true_label, &false_label);
1731      break;
1732    }
1733  }
1734
1735  // Convert the jumps into the result.
1736  NearLabel done_label;
1737
1738  // False case: result = 0.
1739  __ Bind(&false_label);
1740  __ xorl(reg, reg);
1741  __ jmp(&done_label);
1742
1743  // True case: result = 1.
1744  __ Bind(&true_label);
1745  __ movl(reg, Immediate(1));
1746  __ Bind(&done_label);
1747}
1748
1749void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1750  HandleCondition(comp);
1751}
1752
1753void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1754  HandleCondition(comp);
1755}
1756
1757void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1758  HandleCondition(comp);
1759}
1760
1761void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1762  HandleCondition(comp);
1763}
1764
1765void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1766  HandleCondition(comp);
1767}
1768
1769void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1770  HandleCondition(comp);
1771}
1772
1773void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1774  HandleCondition(comp);
1775}
1776
1777void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1778  HandleCondition(comp);
1779}
1780
1781void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1782  HandleCondition(comp);
1783}
1784
1785void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1786  HandleCondition(comp);
1787}
1788
1789void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1790  HandleCondition(comp);
1791}
1792
1793void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1794  HandleCondition(comp);
1795}
1796
1797void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1798  HandleCondition(comp);
1799}
1800
1801void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
1802  HandleCondition(comp);
1803}
1804
1805void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1806  HandleCondition(comp);
1807}
1808
1809void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
1810  HandleCondition(comp);
1811}
1812
1813void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
1814  HandleCondition(comp);
1815}
1816
1817void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
1818  HandleCondition(comp);
1819}
1820
1821void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1822  HandleCondition(comp);
1823}
1824
1825void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
1826  HandleCondition(comp);
1827}
1828
1829void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
1830  LocationSummary* locations =
1831      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
1832  switch (compare->InputAt(0)->GetType()) {
1833    case Primitive::kPrimBoolean:
1834    case Primitive::kPrimByte:
1835    case Primitive::kPrimShort:
1836    case Primitive::kPrimChar:
1837    case Primitive::kPrimInt:
1838    case Primitive::kPrimLong: {
1839      locations->SetInAt(0, Location::RequiresRegister());
1840      locations->SetInAt(1, Location::Any());
1841      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1842      break;
1843    }
1844    case Primitive::kPrimFloat:
1845    case Primitive::kPrimDouble: {
1846      locations->SetInAt(0, Location::RequiresFpuRegister());
1847      locations->SetInAt(1, Location::Any());
1848      locations->SetOut(Location::RequiresRegister());
1849      break;
1850    }
1851    default:
1852      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
1853  }
1854}
1855
1856void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
1857  LocationSummary* locations = compare->GetLocations();
1858  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1859  Location left = locations->InAt(0);
1860  Location right = locations->InAt(1);
1861
1862  NearLabel less, greater, done;
1863  Primitive::Type type = compare->InputAt(0)->GetType();
1864  Condition less_cond = kLess;
1865
1866  switch (type) {
1867    case Primitive::kPrimBoolean:
1868    case Primitive::kPrimByte:
1869    case Primitive::kPrimShort:
1870    case Primitive::kPrimChar:
1871    case Primitive::kPrimInt: {
1872      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1873      if (right.IsConstant()) {
1874        int32_t value = right.GetConstant()->AsIntConstant()->GetValue();
1875        codegen_->Compare32BitValue(left_reg, value);
1876      } else if (right.IsStackSlot()) {
1877        __ cmpl(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1878      } else {
1879        __ cmpl(left_reg, right.AsRegister<CpuRegister>());
1880      }
1881      break;
1882    }
1883    case Primitive::kPrimLong: {
1884      CpuRegister left_reg = left.AsRegister<CpuRegister>();
1885      if (right.IsConstant()) {
1886        int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
1887        codegen_->Compare64BitValue(left_reg, value);
1888      } else if (right.IsDoubleStackSlot()) {
1889        __ cmpq(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1890      } else {
1891        __ cmpq(left_reg, right.AsRegister<CpuRegister>());
1892      }
1893      break;
1894    }
1895    case Primitive::kPrimFloat: {
1896      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1897      if (right.IsConstant()) {
1898        float value = right.GetConstant()->AsFloatConstant()->GetValue();
1899        __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
1900      } else if (right.IsStackSlot()) {
1901        __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1902      } else {
1903        __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
1904      }
1905      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1906      less_cond = kBelow;  // ucomis{s,d} sets CF on less-than (NaN handled above)
1907      break;
1908    }
1909    case Primitive::kPrimDouble: {
1910      XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
1911      if (right.IsConstant()) {
1912        double value = right.GetConstant()->AsDoubleConstant()->GetValue();
1913        __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
1914      } else if (right.IsDoubleStackSlot()) {
1915        __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
1916      } else {
1917        __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
1918      }
1919      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
1920      less_cond = kBelow;  // ucomis{s,d} sets CF on less-than (NaN handled above)
1921      break;
1922    }
1923    default:
1924      LOG(FATAL) << "Unexpected compare type " << type;
1925  }
1926
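  // Materialize the canonical -1 / 0 / 1 result from the flags set above.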
1927  __ movl(out, Immediate(0));
1928  __ j(kEqual, &done);
1929  __ j(less_cond, &less);
1930
1931  __ Bind(&greater);
1932  __ movl(out, Immediate(1));
1933  __ jmp(&done);
1934
1935  __ Bind(&less);
1936  __ movl(out, Immediate(-1));
1937
1938  __ Bind(&done);
1939}
1940
1941void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
1942  LocationSummary* locations =
1943      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1944  locations->SetOut(Location::ConstantLocation(constant));
1945}
1946
1947void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
1948  // Will be generated at use site.
1949}
1950
1951void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
1952  LocationSummary* locations =
1953      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1954  locations->SetOut(Location::ConstantLocation(constant));
1955}
1956
1957void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
1958  // Will be generated at use site.
1959}
1960
1961void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
1962  LocationSummary* locations =
1963      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1964  locations->SetOut(Location::ConstantLocation(constant));
1965}
1966
1967void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
1968  // Will be generated at use site.
1969}
1970
1971void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
1972  LocationSummary* locations =
1973      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1974  locations->SetOut(Location::ConstantLocation(constant));
1975}
1976
1977void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
1978  // Will be generated at use site.
1979}
1980
1981void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
1982  LocationSummary* locations =
1983      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
1984  locations->SetOut(Location::ConstantLocation(constant));
1985}
1986
1987void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
1988    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
1989  // Will be generated at use site.
1990}
1991
1992void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1993  memory_barrier->SetLocations(nullptr);
1994}
1995
1996void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
1997  codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
1998}
1999
2000void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2001  ret->SetLocations(nullptr);
2002}
2003
2004void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2005  codegen_->GenerateFrameExit();
2006}
2007
2008void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2009  LocationSummary* locations =
2010      new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2011  switch (ret->InputAt(0)->GetType()) {
2012    case Primitive::kPrimBoolean:
2013    case Primitive::kPrimByte:
2014    case Primitive::kPrimChar:
2015    case Primitive::kPrimShort:
2016    case Primitive::kPrimInt:
2017    case Primitive::kPrimNot:
2018    case Primitive::kPrimLong:
2019      locations->SetInAt(0, Location::RegisterLocation(RAX));
2020      break;
2021
2022    case Primitive::kPrimFloat:
2023    case Primitive::kPrimDouble:
2024      locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2025      break;
2026
2027    default:
2028      LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2029  }
2030}
2031
2032void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2033  if (kIsDebugBuild) {
2034    switch (ret->InputAt(0)->GetType()) {
2035      case Primitive::kPrimBoolean:
2036      case Primitive::kPrimByte:
2037      case Primitive::kPrimChar:
2038      case Primitive::kPrimShort:
2039      case Primitive::kPrimInt:
2040      case Primitive::kPrimNot:
2041      case Primitive::kPrimLong:
2042        DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2043        break;
2044
2045      case Primitive::kPrimFloat:
2046      case Primitive::kPrimDouble:
2047        DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2048                  XMM0);
2049        break;
2050
2051      default:
2052        LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2053    }
2054  }
2055  codegen_->GenerateFrameExit();
2056}
2057
2058Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2059  switch (type) {
2060    case Primitive::kPrimBoolean:
2061    case Primitive::kPrimByte:
2062    case Primitive::kPrimChar:
2063    case Primitive::kPrimShort:
2064    case Primitive::kPrimInt:
2065    case Primitive::kPrimNot:
2066    case Primitive::kPrimLong:
2067      return Location::RegisterLocation(RAX);
2068
2069    case Primitive::kPrimVoid:
2070      return Location::NoLocation();
2071
2072    case Primitive::kPrimDouble:
2073    case Primitive::kPrimFloat:
2074      return Location::FpuRegisterLocation(XMM0);
2075  }
2076
2077  UNREACHABLE();
2078}
2079
2080Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2081  return Location::RegisterLocation(kMethodRegisterArgument);
2082}
2083
2084Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2085  switch (type) {
2086    case Primitive::kPrimBoolean:
2087    case Primitive::kPrimByte:
2088    case Primitive::kPrimChar:
2089    case Primitive::kPrimShort:
2090    case Primitive::kPrimInt:
2091    case Primitive::kPrimNot: {
2092      uint32_t index = gp_index_++;
2093      stack_index_++;
2094      if (index < calling_convention.GetNumberOfRegisters()) {
2095        return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2096      } else {
2097        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2098      }
2099    }
2100
2101    case Primitive::kPrimLong: {
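      // A long consumes one GP register when passed in registers, but two
      // 32-bit stack slots when passed on the stack.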
2102      uint32_t index = gp_index_;
2103      stack_index_ += 2;
2104      if (index < calling_convention.GetNumberOfRegisters()) {
2105        gp_index_ += 1;
2106        return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2107      } else {
2108        gp_index_ += 2;
2109        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2110      }
2111    }
2112
2113    case Primitive::kPrimFloat: {
2114      uint32_t index = float_index_++;
2115      stack_index_++;
2116      if (index < calling_convention.GetNumberOfFpuRegisters()) {
2117        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2118      } else {
2119        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2120      }
2121    }
2122
2123    case Primitive::kPrimDouble: {
2124      uint32_t index = float_index_++;
2125      stack_index_ += 2;
2126      if (index < calling_convention.GetNumberOfFpuRegisters()) {
2127        return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2128      } else {
2129        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2130      }
2131    }
2132
2133    case Primitive::kPrimVoid:
2134      LOG(FATAL) << "Unexpected parameter type " << type;
2135      break;
2136  }
2137  return Location::NoLocation();
2138}
2139
2140void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2141  // The trampoline uses the same calling convention as the dex calling convention,
2142  // except that instead of loading the first argument register with the target
2143  // Method*, it is loaded with the method_idx.
2144  HandleInvoke(invoke);
2145}
2146
2147void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2148  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2149}
2150
2151void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2152  // Explicit clinit checks triggered by static invokes must have been pruned by
2153  // art::PrepareForRegisterAllocation.
2154  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2155
2156  IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2157  if (intrinsic.TryDispatch(invoke)) {
2158    return;
2159  }
2160
2161  HandleInvoke(invoke);
2162}
2163
2164static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2165  if (invoke->GetLocations()->Intrinsified()) {
2166    IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2167    intrinsic.Dispatch(invoke);
2168    return true;
2169  }
2170  return false;
2171}
2172
2173void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2174  // Explicit clinit checks triggered by static invokes must have been pruned by
2175  // art::PrepareForRegisterAllocation.
2176  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2177
2178  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2179    return;
2180  }
2181
2182  LocationSummary* locations = invoke->GetLocations();
2183  codegen_->GenerateStaticOrDirectCall(
2184      invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2185  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2186}
2187
2188void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2189  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2190  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2191}
2192
2193void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2194  IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2195  if (intrinsic.TryDispatch(invoke)) {
2196    return;
2197  }
2198
2199  HandleInvoke(invoke);
2200}
2201
2202void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2203  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2204    return;
2205  }
2206
2207  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2208  DCHECK(!codegen_->IsLeafMethod());
2209  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2210}
2211
2212void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2213  HandleInvoke(invoke);
2214  // Add the hidden argument.
2215  invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2216}
2217
2218void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2219  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2220  LocationSummary* locations = invoke->GetLocations();
2221  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2222  CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2223  Location receiver = locations->InAt(0);
2224  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2225
2226  // Set the hidden argument. It is safe to do this here, as RAX
2227  // won't be modified thereafter, before the `call` instruction.
2228  DCHECK_EQ(RAX, hidden_reg.AsRegister());
2229  codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2230
2231  if (receiver.IsStackSlot()) {
2232    __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2233    // /* HeapReference<Class> */ temp = temp->klass_
2234    __ movl(temp, Address(temp, class_offset));
2235  } else {
2236    // /* HeapReference<Class> */ temp = receiver->klass_
2237    __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2238  }
2239  codegen_->MaybeRecordImplicitNullCheck(invoke);
2240  // Instead of simply (possibly) unpoisoning `temp` here, we should
2241  // emit a read barrier for the previous class reference load.
2242  // However this is not required in practice, as this is an
2243  // intermediate/temporary reference and because the current
2244  // concurrent copying collector keeps the from-space memory
2245  // intact/accessible until the end of the marking phase (a future
2246  // collector may not preserve this property).
2247  __ MaybeUnpoisonHeapReference(temp);
2248  // temp = temp->GetAddressOfIMT()
2249  __ movq(temp,
2250      Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
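  // The IMT has ImTable::kSize slots; imt_index is reduced modulo kSize, so
  // distinct interface methods may share a slot.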
2252  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2253      invoke->GetImtIndex() % ImTable::kSize, kX86_64PointerSize));
2254  // temp = temp->GetImtEntryAt(method_offset);
2255  __ movq(temp, Address(temp, method_offset));
2256  // call temp->GetEntryPoint();
2257  __ call(Address(temp,
2258                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
2259
2260  DCHECK(!codegen_->IsLeafMethod());
2261  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2262}
2263
2264void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2265  LocationSummary* locations =
2266      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2267  switch (neg->GetResultType()) {
2268    case Primitive::kPrimInt:
2269    case Primitive::kPrimLong:
2270      locations->SetInAt(0, Location::RequiresRegister());
2271      locations->SetOut(Location::SameAsFirstInput());
2272      break;
2273
2274    case Primitive::kPrimFloat:
2275    case Primitive::kPrimDouble:
2276      locations->SetInAt(0, Location::RequiresFpuRegister());
2277      locations->SetOut(Location::SameAsFirstInput());
2278      locations->AddTemp(Location::RequiresFpuRegister());
2279      break;
2280
2281    default:
2282      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2283  }
2284}
2285
2286void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2287  LocationSummary* locations = neg->GetLocations();
2288  Location out = locations->Out();
2289  Location in = locations->InAt(0);
2290  switch (neg->GetResultType()) {
2291    case Primitive::kPrimInt:
2292      DCHECK(in.IsRegister());
2293      DCHECK(in.Equals(out));
2294      __ negl(out.AsRegister<CpuRegister>());
2295      break;
2296
2297    case Primitive::kPrimLong:
2298      DCHECK(in.IsRegister());
2299      DCHECK(in.Equals(out));
2300      __ negq(out.AsRegister<CpuRegister>());
2301      break;
2302
2303    case Primitive::kPrimFloat: {
2304      DCHECK(in.Equals(out));
2305      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2306      // Implement float negation with an exclusive or with value
2307      // 0x80000000 (mask for bit 31, representing the sign of a
2308      // single-precision floating-point number).
2309      __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2310      __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2311      break;
2312    }
2313
2314    case Primitive::kPrimDouble: {
2315      DCHECK(in.Equals(out));
2316      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2317      // Implement double negation with an exclusive or with value
2318      // 0x8000000000000000 (mask for bit 63, representing the sign of
2319      // a double-precision floating-point number).
2320      __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2321      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2322      break;
2323    }
2324
2325    default:
2326      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2327  }
2328}
2329
2330void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2331  LocationSummary* locations =
2332      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2333  Primitive::Type result_type = conversion->GetResultType();
2334  Primitive::Type input_type = conversion->GetInputType();
2335  DCHECK_NE(result_type, input_type);
2336
2337  // The Java language does not allow treating boolean as an integral type but
2338  // our bit representation makes it safe.
2339
2340  switch (result_type) {
2341    case Primitive::kPrimByte:
2342      switch (input_type) {
2343        case Primitive::kPrimLong:
2344          // Type conversion from long to byte is a result of code transformations.
2345        case Primitive::kPrimBoolean:
2346          // Boolean input is a result of code transformations.
2347        case Primitive::kPrimShort:
2348        case Primitive::kPrimInt:
2349        case Primitive::kPrimChar:
2350          // Processing a Dex `int-to-byte' instruction.
2351          locations->SetInAt(0, Location::Any());
2352          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2353          break;
2354
2355        default:
2356          LOG(FATAL) << "Unexpected type conversion from " << input_type
2357                     << " to " << result_type;
2358      }
2359      break;
2360
2361    case Primitive::kPrimShort:
2362      switch (input_type) {
2363        case Primitive::kPrimLong:
2364          // Type conversion from long to short is a result of code transformations.
2365        case Primitive::kPrimBoolean:
2366          // Boolean input is a result of code transformations.
2367        case Primitive::kPrimByte:
2368        case Primitive::kPrimInt:
2369        case Primitive::kPrimChar:
2370          // Processing a Dex `int-to-short' instruction.
2371          locations->SetInAt(0, Location::Any());
2372          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2373          break;
2374
2375        default:
2376          LOG(FATAL) << "Unexpected type conversion from " << input_type
2377                     << " to " << result_type;
2378      }
2379      break;
2380
2381    case Primitive::kPrimInt:
2382      switch (input_type) {
2383        case Primitive::kPrimLong:
2384          // Processing a Dex `long-to-int' instruction.
2385          locations->SetInAt(0, Location::Any());
2386          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2387          break;
2388
2389        case Primitive::kPrimFloat:
2390          // Processing a Dex `float-to-int' instruction.
2391          locations->SetInAt(0, Location::RequiresFpuRegister());
2392          locations->SetOut(Location::RequiresRegister());
2393          break;
2394
2395        case Primitive::kPrimDouble:
2396          // Processing a Dex `double-to-int' instruction.
2397          locations->SetInAt(0, Location::RequiresFpuRegister());
2398          locations->SetOut(Location::RequiresRegister());
2399          break;
2400
2401        default:
2402          LOG(FATAL) << "Unexpected type conversion from " << input_type
2403                     << " to " << result_type;
2404      }
2405      break;
2406
2407    case Primitive::kPrimLong:
2408      switch (input_type) {
2409        case Primitive::kPrimBoolean:
2410          // Boolean input is a result of code transformations.
2411        case Primitive::kPrimByte:
2412        case Primitive::kPrimShort:
2413        case Primitive::kPrimInt:
2414        case Primitive::kPrimChar:
2415          // Processing a Dex `int-to-long' instruction.
2416          // TODO: We would benefit from a (to-be-implemented)
2417          // Location::RegisterOrStackSlot requirement for this input.
2418          locations->SetInAt(0, Location::RequiresRegister());
2419          locations->SetOut(Location::RequiresRegister());
2420          break;
2421
2422        case Primitive::kPrimFloat:
2423          // Processing a Dex `float-to-long' instruction.
2424          locations->SetInAt(0, Location::RequiresFpuRegister());
2425          locations->SetOut(Location::RequiresRegister());
2426          break;
2427
2428        case Primitive::kPrimDouble:
2429          // Processing a Dex `double-to-long' instruction.
2430          locations->SetInAt(0, Location::RequiresFpuRegister());
2431          locations->SetOut(Location::RequiresRegister());
2432          break;
2433
2434        default:
2435          LOG(FATAL) << "Unexpected type conversion from " << input_type
2436                     << " to " << result_type;
2437      }
2438      break;
2439
2440    case Primitive::kPrimChar:
2441      switch (input_type) {
2442        case Primitive::kPrimLong:
2443          // Type conversion from long to char is a result of code transformations.
2444        case Primitive::kPrimBoolean:
2445          // Boolean input is a result of code transformations.
2446        case Primitive::kPrimByte:
2447        case Primitive::kPrimShort:
2448        case Primitive::kPrimInt:
2449          // Processing a Dex `int-to-char' instruction.
2450          locations->SetInAt(0, Location::Any());
2451          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2452          break;
2453
2454        default:
2455          LOG(FATAL) << "Unexpected type conversion from " << input_type
2456                     << " to " << result_type;
2457      }
2458      break;
2459
2460    case Primitive::kPrimFloat:
2461      switch (input_type) {
2462        case Primitive::kPrimBoolean:
2463          // Boolean input is a result of code transformations.
2464        case Primitive::kPrimByte:
2465        case Primitive::kPrimShort:
2466        case Primitive::kPrimInt:
2467        case Primitive::kPrimChar:
2468          // Processing a Dex `int-to-float' instruction.
2469          locations->SetInAt(0, Location::Any());
2470          locations->SetOut(Location::RequiresFpuRegister());
2471          break;
2472
2473        case Primitive::kPrimLong:
2474          // Processing a Dex `long-to-float' instruction.
2475          locations->SetInAt(0, Location::Any());
2476          locations->SetOut(Location::RequiresFpuRegister());
2477          break;
2478
2479        case Primitive::kPrimDouble:
2480          // Processing a Dex `double-to-float' instruction.
2481          locations->SetInAt(0, Location::Any());
2482          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2483          break;
2484
2485        default:
2486          LOG(FATAL) << "Unexpected type conversion from " << input_type
2487                     << " to " << result_type;
2488      }
2489      break;
2490
2491    case Primitive::kPrimDouble:
2492      switch (input_type) {
2493        case Primitive::kPrimBoolean:
2494          // Boolean input is a result of code transformations.
2495        case Primitive::kPrimByte:
2496        case Primitive::kPrimShort:
2497        case Primitive::kPrimInt:
2498        case Primitive::kPrimChar:
2499          // Processing a Dex `int-to-double' instruction.
2500          locations->SetInAt(0, Location::Any());
2501          locations->SetOut(Location::RequiresFpuRegister());
2502          break;
2503
2504        case Primitive::kPrimLong:
2505          // Processing a Dex `long-to-double' instruction.
2506          locations->SetInAt(0, Location::Any());
2507          locations->SetOut(Location::RequiresFpuRegister());
2508          break;
2509
2510        case Primitive::kPrimFloat:
2511          // Processing a Dex `float-to-double' instruction.
2512          locations->SetInAt(0, Location::Any());
2513          locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2514          break;
2515
2516        default:
2517          LOG(FATAL) << "Unexpected type conversion from " << input_type
2518                     << " to " << result_type;
2519      }
2520      break;
2521
2522    default:
2523      LOG(FATAL) << "Unexpected type conversion from " << input_type
2524                 << " to " << result_type;
2525  }
2526}
2527
2528void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2529  LocationSummary* locations = conversion->GetLocations();
2530  Location out = locations->Out();
2531  Location in = locations->InAt(0);
2532  Primitive::Type result_type = conversion->GetResultType();
2533  Primitive::Type input_type = conversion->GetInputType();
2534  DCHECK_NE(result_type, input_type);
2535  switch (result_type) {
2536    case Primitive::kPrimByte:
2537      switch (input_type) {
2538        case Primitive::kPrimLong:
2539          // Type conversion from long to byte is a result of code transformations.
2540        case Primitive::kPrimBoolean:
2541          // Boolean input is a result of code transformations.
2542        case Primitive::kPrimShort:
2543        case Primitive::kPrimInt:
2544        case Primitive::kPrimChar:
2545          // Processing a Dex `int-to-byte' instruction.
2546          if (in.IsRegister()) {
2547            __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2548          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2549            __ movsxb(out.AsRegister<CpuRegister>(),
2550                      Address(CpuRegister(RSP), in.GetStackIndex()));
2551          } else {
2552            __ movl(out.AsRegister<CpuRegister>(),
2553                    Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2554          }
2555          break;
2556
2557        default:
2558          LOG(FATAL) << "Unexpected type conversion from " << input_type
2559                     << " to " << result_type;
2560      }
2561      break;
2562
2563    case Primitive::kPrimShort:
2564      switch (input_type) {
2565        case Primitive::kPrimLong:
2566          // Type conversion from long to short is a result of code transformations.
2567        case Primitive::kPrimBoolean:
2568          // Boolean input is a result of code transformations.
2569        case Primitive::kPrimByte:
2570        case Primitive::kPrimInt:
2571        case Primitive::kPrimChar:
2572          // Processing a Dex `int-to-short' instruction.
2573          if (in.IsRegister()) {
2574            __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2575          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2576            __ movsxw(out.AsRegister<CpuRegister>(),
2577                      Address(CpuRegister(RSP), in.GetStackIndex()));
2578          } else {
2579            __ movl(out.AsRegister<CpuRegister>(),
2580                    Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2581          }
2582          break;
2583
2584        default:
2585          LOG(FATAL) << "Unexpected type conversion from " << input_type
2586                     << " to " << result_type;
2587      }
2588      break;
2589
2590    case Primitive::kPrimInt:
2591      switch (input_type) {
2592        case Primitive::kPrimLong:
2593          // Processing a Dex `long-to-int' instruction.
2594          if (in.IsRegister()) {
2595            __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2596          } else if (in.IsDoubleStackSlot()) {
2597            __ movl(out.AsRegister<CpuRegister>(),
2598                    Address(CpuRegister(RSP), in.GetStackIndex()));
2599          } else {
2600            DCHECK(in.IsConstant());
2601            DCHECK(in.GetConstant()->IsLongConstant());
2602            int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2603            __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2604          }
2605          break;
2606
2607        case Primitive::kPrimFloat: {
2608          // Processing a Dex `float-to-int' instruction.
2609          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2610          CpuRegister output = out.AsRegister<CpuRegister>();
2611          NearLabel done, nan;
2612
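          // cvttss2si produces the integer indefinite value (0x80000000) for NaN
          // and out-of-range inputs, so positive overflow is clamped to INT_MAX
          // and NaN is mapped to 0 explicitly, as Java requires.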
2613          __ movl(output, Immediate(kPrimIntMax));
2614          // if input >= (float)INT_MAX goto done
2615          __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2616          __ j(kAboveEqual, &done);
2617          // if input == NaN goto nan
2618          __ j(kUnordered, &nan);
2619          // output = float-to-int-truncate(input)
2620          __ cvttss2si(output, input, false);
2621          __ jmp(&done);
2622          __ Bind(&nan);
2623          //  output = 0
2624          __ xorl(output, output);
2625          __ Bind(&done);
2626          break;
2627        }
2628
2629        case Primitive::kPrimDouble: {
2630          // Processing a Dex `double-to-int' instruction.
2631          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2632          CpuRegister output = out.AsRegister<CpuRegister>();
2633          NearLabel done, nan;
2634
2635          __ movl(output, Immediate(kPrimIntMax));
2636          // if input >= (double)INT_MAX goto done
2637          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2638          __ j(kAboveEqual, &done);
2639          // if input == NaN goto nan
2640          __ j(kUnordered, &nan);
2641          // output = double-to-int-truncate(input)
2642          __ cvttsd2si(output, input);
2643          __ jmp(&done);
2644          __ Bind(&nan);
2645          //  output = 0
2646          __ xorl(output, output);
2647          __ Bind(&done);
2648          break;
2649        }
2650
2651        default:
2652          LOG(FATAL) << "Unexpected type conversion from " << input_type
2653                     << " to " << result_type;
2654      }
2655      break;
2656
2657    case Primitive::kPrimLong:
2658      DCHECK(out.IsRegister());
2659      switch (input_type) {
2660        case Primitive::kPrimBoolean:
2661          // Boolean input is a result of code transformations.
2662        case Primitive::kPrimByte:
2663        case Primitive::kPrimShort:
2664        case Primitive::kPrimInt:
2665        case Primitive::kPrimChar:
2666          // Processing a Dex `int-to-long' instruction.
2667          DCHECK(in.IsRegister());
2668          __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2669          break;
2670
2671        case Primitive::kPrimFloat: {
2672          // Processing a Dex `float-to-long' instruction.
2673          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2674          CpuRegister output = out.AsRegister<CpuRegister>();
2675          NearLabel done, nan;
2676
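          // The 64-bit cvttss2si produces 0x8000000000000000 for NaN and
          // out-of-range inputs, so positive overflow is clamped to LONG_MAX and
          // NaN is mapped to 0 explicitly, as Java requires.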
2677          codegen_->Load64BitValue(output, kPrimLongMax);
2678          // if input >= (float)LONG_MAX goto done
2679          __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2680          __ j(kAboveEqual, &done);
2681          // if input == NaN goto nan
2682          __ j(kUnordered, &nan);
2683          // output = float-to-long-truncate(input)
2684          __ cvttss2si(output, input, true);
2685          __ jmp(&done);
2686          __ Bind(&nan);
2687          //  output = 0
2688          __ xorl(output, output);
2689          __ Bind(&done);
2690          break;
2691        }
2692
2693        case Primitive::kPrimDouble: {
2694          // Processing a Dex `double-to-long' instruction.
2695          XmmRegister input = in.AsFpuRegister<XmmRegister>();
2696          CpuRegister output = out.AsRegister<CpuRegister>();
2697          NearLabel done, nan;
2698
2699          codegen_->Load64BitValue(output, kPrimLongMax);
2700          // if input >= (double)LONG_MAX goto done
2701          __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2702          __ j(kAboveEqual, &done);
2703          // if input == NaN goto nan
2704          __ j(kUnordered, &nan);
2705          // output = double-to-long-truncate(input)
2706          __ cvttsd2si(output, input, true);
2707          __ jmp(&done);
2708          __ Bind(&nan);
2709          //  output = 0
2710          __ xorl(output, output);
2711          __ Bind(&done);
2712          break;
2713        }
2714
2715        default:
2716          LOG(FATAL) << "Unexpected type conversion from " << input_type
2717                     << " to " << result_type;
2718      }
2719      break;
2720
2721    case Primitive::kPrimChar:
2722      switch (input_type) {
2723        case Primitive::kPrimLong:
2724          // Type conversion from long to char is a result of code transformations.
2725        case Primitive::kPrimBoolean:
2726          // Boolean input is a result of code transformations.
2727        case Primitive::kPrimByte:
2728        case Primitive::kPrimShort:
2729        case Primitive::kPrimInt:
2730          // Processing a Dex `int-to-char' instruction.
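          // `char` is an unsigned 16-bit type, hence the zero-extension (movzxw).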
2731          if (in.IsRegister()) {
2732            __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2733          } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2734            __ movzxw(out.AsRegister<CpuRegister>(),
2735                      Address(CpuRegister(RSP), in.GetStackIndex()));
2736          } else {
2737            __ movl(out.AsRegister<CpuRegister>(),
2738                    Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2739          }
2740          break;
2741
2742        default:
2743          LOG(FATAL) << "Unexpected type conversion from " << input_type
2744                     << " to " << result_type;
2745      }
2746      break;
2747
2748    case Primitive::kPrimFloat:
2749      switch (input_type) {
2750        case Primitive::kPrimBoolean:
2751          // Boolean input is a result of code transformations.
2752        case Primitive::kPrimByte:
2753        case Primitive::kPrimShort:
2754        case Primitive::kPrimInt:
2755        case Primitive::kPrimChar:
2756          // Processing a Dex `int-to-float' instruction.
2757          if (in.IsRegister()) {
2758            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2759          } else if (in.IsConstant()) {
2760            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2761            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2762            codegen_->Load32BitValue(dest, static_cast<float>(v));
2763          } else {
2764            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2765                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
2766          }
2767          break;
2768
2769        case Primitive::kPrimLong:
2770          // Processing a Dex `long-to-float' instruction.
2771          if (in.IsRegister()) {
2772            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2773          } else if (in.IsConstant()) {
2774            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2775            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2776            codegen_->Load32BitValue(dest, static_cast<float>(v));
2777          } else {
2778            __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2779                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
2780          }
2781          break;
2782
2783        case Primitive::kPrimDouble:
2784          // Processing a Dex `double-to-float' instruction.
2785          if (in.IsFpuRegister()) {
2786            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2787          } else if (in.IsConstant()) {
2788            double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2789            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2790            codegen_->Load32BitValue(dest, static_cast<float>(v));
2791          } else {
2792            __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2793                        Address(CpuRegister(RSP), in.GetStackIndex()));
2794          }
2795          break;
2796
2797        default:
2798          LOG(FATAL) << "Unexpected type conversion from " << input_type
2799                     << " to " << result_type;
2800      }
2801      break;
2802
2803    case Primitive::kPrimDouble:
2804      switch (input_type) {
2805        case Primitive::kPrimBoolean:
2806          // Boolean input is a result of code transformations.
2807        case Primitive::kPrimByte:
2808        case Primitive::kPrimShort:
2809        case Primitive::kPrimInt:
2810        case Primitive::kPrimChar:
2811          // Processing a Dex `int-to-double' instruction.
2812          if (in.IsRegister()) {
2813            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2814          } else if (in.IsConstant()) {
2815            int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2816            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2817            codegen_->Load64BitValue(dest, static_cast<double>(v));
2818          } else {
2819            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2820                        Address(CpuRegister(RSP), in.GetStackIndex()), false);
2821          }
2822          break;
2823
2824        case Primitive::kPrimLong:
2825          // Processing a Dex `long-to-double' instruction.
2826          if (in.IsRegister()) {
2827            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2828          } else if (in.IsConstant()) {
2829            int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2830            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2831            codegen_->Load64BitValue(dest, static_cast<double>(v));
2832          } else {
2833            __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
2834                        Address(CpuRegister(RSP), in.GetStackIndex()), true);
2835          }
2836          break;
2837
2838        case Primitive::kPrimFloat:
2839          // Processing a Dex `float-to-double' instruction.
2840          if (in.IsFpuRegister()) {
2841            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2842          } else if (in.IsConstant()) {
2843            float v = in.GetConstant()->AsFloatConstant()->GetValue();
2844            XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2845            codegen_->Load64BitValue(dest, static_cast<double>(v));
2846          } else {
2847            __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
2848                        Address(CpuRegister(RSP), in.GetStackIndex()));
2849          }
2850          break;
2851
2852        default:
2853          LOG(FATAL) << "Unexpected type conversion from " << input_type
2854                     << " to " << result_type;
2855      }
2856      break;
2857
2858    default:
2859      LOG(FATAL) << "Unexpected type conversion from " << input_type
2860                 << " to " << result_type;
2861  }
2862}
2863
2864void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
2865  LocationSummary* locations =
2866      new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
2867  switch (add->GetResultType()) {
2868    case Primitive::kPrimInt: {
2869      locations->SetInAt(0, Location::RequiresRegister());
2870      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
2871      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2872      break;
2873    }
2874
2875    case Primitive::kPrimLong: {
2876      locations->SetInAt(0, Location::RequiresRegister());
2877      // We can use a leaq or addq if the constant can fit in an immediate.
2878      locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
2879      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2880      break;
2881    }
2882
2883    case Primitive::kPrimDouble:
2884    case Primitive::kPrimFloat: {
2885      locations->SetInAt(0, Location::RequiresFpuRegister());
2886      locations->SetInAt(1, Location::Any());
2887      locations->SetOut(Location::SameAsFirstInput());
2888      break;
2889    }
2890
2891    default:
2892      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2893  }
2894}
2895
2896void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
2897  LocationSummary* locations = add->GetLocations();
2898  Location first = locations->InAt(0);
2899  Location second = locations->InAt(1);
2900  Location out = locations->Out();
2901
2902  switch (add->GetResultType()) {
2903    case Primitive::kPrimInt: {
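      // addl destroys its first operand, so when the output register differs from both
      // inputs a leal (a non-destructive, three-operand add) is emitted instead.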
2904      if (second.IsRegister()) {
2905        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2906          __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2907        } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2908          __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2909        } else {
2910          __ leal(out.AsRegister<CpuRegister>(), Address(
2911              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2912        }
2913      } else if (second.IsConstant()) {
2914        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2915          __ addl(out.AsRegister<CpuRegister>(),
2916                  Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
2917        } else {
2918          __ leal(out.AsRegister<CpuRegister>(), Address(
2919              first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
2920        }
2921      } else {
2922        DCHECK(first.Equals(locations->Out()));
2923        __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
2924      }
2925      break;
2926    }
2927
2928    case Primitive::kPrimLong: {
2929      if (second.IsRegister()) {
2930        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2931          __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
2932        } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
2933          __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
2934        } else {
2935          __ leaq(out.AsRegister<CpuRegister>(), Address(
2936              first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
2937        }
2938      } else {
2939        DCHECK(second.IsConstant());
2940        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
2941        int32_t int32_value = Low32Bits(value);
2942        DCHECK_EQ(int32_value, value);
2943        if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
2944          __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
2945        } else {
2946          __ leaq(out.AsRegister<CpuRegister>(), Address(
2947              first.AsRegister<CpuRegister>(), int32_value));
2948        }
2949      }
2950      break;
2951    }
2952
2953    case Primitive::kPrimFloat: {
2954      if (second.IsFpuRegister()) {
2955        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2956      } else if (second.IsConstant()) {
2957        __ addss(first.AsFpuRegister<XmmRegister>(),
2958                 codegen_->LiteralFloatAddress(
2959                     second.GetConstant()->AsFloatConstant()->GetValue()));
2960      } else {
2961        DCHECK(second.IsStackSlot());
2962        __ addss(first.AsFpuRegister<XmmRegister>(),
2963                 Address(CpuRegister(RSP), second.GetStackIndex()));
2964      }
2965      break;
2966    }
2967
2968    case Primitive::kPrimDouble: {
2969      if (second.IsFpuRegister()) {
2970        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
2971      } else if (second.IsConstant()) {
2972        __ addsd(first.AsFpuRegister<XmmRegister>(),
2973                 codegen_->LiteralDoubleAddress(
2974                     second.GetConstant()->AsDoubleConstant()->GetValue()));
2975      } else {
2976        DCHECK(second.IsDoubleStackSlot());
2977        __ addsd(first.AsFpuRegister<XmmRegister>(),
2978                 Address(CpuRegister(RSP), second.GetStackIndex()));
2979      }
2980      break;
2981    }
2982
2983    default:
2984      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
2985  }
2986}
2987
2988void LocationsBuilderX86_64::VisitSub(HSub* sub) {
2989  LocationSummary* locations =
2990      new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
2991  switch (sub->GetResultType()) {
2992    case Primitive::kPrimInt: {
2993      locations->SetInAt(0, Location::RequiresRegister());
2994      locations->SetInAt(1, Location::Any());
2995      locations->SetOut(Location::SameAsFirstInput());
2996      break;
2997    }
2998    case Primitive::kPrimLong: {
2999      locations->SetInAt(0, Location::RequiresRegister());
3000      locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3001      locations->SetOut(Location::SameAsFirstInput());
3002      break;
3003    }
3004    case Primitive::kPrimFloat:
3005    case Primitive::kPrimDouble: {
3006      locations->SetInAt(0, Location::RequiresFpuRegister());
3007      locations->SetInAt(1, Location::Any());
3008      locations->SetOut(Location::SameAsFirstInput());
3009      break;
3010    }
3011    default:
3012      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3013  }
3014}
3015
3016void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3017  LocationSummary* locations = sub->GetLocations();
3018  Location first = locations->InAt(0);
3019  Location second = locations->InAt(1);
3020  DCHECK(first.Equals(locations->Out()));
3021  switch (sub->GetResultType()) {
3022    case Primitive::kPrimInt: {
3023      if (second.IsRegister()) {
3024        __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3025      } else if (second.IsConstant()) {
3026        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3027        __ subl(first.AsRegister<CpuRegister>(), imm);
3028      } else {
3029        __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3030      }
3031      break;
3032    }
3033    case Primitive::kPrimLong: {
3034      if (second.IsConstant()) {
3035        int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3036        DCHECK(IsInt<32>(value));
3037        __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3038      } else {
3039        __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3040      }
3041      break;
3042    }
3043
3044    case Primitive::kPrimFloat: {
3045      if (second.IsFpuRegister()) {
3046        __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3047      } else if (second.IsConstant()) {
3048        __ subss(first.AsFpuRegister<XmmRegister>(),
3049                 codegen_->LiteralFloatAddress(
3050                     second.GetConstant()->AsFloatConstant()->GetValue()));
3051      } else {
3052        DCHECK(second.IsStackSlot());
3053        __ subss(first.AsFpuRegister<XmmRegister>(),
3054                 Address(CpuRegister(RSP), second.GetStackIndex()));
3055      }
3056      break;
3057    }
3058
3059    case Primitive::kPrimDouble: {
3060      if (second.IsFpuRegister()) {
3061        __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3062      } else if (second.IsConstant()) {
3063        __ subsd(first.AsFpuRegister<XmmRegister>(),
3064                 codegen_->LiteralDoubleAddress(
3065                     second.GetConstant()->AsDoubleConstant()->GetValue()));
3066      } else {
3067        DCHECK(second.IsDoubleStackSlot());
3068        __ subsd(first.AsFpuRegister<XmmRegister>(),
3069                 Address(CpuRegister(RSP), second.GetStackIndex()));
3070      }
3071      break;
3072    }
3073
3074    default:
3075      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3076  }
3077}
3078
3079void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3080  LocationSummary* locations =
3081      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3082  switch (mul->GetResultType()) {
3083    case Primitive::kPrimInt: {
3084      locations->SetInAt(0, Location::RequiresRegister());
3085      locations->SetInAt(1, Location::Any());
3086      if (mul->InputAt(1)->IsIntConstant()) {
3087        // Can use 3 operand multiply.
3088        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3089      } else {
3090        locations->SetOut(Location::SameAsFirstInput());
3091      }
3092      break;
3093    }
3094    case Primitive::kPrimLong: {
3095      locations->SetInAt(0, Location::RequiresRegister());
3096      locations->SetInAt(1, Location::Any());
3097      if (mul->InputAt(1)->IsLongConstant() &&
3098          IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3099        // Can use 3 operand multiply.
3100        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3101      } else {
3102        locations->SetOut(Location::SameAsFirstInput());
3103      }
3104      break;
3105    }
3106    case Primitive::kPrimFloat:
3107    case Primitive::kPrimDouble: {
3108      locations->SetInAt(0, Location::RequiresFpuRegister());
3109      locations->SetInAt(1, Location::Any());
3110      locations->SetOut(Location::SameAsFirstInput());
3111      break;
3112    }
3113
3114    default:
3115      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3116  }
3117}
3118
3119void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3120  LocationSummary* locations = mul->GetLocations();
3121  Location first = locations->InAt(0);
3122  Location second = locations->InAt(1);
3123  Location out = locations->Out();
3124  switch (mul->GetResultType()) {
3125    case Primitive::kPrimInt:
3126      // The constant may have ended up in a register, so test explicitly to avoid
3127      // problems where the output may not be the same as the first operand.
3128      if (mul->InputAt(1)->IsIntConstant()) {
3129        Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3130        __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3131      } else if (second.IsRegister()) {
3132        DCHECK(first.Equals(out));
3133        __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3134      } else {
3135        DCHECK(first.Equals(out));
3136        DCHECK(second.IsStackSlot());
3137        __ imull(first.AsRegister<CpuRegister>(),
3138                 Address(CpuRegister(RSP), second.GetStackIndex()));
3139      }
3140      break;
3141    case Primitive::kPrimLong: {
3142      // The constant may have ended up in a register, so test explicitly to avoid
3143      // problems where the output may not be the same as the first operand.
3144      if (mul->InputAt(1)->IsLongConstant()) {
3145        int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3146        if (IsInt<32>(value)) {
3147          __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3148                   Immediate(static_cast<int32_t>(value)));
3149        } else {
3150          // Have to use the constant area.
3151          DCHECK(first.Equals(out));
3152          __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3153        }
3154      } else if (second.IsRegister()) {
3155        DCHECK(first.Equals(out));
3156        __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3157      } else {
3158        DCHECK(second.IsDoubleStackSlot());
3159        DCHECK(first.Equals(out));
3160        __ imulq(first.AsRegister<CpuRegister>(),
3161                 Address(CpuRegister(RSP), second.GetStackIndex()));
3162      }
3163      break;
3164    }
3165
3166    case Primitive::kPrimFloat: {
3167      DCHECK(first.Equals(out));
3168      if (second.IsFpuRegister()) {
3169        __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3170      } else if (second.IsConstant()) {
3171        __ mulss(first.AsFpuRegister<XmmRegister>(),
3172                 codegen_->LiteralFloatAddress(
3173                     second.GetConstant()->AsFloatConstant()->GetValue()));
3174      } else {
3175        DCHECK(second.IsStackSlot());
3176        __ mulss(first.AsFpuRegister<XmmRegister>(),
3177                 Address(CpuRegister(RSP), second.GetStackIndex()));
3178      }
3179      break;
3180    }
3181
3182    case Primitive::kPrimDouble: {
3183      DCHECK(first.Equals(out));
3184      if (second.IsFpuRegister()) {
3185        __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3186      } else if (second.IsConstant()) {
3187        __ mulsd(first.AsFpuRegister<XmmRegister>(),
3188                 codegen_->LiteralDoubleAddress(
3189                     second.GetConstant()->AsDoubleConstant()->GetValue()));
3190      } else {
3191        DCHECK(second.IsDoubleStackSlot());
3192        __ mulsd(first.AsFpuRegister<XmmRegister>(),
3193                 Address(CpuRegister(RSP), second.GetStackIndex()));
3194      }
3195      break;
3196    }
3197
3198    default:
3199      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3200  }
3201}
3202
3203void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3204                                                     uint32_t stack_adjustment, bool is_float) {
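  // The x87 unit loads only from memory, so a source held in an XMM register (or a constant)
  // is first written to the temporary stack slot before the flds/fldl.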
3205  if (source.IsStackSlot()) {
3206    DCHECK(is_float);
3207    __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3208  } else if (source.IsDoubleStackSlot()) {
3209    DCHECK(!is_float);
3210    __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3211  } else {
3212    // Write the value to the temporary location on the stack and load to FP stack.
3213    if (is_float) {
3214      Location stack_temp = Location::StackSlot(temp_offset);
3215      codegen_->Move(stack_temp, source);
3216      __ flds(Address(CpuRegister(RSP), temp_offset));
3217    } else {
3218      Location stack_temp = Location::DoubleStackSlot(temp_offset);
3219      codegen_->Move(stack_temp, source);
3220      __ fldl(Address(CpuRegister(RSP), temp_offset));
3221    }
3222  }
3223}
3224
3225void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3226  Primitive::Type type = rem->GetResultType();
3227  bool is_float = type == Primitive::kPrimFloat;
3228  size_t elem_size = Primitive::ComponentSize(type);
3229  LocationSummary* locations = rem->GetLocations();
3230  Location first = locations->InAt(0);
3231  Location second = locations->InAt(1);
3232  Location out = locations->Out();
3233
3234  // Create stack space for 2 elements.
3235  // TODO: enhance register allocator to ask for stack temporaries.
3236  __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3237
3238  // Load the values to the FP stack in reverse order, using temporaries if needed.
3239  PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3240  PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3241
3242  // Loop doing FPREM until we stabilize.
3243  NearLabel retry;
3244  __ Bind(&retry);
3245  __ fprem();
3246
3247  // Move FP status to AX.
3248  __ fstsw();
3249
3250  // And see if the argument reduction is complete. This is signaled by the
3251  // C2 FPU flag bit set to 0.
3252  __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3253  __ j(kNotEqual, &retry);
3254
3255  // We have settled on the final value. Retrieve it into an XMM register.
3256  // Store FP top of stack to real stack.
3257  if (is_float) {
3258    __ fsts(Address(CpuRegister(RSP), 0));
3259  } else {
3260    __ fstl(Address(CpuRegister(RSP), 0));
3261  }
3262
3263  // Pop the 2 items from the FP stack.
3264  __ fucompp();
3265
3266  // Load the value from the stack into an XMM register.
3267  DCHECK(out.IsFpuRegister()) << out;
3268  if (is_float) {
3269    __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3270  } else {
3271    __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3272  }
3273
3274  // And remove the temporary stack space we allocated.
3275  __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3276}
3277
3278void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3279  DCHECK(instruction->IsDiv() || instruction->IsRem());
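  // x / 1 == x, x / -1 == -x, and x % 1 == x % -1 == 0, so no division instruction is
  // needed. Negating INT_MIN/LONG_MIN wraps back to itself, which matches Java's defined
  // behavior for MIN_VALUE / -1.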
3280
3281  LocationSummary* locations = instruction->GetLocations();
3282  Location second = locations->InAt(1);
3283  DCHECK(second.IsConstant());
3284
3285  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3286  CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3287  int64_t imm = Int64FromConstant(second.GetConstant());
3288
3289  DCHECK(imm == 1 || imm == -1);
3290
3291  switch (instruction->GetResultType()) {
3292    case Primitive::kPrimInt: {
3293      if (instruction->IsRem()) {
3294        __ xorl(output_register, output_register);
3295      } else {
3296        __ movl(output_register, input_register);
3297        if (imm == -1) {
3298          __ negl(output_register);
3299        }
3300      }
3301      break;
3302    }
3303
3304    case Primitive::kPrimLong: {
3305      if (instruction->IsRem()) {
3306        __ xorl(output_register, output_register);
3307      } else {
3308        __ movq(output_register, input_register);
3309        if (imm == -1) {
3310          __ negq(output_register);
3311        }
3312      }
3313      break;
3314    }
3315
3316    default:
3317      LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3318  }
3319}
3320
3321void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3322  LocationSummary* locations = instruction->GetLocations();
3323  Location second = locations->InAt(1);
3324
3325  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3326  CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3327
3328  int64_t imm = Int64FromConstant(second.GetConstant());
3329  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3330  uint64_t abs_imm = AbsOrMin(imm);
3331
3332  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3333
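  // Signed division by 2^k must round toward zero while an arithmetic shift rounds toward
  // negative infinity, so (abs_imm - 1) is added to negative numerators first; the cmov
  // keeps the original value for non-negative numerators. The result is negated when the
  // divisor is negative.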
3334  if (instruction->GetResultType() == Primitive::kPrimInt) {
3335    __ leal(tmp, Address(numerator, abs_imm - 1));
3336    __ testl(numerator, numerator);
3337    __ cmov(kGreaterEqual, tmp, numerator);
3338    int shift = CTZ(imm);
3339    __ sarl(tmp, Immediate(shift));
3340
3341    if (imm < 0) {
3342      __ negl(tmp);
3343    }
3344
3345    __ movl(output_register, tmp);
3346  } else {
3347    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3348    CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3349
3350    codegen_->Load64BitValue(rdx, abs_imm - 1);
3351    __ addq(rdx, numerator);
3352    __ testq(numerator, numerator);
3353    __ cmov(kGreaterEqual, rdx, numerator);
3354    int shift = CTZ(imm);
3355    __ sarq(rdx, Immediate(shift));
3356
3357    if (imm < 0) {
3358      __ negq(rdx);
3359    }
3360
3361    __ movq(output_register, rdx);
3362  }
3363}
3364
3365void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3366  DCHECK(instruction->IsDiv() || instruction->IsRem());
3367
3368  LocationSummary* locations = instruction->GetLocations();
3369  Location second = locations->InAt(1);
3370
3371  CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3372      : locations->GetTemp(0).AsRegister<CpuRegister>();
3373  CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3374  CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3375      : locations->Out().AsRegister<CpuRegister>();
3376  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3377
3378  DCHECK_EQ(RAX, eax.AsRegister());
3379  DCHECK_EQ(RDX, edx.AsRegister());
3380  if (instruction->IsDiv()) {
3381    DCHECK_EQ(RAX, out.AsRegister());
3382  } else {
3383    DCHECK_EQ(RDX, out.AsRegister());
3384  }
3385
3386  int64_t magic;
3387  int shift;
3388
3389  // TODO: can these branches be written as one?
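  // Constant division is strength-reduced to a multiplication by a precomputed fixed-point
  // reciprocal ("magic" value): take the high half of the widening imul, apply a correction
  // when the signs of magic and divisor differ, arithmetic-shift, then add the sign bit to
  // round the quotient toward zero. For rem, the quotient is multiplied back and subtracted.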
3390  if (instruction->GetResultType() == Primitive::kPrimInt) {
3391    int imm = second.GetConstant()->AsIntConstant()->GetValue();
3392
3393    CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3394
3395    __ movl(numerator, eax);
3396
3397    NearLabel no_div;
3398    NearLabel end;
3399    __ testl(eax, eax);
3400    __ j(kNotEqual, &no_div);
3401
3402    __ xorl(out, out);
3403    __ jmp(&end);
3404
3405    __ Bind(&no_div);
3406
3407    __ movl(eax, Immediate(magic));
3408    __ imull(numerator);
3409
3410    if (imm > 0 && magic < 0) {
3411      __ addl(edx, numerator);
3412    } else if (imm < 0 && magic > 0) {
3413      __ subl(edx, numerator);
3414    }
3415
3416    if (shift != 0) {
3417      __ sarl(edx, Immediate(shift));
3418    }
3419
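    // edx += 1 if edx < 0 (rounds the quotient toward zero), mirroring the long path below.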
3420    __ movl(eax, edx);
3421    __ shrl(edx, Immediate(31));
3422    __ addl(edx, eax);
3423
3424    if (instruction->IsRem()) {
3425      __ movl(eax, numerator);
3426      __ imull(edx, Immediate(imm));
3427      __ subl(eax, edx);
3428      __ movl(edx, eax);
3429    } else {
3430      __ movl(eax, edx);
3431    }
3432    __ Bind(&end);
3433  } else {
3434    int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3435
3436    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3437
3438    CpuRegister rax = eax;
3439    CpuRegister rdx = edx;
3440
3441    CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3442
3443    // Save the numerator.
3444    __ movq(numerator, rax);
3445
3446    // RAX = magic
3447    codegen_->Load64BitValue(rax, magic);
3448
3449    // RDX:RAX = magic * numerator
3450    __ imulq(numerator);
3451
3452    if (imm > 0 && magic < 0) {
3453      // RDX += numerator
3454      __ addq(rdx, numerator);
3455    } else if (imm < 0 && magic > 0) {
3456      // RDX -= numerator
3457      __ subq(rdx, numerator);
3458    }
3459
3460    // Shift if needed.
3461    if (shift != 0) {
3462      __ sarq(rdx, Immediate(shift));
3463    }
3464
3465    // RDX += 1 if RDX < 0
3466    __ movq(rax, rdx);
3467    __ shrq(rdx, Immediate(63));
3468    __ addq(rdx, rax);
3469
3470    if (instruction->IsRem()) {
3471      __ movq(rax, numerator);
3472
3473      if (IsInt<32>(imm)) {
3474        __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3475      } else {
3476        __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3477      }
3478
3479      __ subq(rax, rdx);
3480      __ movq(rdx, rax);
3481    } else {
3482      __ movq(rax, rdx);
3483    }
3484  }
3485}
3486
3487void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3488  DCHECK(instruction->IsDiv() || instruction->IsRem());
3489  Primitive::Type type = instruction->GetResultType();
3490  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3491
3492  bool is_div = instruction->IsDiv();
3493  LocationSummary* locations = instruction->GetLocations();
3494
3495  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3496  Location second = locations->InAt(1);
3497
3498  DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3499  DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3500
3501  if (second.IsConstant()) {
3502    int64_t imm = Int64FromConstant(second.GetConstant());
3503
3504    if (imm == 0) {
3505      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
3506    } else if (imm == 1 || imm == -1) {
3507      DivRemOneOrMinusOne(instruction);
3508    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3509      DivByPowerOfTwo(instruction->AsDiv());
3510    } else {
3511      DCHECK(imm <= -2 || imm >= 2);
3512      GenerateDivRemWithAnyConstant(instruction);
3513    }
3514  } else {
3515    SlowPathCode* slow_path =
3516        new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3517            instruction, out.AsRegister(), type, is_div);
3518    codegen_->AddSlowPath(slow_path);
3519
3520    CpuRegister second_reg = second.AsRegister<CpuRegister>();
3521    // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3522    // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3523    // so it's safe to just use negl instead of more complex comparisons.
3524    if (type == Primitive::kPrimInt) {
3525      __ cmpl(second_reg, Immediate(-1));
3526      __ j(kEqual, slow_path->GetEntryLabel());
3527      // edx:eax <- sign-extended of eax
3528      __ cdq();
3529      // eax = quotient, edx = remainder
3530      __ idivl(second_reg);
3531    } else {
3532      __ cmpq(second_reg, Immediate(-1));
3533      __ j(kEqual, slow_path->GetEntryLabel());
3534      // rdx:rax <- sign-extended of rax
3535      __ cqo();
3536      // rax = quotient, rdx = remainder
3537      __ idivq(second_reg);
3538    }
3539    __ Bind(slow_path->GetExitLabel());
3540  }
3541}
3542
3543void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3544  LocationSummary* locations =
3545      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3546  switch (div->GetResultType()) {
3547    case Primitive::kPrimInt:
3548    case Primitive::kPrimLong: {
3549      locations->SetInAt(0, Location::RegisterLocation(RAX));
3550      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3551      locations->SetOut(Location::SameAsFirstInput());
3552      // Intel uses rdx:rax (edx:eax for 32-bit operands) as the dividend.
3553      locations->AddTemp(Location::RegisterLocation(RDX));
3554      // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3555      // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3556      // output and request another temp.
3557      if (div->InputAt(1)->IsConstant()) {
3558        locations->AddTemp(Location::RequiresRegister());
3559      }
3560      break;
3561    }
3562
3563    case Primitive::kPrimFloat:
3564    case Primitive::kPrimDouble: {
3565      locations->SetInAt(0, Location::RequiresFpuRegister());
3566      locations->SetInAt(1, Location::Any());
3567      locations->SetOut(Location::SameAsFirstInput());
3568      break;
3569    }
3570
3571    default:
3572      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3573  }
3574}
3575
3576void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3577  LocationSummary* locations = div->GetLocations();
3578  Location first = locations->InAt(0);
3579  Location second = locations->InAt(1);
3580  DCHECK(first.Equals(locations->Out()));
3581
3582  Primitive::Type type = div->GetResultType();
3583  switch (type) {
3584    case Primitive::kPrimInt:
3585    case Primitive::kPrimLong: {
3586      GenerateDivRemIntegral(div);
3587      break;
3588    }
3589
3590    case Primitive::kPrimFloat: {
3591      if (second.IsFpuRegister()) {
3592        __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3593      } else if (second.IsConstant()) {
3594        __ divss(first.AsFpuRegister<XmmRegister>(),
3595                 codegen_->LiteralFloatAddress(
3596                     second.GetConstant()->AsFloatConstant()->GetValue()));
3597      } else {
3598        DCHECK(second.IsStackSlot());
3599        __ divss(first.AsFpuRegister<XmmRegister>(),
3600                 Address(CpuRegister(RSP), second.GetStackIndex()));
3601      }
3602      break;
3603    }
3604
3605    case Primitive::kPrimDouble: {
3606      if (second.IsFpuRegister()) {
3607        __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3608      } else if (second.IsConstant()) {
3609        __ divsd(first.AsFpuRegister<XmmRegister>(),
3610                 codegen_->LiteralDoubleAddress(
3611                     second.GetConstant()->AsDoubleConstant()->GetValue()));
3612      } else {
3613        DCHECK(second.IsDoubleStackSlot());
3614        __ divsd(first.AsFpuRegister<XmmRegister>(),
3615                 Address(CpuRegister(RSP), second.GetStackIndex()));
3616      }
3617      break;
3618    }
3619
3620    default:
3621      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3622  }
3623}
3624
3625void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3626  Primitive::Type type = rem->GetResultType();
3627  LocationSummary* locations =
3628    new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3629
3630  switch (type) {
3631    case Primitive::kPrimInt:
3632    case Primitive::kPrimLong: {
3633      locations->SetInAt(0, Location::RegisterLocation(RAX));
3634      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3635      // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3636      locations->SetOut(Location::RegisterLocation(RDX));
3637      // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3638      // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3639      // output and request another temp.
3640      if (rem->InputAt(1)->IsConstant()) {
3641        locations->AddTemp(Location::RequiresRegister());
3642      }
3643      break;
3644    }
3645
3646    case Primitive::kPrimFloat:
3647    case Primitive::kPrimDouble: {
3648      locations->SetInAt(0, Location::Any());
3649      locations->SetInAt(1, Location::Any());
3650      locations->SetOut(Location::RequiresFpuRegister());
3651      locations->AddTemp(Location::RegisterLocation(RAX));
3652      break;
3653    }
3654
3655    default:
3656      LOG(FATAL) << "Unexpected rem type " << type;
3657  }
3658}
3659
3660void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3661  Primitive::Type type = rem->GetResultType();
3662  switch (type) {
3663    case Primitive::kPrimInt:
3664    case Primitive::kPrimLong: {
3665      GenerateDivRemIntegral(rem);
3666      break;
3667    }
3668    case Primitive::kPrimFloat:
3669    case Primitive::kPrimDouble: {
3670      GenerateRemFP(rem);
3671      break;
3672    }
3673    default:
3674      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3675  }
3676}
3677
3678void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3679  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
3680      ? LocationSummary::kCallOnSlowPath
3681      : LocationSummary::kNoCall;
3682  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
3683  locations->SetInAt(0, Location::Any());
3684  if (instruction->HasUses()) {
3685    locations->SetOut(Location::SameAsFirstInput());
3686  }
3687}
3688
3689void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3690  SlowPathCode* slow_path =
3691      new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3692  codegen_->AddSlowPath(slow_path);
3693
3694  LocationSummary* locations = instruction->GetLocations();
3695  Location value = locations->InAt(0);
3696
3697  switch (instruction->GetType()) {
3698    case Primitive::kPrimBoolean:
3699    case Primitive::kPrimByte:
3700    case Primitive::kPrimChar:
3701    case Primitive::kPrimShort:
3702    case Primitive::kPrimInt: {
3703      if (value.IsRegister()) {
3704        __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3705        __ j(kEqual, slow_path->GetEntryLabel());
3706      } else if (value.IsStackSlot()) {
3707        __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3708        __ j(kEqual, slow_path->GetEntryLabel());
3709      } else {
3710        DCHECK(value.IsConstant()) << value;
3711        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3712          __ jmp(slow_path->GetEntryLabel());
3713        }
3714      }
3715      break;
3716    }
3717    case Primitive::kPrimLong: {
3718      if (value.IsRegister()) {
3719        __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3720        __ j(kEqual, slow_path->GetEntryLabel());
3721      } else if (value.IsDoubleStackSlot()) {
3722        __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3723        __ j(kEqual, slow_path->GetEntryLabel());
3724      } else {
3725        DCHECK(value.IsConstant()) << value;
3726        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3727          __ jmp(slow_path->GetEntryLabel());
3728        }
3729      }
3730      break;
3731    }
3732    default:
3733      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3734  }
3735}
3736
3737void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3738  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3739
3740  LocationSummary* locations =
3741      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3742
3743  switch (op->GetResultType()) {
3744    case Primitive::kPrimInt:
3745    case Primitive::kPrimLong: {
3746      locations->SetInAt(0, Location::RequiresRegister());
3747      // The shift count needs to be in CL.
3748      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3749      locations->SetOut(Location::SameAsFirstInput());
3750      break;
3751    }
3752    default:
3753      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3754  }
3755}
3756
3757void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3758  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3759
3760  LocationSummary* locations = op->GetLocations();
3761  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3762  Location second = locations->InAt(1);
3763
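  // Only the low 5 (int) or 6 (long) bits of the shift count are significant, per Java
  // semantics; constant counts are masked explicitly below, while register counts in CL are
  // masked identically by the hardware.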
3764  switch (op->GetResultType()) {
3765    case Primitive::kPrimInt: {
3766      if (second.IsRegister()) {
3767        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3768        if (op->IsShl()) {
3769          __ shll(first_reg, second_reg);
3770        } else if (op->IsShr()) {
3771          __ sarl(first_reg, second_reg);
3772        } else {
3773          __ shrl(first_reg, second_reg);
3774        }
3775      } else {
3776        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3777        if (op->IsShl()) {
3778          __ shll(first_reg, imm);
3779        } else if (op->IsShr()) {
3780          __ sarl(first_reg, imm);
3781        } else {
3782          __ shrl(first_reg, imm);
3783        }
3784      }
3785      break;
3786    }
3787    case Primitive::kPrimLong: {
3788      if (second.IsRegister()) {
3789        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3790        if (op->IsShl()) {
3791          __ shlq(first_reg, second_reg);
3792        } else if (op->IsShr()) {
3793          __ sarq(first_reg, second_reg);
3794        } else {
3795          __ shrq(first_reg, second_reg);
3796        }
3797      } else {
3798        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3799        if (op->IsShl()) {
3800          __ shlq(first_reg, imm);
3801        } else if (op->IsShr()) {
3802          __ sarq(first_reg, imm);
3803        } else {
3804          __ shrq(first_reg, imm);
3805        }
3806      }
3807      break;
3808    }
3809    default:
3810      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3811      UNREACHABLE();
3812  }
3813}
3814
3815void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3816  LocationSummary* locations =
3817      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3818
3819  switch (ror->GetResultType()) {
3820    case Primitive::kPrimInt:
3821    case Primitive::kPrimLong: {
3822      locations->SetInAt(0, Location::RequiresRegister());
3823      // The shift count needs to be in CL (unless it is a constant).
3824      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
3825      locations->SetOut(Location::SameAsFirstInput());
3826      break;
3827    }
3828    default:
3829      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3830      UNREACHABLE();
3831  }
3832}
3833
3834void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
3835  LocationSummary* locations = ror->GetLocations();
3836  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3837  Location second = locations->InAt(1);
3838
3839  switch (ror->GetResultType()) {
3840    case Primitive::kPrimInt:
3841      if (second.IsRegister()) {
3842        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3843        __ rorl(first_reg, second_reg);
3844      } else {
3845        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3846        __ rorl(first_reg, imm);
3847      }
3848      break;
3849    case Primitive::kPrimLong:
3850      if (second.IsRegister()) {
3851        CpuRegister second_reg = second.AsRegister<CpuRegister>();
3852        __ rorq(first_reg, second_reg);
3853      } else {
3854        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3855        __ rorq(first_reg, imm);
3856      }
3857      break;
3858    default:
3859      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
3860      UNREACHABLE();
3861  }
3862}
3863
3864void LocationsBuilderX86_64::VisitShl(HShl* shl) {
3865  HandleShift(shl);
3866}
3867
3868void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
3869  HandleShift(shl);
3870}
3871
3872void LocationsBuilderX86_64::VisitShr(HShr* shr) {
3873  HandleShift(shr);
3874}
3875
3876void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
3877  HandleShift(shr);
3878}
3879
3880void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
3881  HandleShift(ushr);
3882}
3883
3884void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
3885  HandleShift(ushr);
3886}
3887
3888void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
3889  LocationSummary* locations =
3890      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3891  InvokeRuntimeCallingConvention calling_convention;
3892  if (instruction->IsStringAlloc()) {
3893    locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
3894  } else {
3895    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3896    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3897  }
3898  locations->SetOut(Location::RegisterLocation(RAX));
3899}
3900
3901void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
3902  // Note: if heap poisoning is enabled, the entry point takes care
3903  // of poisoning the reference.
3904  if (instruction->IsStringAlloc()) {
3905    // String is allocated through StringFactory. Call NewEmptyString entry point.
3906    CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
3907    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize);
3908    __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
3909    __ call(Address(temp, code_offset.SizeValue()));
3910    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
3911  } else {
3912    codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3913                            instruction,
3914                            instruction->GetDexPc(),
3915                            nullptr);
3916    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
3917    DCHECK(!codegen_->IsLeafMethod());
3918  }
3919}
3920
3921void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
3922  LocationSummary* locations =
3923      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
3924  InvokeRuntimeCallingConvention calling_convention;
3925  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3926  locations->SetOut(Location::RegisterLocation(RAX));
3927  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
3928  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
3929}
3930
3931void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
3932  InvokeRuntimeCallingConvention calling_convention;
3933  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
3934                           instruction->GetTypeIndex());
3935  // Note: if heap poisoning is enabled, the entry point takes care
3936  // of poisoning the reference.
3937  codegen_->InvokeRuntime(instruction->GetEntrypoint(),
3938                          instruction,
3939                          instruction->GetDexPc(),
3940                          nullptr);
3941  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
3942
3943  DCHECK(!codegen_->IsLeafMethod());
3944}
3945
3946void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
3947  LocationSummary* locations =
3948      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3949  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
3950  if (location.IsStackSlot()) {
3951    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3952  } else if (location.IsDoubleStackSlot()) {
3953    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
3954  }
3955  locations->SetOut(location);
3956}
3957
3958void InstructionCodeGeneratorX86_64::VisitParameterValue(
3959    HParameterValue* instruction ATTRIBUTE_UNUSED) {
3960  // Nothing to do, the parameter is already at its location.
3961}
3962
3963void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
3964  LocationSummary* locations =
3965      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3966  locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
3967}
3968
3969void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
3970    HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
3971  // Nothing to do, the method is already at its location.
3972}
3973
3974void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3975  LocationSummary* locations =
3976      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
3977  locations->SetInAt(0, Location::RequiresRegister());
3978  locations->SetOut(Location::RequiresRegister());
3979}
3980
3981void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
3982  LocationSummary* locations = instruction->GetLocations();
3983  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
3984    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
3985        instruction->GetIndex(), kX86_64PointerSize).SizeValue();
3986    __ movq(locations->Out().AsRegister<CpuRegister>(),
3987            Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
3988  } else {
3989    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3990        instruction->GetIndex() % ImTable::kSize, kX86_64PointerSize));
3991    __ movq(locations->Out().AsRegister<CpuRegister>(),
3992            Address(locations->InAt(0).AsRegister<CpuRegister>(),
3993            mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3994    __ movq(locations->Out().AsRegister<CpuRegister>(),
3995            Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
3996  }
3997}
3998
3999void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4000  LocationSummary* locations =
4001      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4002  locations->SetInAt(0, Location::RequiresRegister());
4003  locations->SetOut(Location::SameAsFirstInput());
4004}
4005
4006void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4007  LocationSummary* locations = not_->GetLocations();
4008  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4009            locations->Out().AsRegister<CpuRegister>().AsRegister());
4010  Location out = locations->Out();
4011  switch (not_->GetResultType()) {
4012    case Primitive::kPrimInt:
4013      __ notl(out.AsRegister<CpuRegister>());
4014      break;
4015
4016    case Primitive::kPrimLong:
4017      __ notq(out.AsRegister<CpuRegister>());
4018      break;
4019
4020    default:
4021      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4022  }
4023}
4024
4025void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4026  LocationSummary* locations =
4027      new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4028  locations->SetInAt(0, Location::RequiresRegister());
4029  locations->SetOut(Location::SameAsFirstInput());
4030}
4031
4032void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4033  LocationSummary* locations = bool_not->GetLocations();
4034  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4035            locations->Out().AsRegister<CpuRegister>().AsRegister());
4036  Location out = locations->Out();
4037  __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4038}
4039
4040void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4041  LocationSummary* locations =
4042      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4043  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
4044    locations->SetInAt(i, Location::Any());
4045  }
4046  locations->SetOut(Location::Any());
4047}
4048
4049void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4050  LOG(FATAL) << "Unimplemented";
4051}
4052
4053void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4054  /*
4055   * According to the JSR-133 Cookbook, for x86 only StoreLoad/AnyAny barriers need memory fence.
4056   * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4057   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4058   */
4059  switch (kind) {
4060    case MemBarrierKind::kAnyAny: {
4061      MemoryFence();
4062      break;
4063    }
4064    case MemBarrierKind::kAnyStore:
4065    case MemBarrierKind::kLoadAny:
4066    case MemBarrierKind::kStoreStore: {
4067      // nop
4068      break;
4069    }
4070    default:
4071      LOG(FATAL) << "Unexpected memory barier " << kind;
4072  }
4073}
4074
4075void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4076  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4077
4078  bool object_field_get_with_read_barrier =
4079      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4080  LocationSummary* locations =
4081      new (GetGraph()->GetArena()) LocationSummary(instruction,
4082                                                   object_field_get_with_read_barrier ?
4083                                                       LocationSummary::kCallOnSlowPath :
4084                                                       LocationSummary::kNoCall);
4085  locations->SetInAt(0, Location::RequiresRegister());
4086  if (Primitive::IsFloatingPointType(instruction->GetType())) {
4087    locations->SetOut(Location::RequiresFpuRegister());
4088  } else {
4089    // The output overlaps for an object field get when read barriers
4090    // are enabled: we do not want the move to overwrite the object's
4091    // location, as we need it to emit the read barrier.
4092    locations->SetOut(
4093        Location::RequiresRegister(),
4094        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4095  }
4096  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4097    // We need a temporary register for the read barrier marking slow
4098    // path in CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier.
4099    locations->AddTemp(Location::RequiresRegister());
4100  }
4101}
4102
4103void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4104                                                    const FieldInfo& field_info) {
4105  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4106
4107  LocationSummary* locations = instruction->GetLocations();
4108  Location base_loc = locations->InAt(0);
4109  CpuRegister base = base_loc.AsRegister<CpuRegister>();
4110  Location out = locations->Out();
4111  bool is_volatile = field_info.IsVolatile();
4112  Primitive::Type field_type = field_info.GetFieldType();
4113  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4114
4115  switch (field_type) {
4116    case Primitive::kPrimBoolean: {
4117      __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4118      break;
4119    }
4120
4121    case Primitive::kPrimByte: {
4122      __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4123      break;
4124    }
4125
4126    case Primitive::kPrimShort: {
4127      __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4128      break;
4129    }
4130
4131    case Primitive::kPrimChar: {
4132      __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4133      break;
4134    }
4135
4136    case Primitive::kPrimInt: {
4137      __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4138      break;
4139    }
4140
4141    case Primitive::kPrimNot: {
4142      // /* HeapReference<Object> */ out = *(base + offset)
4143      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4144        Location temp_loc = locations->GetTemp(0);
4145        // Note that a potential implicit null check is handled in this
4146        // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4147        codegen_->GenerateFieldLoadWithBakerReadBarrier(
4148            instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
4149        if (is_volatile) {
4150          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4151        }
4152      } else {
4153        __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4154        codegen_->MaybeRecordImplicitNullCheck(instruction);
4155        if (is_volatile) {
4156          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4157        }
4158        // If read barriers are enabled, emit read barriers other than
4159        // Baker's using a slow path (and also unpoison the loaded
4160        // reference, if heap poisoning is enabled).
4161        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4162      }
4163      break;
4164    }
4165
4166    case Primitive::kPrimLong: {
4167      __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4168      break;
4169    }
4170
4171    case Primitive::kPrimFloat: {
4172      __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4173      break;
4174    }
4175
4176    case Primitive::kPrimDouble: {
4177      __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4178      break;
4179    }
4180
4181    case Primitive::kPrimVoid:
4182      LOG(FATAL) << "Unreachable type " << field_type;
4183      UNREACHABLE();
4184  }
4185
4186  if (field_type == Primitive::kPrimNot) {
4187    // Potential implicit null checks, in the case of reference
4188    // fields, are handled in the previous switch statement.
4189  } else {
4190    codegen_->MaybeRecordImplicitNullCheck(instruction);
4191  }
4192
4193  if (is_volatile) {
4194    if (field_type == Primitive::kPrimNot) {
4195      // Memory barriers, in the case of references, are also handled
4196      // in the previous switch statement.
4197    } else {
4198      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4199    }
4200  }
4201}
4202
4203void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4204                                            const FieldInfo& field_info) {
4205  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4206
4207  LocationSummary* locations =
4208      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4209  Primitive::Type field_type = field_info.GetFieldType();
4210  bool is_volatile = field_info.IsVolatile();
4211  bool needs_write_barrier =
4212      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4213
4214  locations->SetInAt(0, Location::RequiresRegister());
4215  if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4216    if (is_volatile) {
4217      // In order to satisfy the semantics of volatile, this must be a single instruction store.
4218      locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4219    } else {
4220      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4221    }
4222  } else {
4223    if (is_volatile) {
4224      // In order to satisfy the semantics of volatile, this must be a single instruction store.
4225      locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4226    } else {
4227      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4228    }
4229  }
4230  if (needs_write_barrier) {
4231    // Temporary registers for the write barrier.
4232    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
4233    locations->AddTemp(Location::RequiresRegister());
4234  } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4235    // Temporary register for the reference poisoning.
4236    locations->AddTemp(Location::RequiresRegister());
4237  }
4238}
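// Note on the int32-constant restriction for volatile stores above: x86-64 has
// no store instruction with a 64-bit immediate, so a long/double constant that
// does not fit in a sign-extended int32 would have to be written as two 32-bit
// moves (see MoveInt64ToAddress below), which is not a single atomic store.
// Limiting volatile constants to Int32 keeps the store down to one instruction,
// e.g. `movq $imm32, offset(base)`.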
4239
4240void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4241                                                    const FieldInfo& field_info,
4242                                                    bool value_can_be_null) {
4243  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4244
4245  LocationSummary* locations = instruction->GetLocations();
4246  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4247  Location value = locations->InAt(1);
4248  bool is_volatile = field_info.IsVolatile();
4249  Primitive::Type field_type = field_info.GetFieldType();
4250  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4251
4252  if (is_volatile) {
4253    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4254  }
4255
4256  bool maybe_record_implicit_null_check_done = false;
4257
4258  switch (field_type) {
4259    case Primitive::kPrimBoolean:
4260    case Primitive::kPrimByte: {
4261      if (value.IsConstant()) {
4262        int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4263        __ movb(Address(base, offset), Immediate(v));
4264      } else {
4265        __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4266      }
4267      break;
4268    }
4269
4270    case Primitive::kPrimShort:
4271    case Primitive::kPrimChar: {
4272      if (value.IsConstant()) {
4273        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4274        __ movw(Address(base, offset), Immediate(v));
4275      } else {
4276        __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4277      }
4278      break;
4279    }
4280
4281    case Primitive::kPrimInt:
4282    case Primitive::kPrimNot: {
4283      if (value.IsConstant()) {
4284        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4285        // `field_type == Primitive::kPrimNot` implies `v == 0`.
4286        DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4287        // Note: if heap poisoning is enabled, no need to poison
4288        // (negate) `v` if it is a reference, as it would be null.
4289        __ movl(Address(base, offset), Immediate(v));
4290      } else {
4291        if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4292          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4293          __ movl(temp, value.AsRegister<CpuRegister>());
4294          __ PoisonHeapReference(temp);
4295          __ movl(Address(base, offset), temp);
4296        } else {
4297          __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4298        }
4299      }
4300      break;
4301    }
4302
4303    case Primitive::kPrimLong: {
4304      if (value.IsConstant()) {
4305        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4306        codegen_->MoveInt64ToAddress(Address(base, offset),
4307                                     Address(base, offset + sizeof(int32_t)),
4308                                     v,
4309                                     instruction);
4310        maybe_record_implicit_null_check_done = true;
4311      } else {
4312        __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4313      }
4314      break;
4315    }
4316
4317    case Primitive::kPrimFloat: {
4318      if (value.IsConstant()) {
4319        int32_t v =
4320            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4321        __ movl(Address(base, offset), Immediate(v));
4322      } else {
4323        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4324      }
4325      break;
4326    }
4327
4328    case Primitive::kPrimDouble: {
4329      if (value.IsConstant()) {
4330        int64_t v =
4331            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4332        codegen_->MoveInt64ToAddress(Address(base, offset),
4333                                     Address(base, offset + sizeof(int32_t)),
4334                                     v,
4335                                     instruction);
4336        maybe_record_implicit_null_check_done = true;
4337      } else {
4338        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4339      }
4340      break;
4341    }
4342
4343    case Primitive::kPrimVoid:
4344      LOG(FATAL) << "Unreachable type " << field_type;
4345      UNREACHABLE();
4346  }
4347
4348  if (!maybe_record_implicit_null_check_done) {
4349    codegen_->MaybeRecordImplicitNullCheck(instruction);
4350  }
4351
4352  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4353    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4354    CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4355    codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4356  }
4357
4358  if (is_volatile) {
4359    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4360  }
4361}
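// For a volatile field store, the kAnyStore barrier emitted before the store is
// a no-op on x86-64 and the kAnyAny barrier emitted after it is the real fence,
// so the overall sequence is roughly `mov value, field; mfence` (or the
// locked-add equivalent), matching the JSR-133 cookbook recipe for a volatile
// write.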
4362
4363void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4364  HandleFieldSet(instruction, instruction->GetFieldInfo());
4365}
4366
4367void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4368  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4369}
4370
4371void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4372  HandleFieldGet(instruction);
4373}
4374
4375void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4376  HandleFieldGet(instruction, instruction->GetFieldInfo());
4377}
4378
4379void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4380  HandleFieldGet(instruction);
4381}
4382
4383void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4384  HandleFieldGet(instruction, instruction->GetFieldInfo());
4385}
4386
4387void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4388  HandleFieldSet(instruction, instruction->GetFieldInfo());
4389}
4390
4391void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4392  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4393}
4394
4395void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4396    HUnresolvedInstanceFieldGet* instruction) {
4397  FieldAccessCallingConventionX86_64 calling_convention;
4398  codegen_->CreateUnresolvedFieldLocationSummary(
4399      instruction, instruction->GetFieldType(), calling_convention);
4400}
4401
4402void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4403    HUnresolvedInstanceFieldGet* instruction) {
4404  FieldAccessCallingConventionX86_64 calling_convention;
4405  codegen_->GenerateUnresolvedFieldAccess(instruction,
4406                                          instruction->GetFieldType(),
4407                                          instruction->GetFieldIndex(),
4408                                          instruction->GetDexPc(),
4409                                          calling_convention);
4410}
4411
4412void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4413    HUnresolvedInstanceFieldSet* instruction) {
4414  FieldAccessCallingConventionX86_64 calling_convention;
4415  codegen_->CreateUnresolvedFieldLocationSummary(
4416      instruction, instruction->GetFieldType(), calling_convention);
4417}
4418
4419void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4420    HUnresolvedInstanceFieldSet* instruction) {
4421  FieldAccessCallingConventionX86_64 calling_convention;
4422  codegen_->GenerateUnresolvedFieldAccess(instruction,
4423                                          instruction->GetFieldType(),
4424                                          instruction->GetFieldIndex(),
4425                                          instruction->GetDexPc(),
4426                                          calling_convention);
4427}
4428
4429void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4430    HUnresolvedStaticFieldGet* instruction) {
4431  FieldAccessCallingConventionX86_64 calling_convention;
4432  codegen_->CreateUnresolvedFieldLocationSummary(
4433      instruction, instruction->GetFieldType(), calling_convention);
4434}
4435
4436void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4437    HUnresolvedStaticFieldGet* instruction) {
4438  FieldAccessCallingConventionX86_64 calling_convention;
4439  codegen_->GenerateUnresolvedFieldAccess(instruction,
4440                                          instruction->GetFieldType(),
4441                                          instruction->GetFieldIndex(),
4442                                          instruction->GetDexPc(),
4443                                          calling_convention);
4444}
4445
4446void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4447    HUnresolvedStaticFieldSet* instruction) {
4448  FieldAccessCallingConventionX86_64 calling_convention;
4449  codegen_->CreateUnresolvedFieldLocationSummary(
4450      instruction, instruction->GetFieldType(), calling_convention);
4451}
4452
4453void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4454    HUnresolvedStaticFieldSet* instruction) {
4455  FieldAccessCallingConventionX86_64 calling_convention;
4456  codegen_->GenerateUnresolvedFieldAccess(instruction,
4457                                          instruction->GetFieldType(),
4458                                          instruction->GetFieldIndex(),
4459                                          instruction->GetDexPc(),
4460                                          calling_convention);
4461}
4462
4463void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4464  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4465      ? LocationSummary::kCallOnSlowPath
4466      : LocationSummary::kNoCall;
4467  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4468  Location loc = codegen_->IsImplicitNullCheckAllowed(instruction)
4469      ? Location::RequiresRegister()
4470      : Location::Any();
4471  locations->SetInAt(0, loc);
4472  if (instruction->HasUses()) {
4473    locations->SetOut(Location::SameAsFirstInput());
4474  }
4475}
4476
4477void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4478  if (CanMoveNullCheckToUser(instruction)) {
4479    return;
4480  }
4481  LocationSummary* locations = instruction->GetLocations();
4482  Location obj = locations->InAt(0);
4483
4484  __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4485  RecordPcInfo(instruction, instruction->GetDexPc());
4486}
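// The `testl RAX, [obj + 0]` above performs a load purely for its side effect:
// if `obj` is null the access faults, the runtime's fault handler maps the
// faulting PC back to the dex PC recorded by RecordPcInfo, and the
// NullPointerException is raised there. `testl` only updates the flags, so no
// register is overwritten; RAX is simply an arbitrary register operand.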
4487
4488void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4489  SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4490  AddSlowPath(slow_path);
4491
4492  LocationSummary* locations = instruction->GetLocations();
4493  Location obj = locations->InAt(0);
4494
4495  if (obj.IsRegister()) {
4496    __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4497  } else if (obj.IsStackSlot()) {
4498    __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4499  } else {
4500    DCHECK(obj.IsConstant()) << obj;
4501    DCHECK(obj.GetConstant()->IsNullConstant());
4502    __ jmp(slow_path->GetEntryLabel());
4503    return;
4504  }
4505  __ j(kEqual, slow_path->GetEntryLabel());
4506}
4507
4508void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4509  codegen_->GenerateNullCheck(instruction);
4510}
4511
4512void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4513  bool object_array_get_with_read_barrier =
4514      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4515  LocationSummary* locations =
4516      new (GetGraph()->GetArena()) LocationSummary(instruction,
4517                                                   object_array_get_with_read_barrier ?
4518                                                       LocationSummary::kCallOnSlowPath :
4519                                                       LocationSummary::kNoCall);
4520  locations->SetInAt(0, Location::RequiresRegister());
4521  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4522  if (Primitive::IsFloatingPointType(instruction->GetType())) {
4523    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4524  } else {
4525    // The output overlaps for an object array get when read barriers
4526    // are enabled: we do not want the move to overwrite the array's
4527    // location, as we need it to emit the read barrier.
4528    locations->SetOut(
4529        Location::RequiresRegister(),
4530        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4531  }
4532  // We need a temporary register for the read barrier marking slow
4533  // path in CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier.
4534  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4535    locations->AddTemp(Location::RequiresRegister());
4536  }
4537}
4538
4539void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4540  LocationSummary* locations = instruction->GetLocations();
4541  Location obj_loc = locations->InAt(0);
4542  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4543  Location index = locations->InAt(1);
4544  Location out_loc = locations->Out();
4545
4546  Primitive::Type type = instruction->GetType();
4547  switch (type) {
4548    case Primitive::kPrimBoolean: {
4549      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4550      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4551      if (index.IsConstant()) {
4552        __ movzxb(out, Address(obj,
4553            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4554      } else {
4555        __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4556      }
4557      break;
4558    }
4559
4560    case Primitive::kPrimByte: {
4561      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
4562      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4563      if (index.IsConstant()) {
4564        __ movsxb(out, Address(obj,
4565            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
4566      } else {
4567        __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
4568      }
4569      break;
4570    }
4571
4572    case Primitive::kPrimShort: {
4573      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
4574      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4575      if (index.IsConstant()) {
4576        __ movsxw(out, Address(obj,
4577            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4578      } else {
4579        __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4580      }
4581      break;
4582    }
4583
4584    case Primitive::kPrimChar: {
4585      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4586      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4587      if (index.IsConstant()) {
4588        __ movzxw(out, Address(obj,
4589            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
4590      } else {
4591        __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
4592      }
4593      break;
4594    }
4595
4596    case Primitive::kPrimInt: {
4597      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4598      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4599      if (index.IsConstant()) {
4600        __ movl(out, Address(obj,
4601            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4602      } else {
4603        __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4604      }
4605      break;
4606    }
4607
4608    case Primitive::kPrimNot: {
4609      static_assert(
4610          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4611          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4612      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4613      // /* HeapReference<Object> */ out =
4614      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
4615      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4616        Location temp = locations->GetTemp(0);
4617        // Note that a potential implicit null check is handled in this
4618        // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4619        codegen_->GenerateArrayLoadWithBakerReadBarrier(
4620            instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
4621      } else {
4622        CpuRegister out = out_loc.AsRegister<CpuRegister>();
4623        if (index.IsConstant()) {
4624          uint32_t offset =
4625              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4626          __ movl(out, Address(obj, offset));
4627          codegen_->MaybeRecordImplicitNullCheck(instruction);
4628          // If read barriers are enabled, emit read barriers other than
4629          // Baker's using a slow path (and also unpoison the loaded
4630          // reference, if heap poisoning is enabled).
4631          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4632        } else {
4633          __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4634          codegen_->MaybeRecordImplicitNullCheck(instruction);
4635          // If read barriers are enabled, emit read barriers other than
4636          // Baker's using a slow path (and also unpoison the loaded
4637          // reference, if heap poisoning is enabled).
4638          codegen_->MaybeGenerateReadBarrierSlow(
4639              instruction, out_loc, out_loc, obj_loc, data_offset, index);
4640        }
4641      }
4642      break;
4643    }
4644
4645    case Primitive::kPrimLong: {
4646      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4647      CpuRegister out = out_loc.AsRegister<CpuRegister>();
4648      if (index.IsConstant()) {
4649        __ movq(out, Address(obj,
4650            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4651      } else {
4652        __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4653      }
4654      break;
4655    }
4656
4657    case Primitive::kPrimFloat: {
4658      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4659      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4660      if (index.IsConstant()) {
4661        __ movss(out, Address(obj,
4662            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
4663      } else {
4664        __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
4665      }
4666      break;
4667    }
4668
4669    case Primitive::kPrimDouble: {
4670      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4671      XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4672      if (index.IsConstant()) {
4673        __ movsd(out, Address(obj,
4674            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
4675      } else {
4676        __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
4677      }
4678      break;
4679    }
4680
4681    case Primitive::kPrimVoid:
4682      LOG(FATAL) << "Unreachable type " << type;
4683      UNREACHABLE();
4684  }
4685
4686  if (type == Primitive::kPrimNot) {
4687    // Potential implicit null checks, in the case of reference
4688    // arrays, are handled in the previous switch statement.
4689  } else {
4690    codegen_->MaybeRecordImplicitNullCheck(instruction);
4691  }
4692}
4693
4694void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4695  Primitive::Type value_type = instruction->GetComponentType();
4696
4697  bool needs_write_barrier =
4698      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4699  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4700  bool object_array_set_with_read_barrier =
4701      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
4702
4703  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4704      instruction,
4705      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
4706          LocationSummary::kCallOnSlowPath :
4707          LocationSummary::kNoCall);
4708
4709  locations->SetInAt(0, Location::RequiresRegister());
4710  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4711  if (Primitive::IsFloatingPointType(value_type)) {
4712    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4713  } else {
4714    locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4715  }
4716
4717  if (needs_write_barrier) {
4718    // Temporary registers for the write barrier.
4719
4720    // This first temporary register is possibly used for heap
4721    // reference poisoning and/or read barrier emission too.
4722    locations->AddTemp(Location::RequiresRegister());
4723    locations->AddTemp(Location::RequiresRegister());
4724  }
4725}
4726
4727void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4728  LocationSummary* locations = instruction->GetLocations();
4729  Location array_loc = locations->InAt(0);
4730  CpuRegister array = array_loc.AsRegister<CpuRegister>();
4731  Location index = locations->InAt(1);
4732  Location value = locations->InAt(2);
4733  Primitive::Type value_type = instruction->GetComponentType();
4734  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4735  bool needs_write_barrier =
4736      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4737  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4738  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4739  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4740
4741  switch (value_type) {
4742    case Primitive::kPrimBoolean:
4743    case Primitive::kPrimByte: {
4744      uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4745      Address address = index.IsConstant()
4746          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset)
4747          : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset);
4748      if (value.IsRegister()) {
4749        __ movb(address, value.AsRegister<CpuRegister>());
4750      } else {
4751        __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4752      }
4753      codegen_->MaybeRecordImplicitNullCheck(instruction);
4754      break;
4755    }
4756
4757    case Primitive::kPrimShort:
4758    case Primitive::kPrimChar: {
4759      uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4760      Address address = index.IsConstant()
4761          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset)
4762          : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset);
4763      if (value.IsRegister()) {
4764        __ movw(address, value.AsRegister<CpuRegister>());
4765      } else {
4766        DCHECK(value.IsConstant()) << value;
4767        __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4768      }
4769      codegen_->MaybeRecordImplicitNullCheck(instruction);
4770      break;
4771    }
4772
4773    case Primitive::kPrimNot: {
4774      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4775      Address address = index.IsConstant()
4776          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4777          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4778
4779      if (!value.IsRegister()) {
4780        // Just setting null.
4781        DCHECK(instruction->InputAt(2)->IsNullConstant());
4782        DCHECK(value.IsConstant()) << value;
4783        __ movl(address, Immediate(0));
4784        codegen_->MaybeRecordImplicitNullCheck(instruction);
4785        DCHECK(!needs_write_barrier);
4786        DCHECK(!may_need_runtime_call_for_type_check);
4787        break;
4788      }
4789
4790      DCHECK(needs_write_barrier);
4791      CpuRegister register_value = value.AsRegister<CpuRegister>();
4792      NearLabel done, not_null, do_put;
4793      SlowPathCode* slow_path = nullptr;
4794      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4795      if (may_need_runtime_call_for_type_check) {
4796        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4797        codegen_->AddSlowPath(slow_path);
4798        if (instruction->GetValueCanBeNull()) {
4799          __ testl(register_value, register_value);
4800          __ j(kNotEqual, &not_null);
4801          __ movl(address, Immediate(0));
4802          codegen_->MaybeRecordImplicitNullCheck(instruction);
4803          __ jmp(&done);
4804          __ Bind(&not_null);
4805        }
4806
4807        if (kEmitCompilerReadBarrier) {
4808          // When read barriers are enabled, the type checking
4809          // instrumentation requires two read barriers:
4810          //
4811          //   __ movl(temp2, temp);
4812          //   // /* HeapReference<Class> */ temp = temp->component_type_
4813          //   __ movl(temp, Address(temp, component_offset));
4814          //   codegen_->GenerateReadBarrierSlow(
4815          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
4816          //
4817          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
4818          //   __ movl(temp2, Address(register_value, class_offset));
4819          //   codegen_->GenerateReadBarrierSlow(
4820          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
4821          //
4822          //   __ cmpl(temp, temp2);
4823          //
4824          // However, the second read barrier may trash `temp`, as it
4825          // is a temporary register, and as such would not be saved
4826          // along with live registers before calling the runtime (nor
4827          // restored afterwards).  So in this case, we bail out and
4828          // delegate the work to the array set slow path.
4829          //
4830          // TODO: Extend the register allocator to support a new
4831          // "(locally) live temp" location so as to avoid always
4832          // going into the slow path when read barriers are enabled.
4833          __ jmp(slow_path->GetEntryLabel());
4834        } else {
4835          // /* HeapReference<Class> */ temp = array->klass_
4836          __ movl(temp, Address(array, class_offset));
4837          codegen_->MaybeRecordImplicitNullCheck(instruction);
4838          __ MaybeUnpoisonHeapReference(temp);
4839
4840          // /* HeapReference<Class> */ temp = temp->component_type_
4841          __ movl(temp, Address(temp, component_offset));
4842          // If heap poisoning is enabled, no need to unpoison `temp`
4843          // nor the object reference in `register_value->klass`, as
4844          // we are comparing two poisoned references.
4845          __ cmpl(temp, Address(register_value, class_offset));
4846
4847          if (instruction->StaticTypeOfArrayIsObjectArray()) {
4848            __ j(kEqual, &do_put);
4849            // If heap poisoning is enabled, the `temp` reference has
4850            // not been unpoisoned yet; unpoison it now.
4851            __ MaybeUnpoisonHeapReference(temp);
4852
4853            // /* HeapReference<Class> */ temp = temp->super_class_
4854            __ movl(temp, Address(temp, super_offset));
4855            // If heap poisoning is enabled, no need to unpoison
4856            // `temp`, as we are comparing against null below.
4857            __ testl(temp, temp);
4858            __ j(kNotEqual, slow_path->GetEntryLabel());
4859            __ Bind(&do_put);
4860          } else {
4861            __ j(kNotEqual, slow_path->GetEntryLabel());
4862          }
4863        }
4864      }
4865
4866      if (kPoisonHeapReferences) {
4867        __ movl(temp, register_value);
4868        __ PoisonHeapReference(temp);
4869        __ movl(address, temp);
4870      } else {
4871        __ movl(address, register_value);
4872      }
4873      if (!may_need_runtime_call_for_type_check) {
4874        codegen_->MaybeRecordImplicitNullCheck(instruction);
4875      }
4876
4877      CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4878      codegen_->MarkGCCard(
4879          temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4880      __ Bind(&done);
4881
4882      if (slow_path != nullptr) {
4883        __ Bind(slow_path->GetExitLabel());
4884      }
4885
4886      break;
4887    }
4888
4889    case Primitive::kPrimInt: {
4890      uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4891      Address address = index.IsConstant()
4892          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4893          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4894      if (value.IsRegister()) {
4895        __ movl(address, value.AsRegister<CpuRegister>());
4896      } else {
4897        DCHECK(value.IsConstant()) << value;
4898        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4899        __ movl(address, Immediate(v));
4900      }
4901      codegen_->MaybeRecordImplicitNullCheck(instruction);
4902      break;
4903    }
4904
4905    case Primitive::kPrimLong: {
4906      uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4907      Address address = index.IsConstant()
4908          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4909          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4910      if (value.IsRegister()) {
4911        __ movq(address, value.AsRegister<CpuRegister>());
4912        codegen_->MaybeRecordImplicitNullCheck(instruction);
4913      } else {
4914        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4915        Address address_high = index.IsConstant()
4916            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4917                offset + sizeof(int32_t))
4918            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4919        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4920      }
4921      break;
4922    }
4923
4924    case Primitive::kPrimFloat: {
4925      uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
4926      Address address = index.IsConstant()
4927          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
4928          : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
4929      if (value.IsFpuRegister()) {
4930        __ movss(address, value.AsFpuRegister<XmmRegister>());
4931      } else {
4932        DCHECK(value.IsConstant());
4933        int32_t v =
4934            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4935        __ movl(address, Immediate(v));
4936      }
4937      codegen_->MaybeRecordImplicitNullCheck(instruction);
4938      break;
4939    }
4940
4941    case Primitive::kPrimDouble: {
4942      uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
4943      Address address = index.IsConstant()
4944          ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
4945          : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
4946      if (value.IsFpuRegister()) {
4947        __ movsd(address, value.AsFpuRegister<XmmRegister>());
4948        codegen_->MaybeRecordImplicitNullCheck(instruction);
4949      } else {
4950        int64_t v =
4951            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4952        Address address_high = index.IsConstant()
4953            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
4954                offset + sizeof(int32_t))
4955            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
4956        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
4957      }
4958      break;
4959    }
4960
4961    case Primitive::kPrimVoid:
4962      LOG(FATAL) << "Unreachable type " << instruction->GetType();
4963      UNREACHABLE();
4964  }
4965}
4966
4967void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
4968  LocationSummary* locations =
4969      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4970  locations->SetInAt(0, Location::RequiresRegister());
4971  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4972}
4973
4974void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
4975  LocationSummary* locations = instruction->GetLocations();
4976  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
4977  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
4978  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4979  __ movl(out, Address(obj, offset));
4980  codegen_->MaybeRecordImplicitNullCheck(instruction);
4981}
4982
4983void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4984  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
4985      ? LocationSummary::kCallOnSlowPath
4986      : LocationSummary::kNoCall;
4987  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
4988  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
4989  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4990  if (instruction->HasUses()) {
4991    locations->SetOut(Location::SameAsFirstInput());
4992  }
4993}
4994
4995void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
4996  LocationSummary* locations = instruction->GetLocations();
4997  Location index_loc = locations->InAt(0);
4998  Location length_loc = locations->InAt(1);
4999  SlowPathCode* slow_path =
5000      new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
5001
5002  if (length_loc.IsConstant()) {
5003    int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5004    if (index_loc.IsConstant()) {
5005      // BCE will remove the bounds check if we are guaranteed to pass.
5006      int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5007      if (index < 0 || index >= length) {
5008        codegen_->AddSlowPath(slow_path);
5009        __ jmp(slow_path->GetEntryLabel());
5010      } else {
5011        // Some optimization after BCE may have generated this, and we should not
5012        // generate a bounds check if it is a valid range.
5013      }
5014      return;
5015    }
5016
5017    // We have to reverse the jump condition because the length is the constant.
5018    CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5019    __ cmpl(index_reg, Immediate(length));
5020    codegen_->AddSlowPath(slow_path);
5021    __ j(kAboveEqual, slow_path->GetEntryLabel());
5022  } else {
5023    CpuRegister length = length_loc.AsRegister<CpuRegister>();
5024    if (index_loc.IsConstant()) {
5025      int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5026      __ cmpl(length, Immediate(value));
5027    } else {
5028      __ cmpl(length, index_loc.AsRegister<CpuRegister>());
5029    }
5030    codegen_->AddSlowPath(slow_path);
5031    __ j(kBelowEqual, slow_path->GetEntryLabel());
5032  }
5033}
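// Both paths above use unsigned conditions (kAboveEqual / kBelowEqual) so that
// a negative index, reinterpreted as unsigned, is automatically out of range.
// In pseudo-code the emitted checks are:
//
//   constant length:  if (index >=u length) goto BoundsCheckSlowPath;
//   register length:  if (length <=u index) goto BoundsCheckSlowPath;
//
// i.e. a single compare-and-branch covers both `index < 0` and `index >= length`.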
5034
5035void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5036                                     CpuRegister card,
5037                                     CpuRegister object,
5038                                     CpuRegister value,
5039                                     bool value_can_be_null) {
5040  NearLabel is_null;
5041  if (value_can_be_null) {
5042    __ testl(value, value);
5043    __ j(kEqual, &is_null);
5044  }
5045  __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64WordSize>().Int32Value(),
5046                                        /* no_rip */ true));
5047  __ movq(temp, object);
5048  __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5049  __ movb(Address(temp, card, TIMES_1, 0), card);
5050  if (value_can_be_null) {
5051    __ Bind(&is_null);
5052  }
5053}
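// Sketch of the card-marking sequence emitted above (register names illustrative):
//
//   card <- Thread::Current()->card_table_        (loaded via %gs)
//   temp <- object >> kCardShift
//   byte ptr [card + temp] <- low 8 bits of card
//
// The runtime arranges for the low byte of the card-table base to equal the
// dirty-card value, so storing `card`'s low byte both indexes the entry and
// marks it dirty without loading a separate constant.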
5054
5055void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5056  LOG(FATAL) << "Unimplemented";
5057}
5058
5059void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5060  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5061}
5062
5063void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5064  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5065}
5066
5067void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5068  HBasicBlock* block = instruction->GetBlock();
5069  if (block->GetLoopInformation() != nullptr) {
5070    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5071    // The back edge will generate the suspend check.
5072    return;
5073  }
5074  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5075    // The goto will generate the suspend check.
5076    return;
5077  }
5078  GenerateSuspendCheck(instruction, nullptr);
5079}
5080
5081void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5082                                                          HBasicBlock* successor) {
5083  SuspendCheckSlowPathX86_64* slow_path =
5084      down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5085  if (slow_path == nullptr) {
5086    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5087    instruction->SetSlowPath(slow_path);
5088    codegen_->AddSlowPath(slow_path);
5089    if (successor != nullptr) {
5090      DCHECK(successor->IsLoopHeader());
5091      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5092    }
5093  } else {
5094    DCHECK_EQ(slow_path->GetSuccessor(), successor);
5095  }
5096
5097  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(),
5098                                  /* no_rip */ true),
5099                Immediate(0));
5100  if (successor == nullptr) {
5101    __ j(kNotEqual, slow_path->GetEntryLabel());
5102    __ Bind(slow_path->GetReturnLabel());
5103  } else {
5104    __ j(kEqual, codegen_->GetLabelOf(successor));
5105    __ jmp(slow_path->GetEntryLabel());
5106  }
5107}
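// The suspend check polls the thread flags through %gs; roughly:
//
//   cmpw gs:[ThreadFlagsOffset], 0
//   jne  SuspendCheckSlowPath          (plain check: fall through otherwise)
//
// or, for a back edge with a known successor:
//
//   je   <loop header>
//   jmp  SuspendCheckSlowPath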
5108
5109X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5110  return codegen_->GetAssembler();
5111}
5112
5113void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5114  MoveOperands* move = moves_[index];
5115  Location source = move->GetSource();
5116  Location destination = move->GetDestination();
5117
5118  if (source.IsRegister()) {
5119    if (destination.IsRegister()) {
5120      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5121    } else if (destination.IsStackSlot()) {
5122      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5123              source.AsRegister<CpuRegister>());
5124    } else {
5125      DCHECK(destination.IsDoubleStackSlot());
5126      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5127              source.AsRegister<CpuRegister>());
5128    }
5129  } else if (source.IsStackSlot()) {
5130    if (destination.IsRegister()) {
5131      __ movl(destination.AsRegister<CpuRegister>(),
5132              Address(CpuRegister(RSP), source.GetStackIndex()));
5133    } else if (destination.IsFpuRegister()) {
5134      __ movss(destination.AsFpuRegister<XmmRegister>(),
5135              Address(CpuRegister(RSP), source.GetStackIndex()));
5136    } else {
5137      DCHECK(destination.IsStackSlot());
5138      __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5139      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5140    }
5141  } else if (source.IsDoubleStackSlot()) {
5142    if (destination.IsRegister()) {
5143      __ movq(destination.AsRegister<CpuRegister>(),
5144              Address(CpuRegister(RSP), source.GetStackIndex()));
5145    } else if (destination.IsFpuRegister()) {
5146      __ movsd(destination.AsFpuRegister<XmmRegister>(),
5147               Address(CpuRegister(RSP), source.GetStackIndex()));
5148    } else {
5149      DCHECK(destination.IsDoubleStackSlot()) << destination;
5150      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5151      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5152    }
5153  } else if (source.IsConstant()) {
5154    HConstant* constant = source.GetConstant();
5155    if (constant->IsIntConstant() || constant->IsNullConstant()) {
5156      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5157      if (destination.IsRegister()) {
5158        if (value == 0) {
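          // Zeroing with `xorl reg, reg` is preferred over `movl $0, reg`: it has a
          // shorter encoding and breaks any dependency on the previous register value.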
5159          __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5160        } else {
5161          __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5162        }
5163      } else {
5164        DCHECK(destination.IsStackSlot()) << destination;
5165        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5166      }
5167    } else if (constant->IsLongConstant()) {
5168      int64_t value = constant->AsLongConstant()->GetValue();
5169      if (destination.IsRegister()) {
5170        codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5171      } else {
5172        DCHECK(destination.IsDoubleStackSlot()) << destination;
5173        codegen_->Store64BitValueToStack(destination, value);
5174      }
5175    } else if (constant->IsFloatConstant()) {
5176      float fp_value = constant->AsFloatConstant()->GetValue();
5177      if (destination.IsFpuRegister()) {
5178        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5179        codegen_->Load32BitValue(dest, fp_value);
5180      } else {
5181        DCHECK(destination.IsStackSlot()) << destination;
5182        Immediate imm(bit_cast<int32_t, float>(fp_value));
5183        __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5184      }
5185    } else {
5186      DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5187      double fp_value = constant->AsDoubleConstant()->GetValue();
5188      int64_t value = bit_cast<int64_t, double>(fp_value);
5189      if (destination.IsFpuRegister()) {
5190        XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5191        codegen_->Load64BitValue(dest, fp_value);
5192      } else {
5193        DCHECK(destination.IsDoubleStackSlot()) << destination;
5194        codegen_->Store64BitValueToStack(destination, value);
5195      }
5196    }
5197  } else if (source.IsFpuRegister()) {
5198    if (destination.IsFpuRegister()) {
5199      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5200    } else if (destination.IsStackSlot()) {
5201      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5202               source.AsFpuRegister<XmmRegister>());
5203    } else {
5204      DCHECK(destination.IsDoubleStackSlot()) << destination;
5205      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5206               source.AsFpuRegister<XmmRegister>());
5207    }
5208  }
5209}
5210
5211void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5212  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5213  __ movl(Address(CpuRegister(RSP), mem), reg);
5214  __ movl(reg, CpuRegister(TMP));
5215}
5216
5217void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5218  ScratchRegisterScope ensure_scratch(
5219      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5220
5221  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5222  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5223  __ movl(CpuRegister(ensure_scratch.GetRegister()),
5224          Address(CpuRegister(RSP), mem2 + stack_offset));
5225  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5226  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5227          CpuRegister(ensure_scratch.GetRegister()));
5228}
5229
5230void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5231  __ movq(CpuRegister(TMP), reg1);
5232  __ movq(reg1, reg2);
5233  __ movq(reg2, CpuRegister(TMP));
5234}
5235
5236void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5237  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5238  __ movq(Address(CpuRegister(RSP), mem), reg);
5239  __ movq(reg, CpuRegister(TMP));
5240}
5241
5242void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5243  ScratchRegisterScope ensure_scratch(
5244      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5245
5246  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5247  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5248  __ movq(CpuRegister(ensure_scratch.GetRegister()),
5249          Address(CpuRegister(RSP), mem2 + stack_offset));
5250  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5251  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5252          CpuRegister(ensure_scratch.GetRegister()));
5253}
5254
5255void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5256  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5257  __ movss(Address(CpuRegister(RSP), mem), reg);
5258  __ movd(reg, CpuRegister(TMP));
5259}
5260
5261void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5262  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5263  __ movsd(Address(CpuRegister(RSP), mem), reg);
5264  __ movd(reg, CpuRegister(TMP));
5265}
5266
5267void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5268  MoveOperands* move = moves_[index];
5269  Location source = move->GetSource();
5270  Location destination = move->GetDestination();
5271
5272  if (source.IsRegister() && destination.IsRegister()) {
5273    Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5274  } else if (source.IsRegister() && destination.IsStackSlot()) {
5275    Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5276  } else if (source.IsStackSlot() && destination.IsRegister()) {
5277    Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5278  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5279    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5280  } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5281    Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5282  } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5283    Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5284  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5285    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5286  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5287    __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5288    __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5289    __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5290  } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5291    Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5292  } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5293    Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5294  } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5295    Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5296  } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5297    Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5298  } else {
5299    LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5300  }
5301}
5302
5303
5304void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5305  __ pushq(CpuRegister(reg));
5306}
5307
5308
5309void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5310  __ popq(CpuRegister(reg));
5311}
5312
5313void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5314    SlowPathCode* slow_path, CpuRegister class_reg) {
5315  __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
5316          Immediate(mirror::Class::kStatusInitialized));
5317  __ j(kLess, slow_path->GetEntryLabel());
5318  __ Bind(slow_path->GetExitLabel());
5319  // No need for memory fence, thanks to the x86-64 memory model.
5320}
5321
5322void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5323  InvokeRuntimeCallingConvention calling_convention;
5324  CodeGenerator::CreateLoadClassLocationSummary(
5325      cls,
5326      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
5327      Location::RegisterLocation(RAX),
5328      /* code_generator_supports_read_barrier */ true);
5329}
5330
5331void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
5332  LocationSummary* locations = cls->GetLocations();
5333  if (cls->NeedsAccessCheck()) {
5334    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
5335    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
5336                            cls,
5337                            cls->GetDexPc(),
5338                            nullptr);
5339    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
5340    return;
5341  }
5342
5343  Location out_loc = locations->Out();
5344  CpuRegister out = out_loc.AsRegister<CpuRegister>();
5345  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5346
5347  if (cls->IsReferrersClass()) {
5348    DCHECK(!cls->CanCallRuntime());
5349    DCHECK(!cls->MustGenerateClinitCheck());
5350    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5351    GenerateGcRootFieldLoad(
5352        cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5353  } else {
5354    // /* GcRoot<mirror::Class>[] */ out =
5355    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
5356    __ movq(out, Address(current_method,
5357                         ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
5358    // /* GcRoot<mirror::Class> */ out = out[type_index]
5359    GenerateGcRootFieldLoad(
5360        cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
5361
5362    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
5363      DCHECK(cls->CanCallRuntime());
5364      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5365          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5366      codegen_->AddSlowPath(slow_path);
5367      if (!cls->IsInDexCache()) {
5368        __ testl(out, out);
5369        __ j(kEqual, slow_path->GetEntryLabel());
5370      }
5371      if (cls->MustGenerateClinitCheck()) {
5372        GenerateClassInitializationCheck(slow_path, out);
5373      } else {
5374        __ Bind(slow_path->GetExitLabel());
5375      }
5376    }
5377  }
5378}
5379
5380void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5381  LocationSummary* locations =
5382      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5383  locations->SetInAt(0, Location::RequiresRegister());
5384  if (check->HasUses()) {
5385    locations->SetOut(Location::SameAsFirstInput());
5386  }
5387}
5388
5389void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5390  // We assume the class is not null.
5391  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5392      check->GetLoadClass(), check, check->GetDexPc(), true);
5393  codegen_->AddSlowPath(slow_path);
5394  GenerateClassInitializationCheck(slow_path,
5395                                   check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5396}
5397
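// Returns the HLoadString load kind the x86-64 back end will actually use
// for `desired_string_load_kind`, e.g. falling back to kDexCacheViaMethod
// while the boot image kinds lack read barrier support, or preferring the
// always-available RIP-relative form over absolute link-time addresses.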
5398HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5399    HLoadString::LoadKind desired_string_load_kind) {
5400  if (kEmitCompilerReadBarrier) {
5401    switch (desired_string_load_kind) {
5402      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5403      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5404      case HLoadString::LoadKind::kBootImageAddress:
5405        // TODO: Implement for read barrier.
5406        return HLoadString::LoadKind::kDexCacheViaMethod;
5407      default:
5408        break;
5409    }
5410  }
5411  switch (desired_string_load_kind) {
5412    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5413      DCHECK(!GetCompilerOptions().GetCompilePic());
5414      // We prefer the always-available RIP-relative address for the x86-64 boot image.
5415      return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5416    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5417      DCHECK(GetCompilerOptions().GetCompilePic());
5418      break;
5419    case HLoadString::LoadKind::kBootImageAddress:
5420      break;
5421    case HLoadString::LoadKind::kDexCacheAddress:
5422      DCHECK(Runtime::Current()->UseJitCompilation());
5423      break;
5424    case HLoadString::LoadKind::kDexCachePcRelative:
5425      DCHECK(!Runtime::Current()->UseJitCompilation());
5426      break;
5427    case HLoadString::LoadKind::kDexCacheViaMethod:
5428      break;
5429  }
5430  return desired_string_load_kind;
5431}
5432
5433void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5434  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
5435      ? LocationSummary::kCallOnSlowPath
5436      : LocationSummary::kNoCall;
5437  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5438  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5439    locations->SetInAt(0, Location::RequiresRegister());
5440  }
5441  locations->SetOut(Location::RequiresRegister());
5442}
5443
5444void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
5445  LocationSummary* locations = load->GetLocations();
5446  Location out_loc = locations->Out();
5447  CpuRegister out = out_loc.AsRegister<CpuRegister>();
5448
5449  switch (load->GetLoadKind()) {
5450    case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5451      DCHECK(!kEmitCompilerReadBarrier);
5452      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5453      codegen_->RecordStringPatch(load);
5454      return;  // No dex cache slow path.
5455    }
5456    case HLoadString::LoadKind::kBootImageAddress: {
5457      DCHECK(!kEmitCompilerReadBarrier);
5458      DCHECK_NE(load->GetAddress(), 0u);
5459      uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
5460      __ movl(out, Immediate(address));  // Zero-extended.
5461      codegen_->RecordSimplePatch();
5462      return;  // No dex cache slow path.
5463    }
5464    case HLoadString::LoadKind::kDexCacheAddress: {
5465      DCHECK_NE(load->GetAddress(), 0u);
5466      if (IsUint<32>(load->GetAddress())) {
5467        Address address = Address::Absolute(load->GetAddress(), /* no_rip */ true);
5468        GenerateGcRootFieldLoad(load, out_loc, address);
5469      } else {
5470        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
5471        __ movq(out, Immediate(load->GetAddress()));
5472        GenerateGcRootFieldLoad(load, out_loc, Address(out, 0));
5473      }
5474      break;
5475    }
5476    case HLoadString::LoadKind::kDexCachePcRelative: {
5477      uint32_t offset = load->GetDexCacheElementOffset();
5478      Label* fixup_label = codegen_->NewPcRelativeDexCacheArrayPatch(load->GetDexFile(), offset);
5479      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5480                                          /* no_rip */ false);
5481      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
5482      break;
5483    }
5484    case HLoadString::LoadKind::kDexCacheViaMethod: {
5485      CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5486
5487      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5488      GenerateGcRootFieldLoad(
5489          load, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
5490      // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
5491      __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
5492      // /* GcRoot<mirror::String> */ out = out[string_index]
5493      GenerateGcRootFieldLoad(
5494          load, out_loc, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
5495      break;
5496    }
5497    default:
5498      LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind();
5499      UNREACHABLE();
5500  }
5501
5502  if (!load->IsInDexCache()) {
5503    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5504    codegen_->AddSlowPath(slow_path);
5505    __ testl(out, out);
5506    __ j(kEqual, slow_path->GetEntryLabel());
5507    __ Bind(slow_path->GetExitLabel());
5508  }
5509}
5510
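// Returns the address of the current thread's exception field, to be read
// and written through the GS segment override (see the gs()-> uses below).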
5511static Address GetExceptionTlsAddress() {
5512  return Address::Absolute(Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(),
5513                           /* no_rip */ true);
5514}
5515
5516void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5517  LocationSummary* locations =
5518      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5519  locations->SetOut(Location::RequiresRegister());
5520}
5521
5522void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5523  __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5524}
5525
5526void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5527  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5528}
5529
5530void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5531  __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5532}
5533
5534void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5535  LocationSummary* locations =
5536      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
5537  InvokeRuntimeCallingConvention calling_convention;
5538  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5539}
5540
5541void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5542  codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
5543                          instruction,
5544                          instruction->GetDexPc(),
5545                          nullptr);
5546  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5547}
5548
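// With read barriers enabled, an extra temporary register is needed either
// for Baker's fast path (any check kind) or for the slow-path reference
// loads of the abstract class, class hierarchy and array object checks.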
5549static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5550  return kEmitCompilerReadBarrier &&
5551      (kUseBakerReadBarrier ||
5552       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5553       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5554       type_check_kind == TypeCheckKind::kArrayObjectCheck);
5555}
5556
5557void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5558  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5559  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5560  switch (type_check_kind) {
5561    case TypeCheckKind::kExactCheck:
5562    case TypeCheckKind::kAbstractClassCheck:
5563    case TypeCheckKind::kClassHierarchyCheck:
5564    case TypeCheckKind::kArrayObjectCheck:
5565      call_kind =
5566          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5567      break;
5568    case TypeCheckKind::kArrayCheck:
5569    case TypeCheckKind::kUnresolvedCheck:
5570    case TypeCheckKind::kInterfaceCheck:
5571      call_kind = LocationSummary::kCallOnSlowPath;
5572      break;
5573  }
5574
5575  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5576  locations->SetInAt(0, Location::RequiresRegister());
5577  locations->SetInAt(1, Location::Any());
5578  // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5579  locations->SetOut(Location::RequiresRegister());
5580  // When read barriers are enabled, we need a temporary register for
5581  // some cases.
5582  if (TypeCheckNeedsATemporary(type_check_kind)) {
5583    locations->AddTemp(Location::RequiresRegister());
5584  }
5585}
5586
5587void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5588  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5589  LocationSummary* locations = instruction->GetLocations();
5590  Location obj_loc = locations->InAt(0);
5591  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5592  Location cls = locations->InAt(1);
5593  Location out_loc = locations->Out();
5594  CpuRegister out = out_loc.AsRegister<CpuRegister>();
5595  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5596      locations->GetTemp(0) :
5597      Location::NoLocation();
5598  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5599  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5600  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5601  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5602  SlowPathCode* slow_path = nullptr;
5603  NearLabel done, zero;
5604
5605  // Return 0 if `obj` is null.
5606  // Avoid null check if we know obj is not null.
5607  if (instruction->MustDoNullCheck()) {
5608    __ testl(obj, obj);
5609    __ j(kEqual, &zero);
5610  }
5611
5612  // /* HeapReference<Class> */ out = obj->klass_
5613  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
5614
5615  switch (type_check_kind) {
5616    case TypeCheckKind::kExactCheck: {
5617      if (cls.IsRegister()) {
5618        __ cmpl(out, cls.AsRegister<CpuRegister>());
5619      } else {
5620        DCHECK(cls.IsStackSlot()) << cls;
5621        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5622      }
5623      if (zero.IsLinked()) {
5624        // Classes must be equal for the instanceof to succeed.
5625        __ j(kNotEqual, &zero);
5626        __ movl(out, Immediate(1));
5627        __ jmp(&done);
5628      } else {
5629        __ setcc(kEqual, out);
5630        // setcc only sets the low byte.
5631        __ andl(out, Immediate(1));
5632      }
5633      break;
5634    }
5635
5636    case TypeCheckKind::kAbstractClassCheck: {
5637      // If the class is abstract, we eagerly fetch the super class of the
5638      // object to avoid doing a comparison we know will fail.
5639      NearLabel loop;
5640      __ Bind(&loop);
5641      // /* HeapReference<Class> */ out = out->super_class_
5642      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5643      __ testl(out, out);
5644      // If `out` is null, we use it for the result, and jump to `done`.
5645      __ j(kEqual, &done);
5646      if (cls.IsRegister()) {
5647        __ cmpl(out, cls.AsRegister<CpuRegister>());
5648      } else {
5649        DCHECK(cls.IsStackSlot()) << cls;
5650        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5651      }
5652      __ j(kNotEqual, &loop);
5653      __ movl(out, Immediate(1));
5654      if (zero.IsLinked()) {
5655        __ jmp(&done);
5656      }
5657      break;
5658    }
5659
5660    case TypeCheckKind::kClassHierarchyCheck: {
5661      // Walk over the class hierarchy to find a match.
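      // In pseudocode:
      //   do {
      //     if (out == cls) { out = 1; goto done; }
      //     out = out->super_class_;
      //   } while (out != null);
      //   // `out` is null here and already holds the `false` result.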
5662      NearLabel loop, success;
5663      __ Bind(&loop);
5664      if (cls.IsRegister()) {
5665        __ cmpl(out, cls.AsRegister<CpuRegister>());
5666      } else {
5667        DCHECK(cls.IsStackSlot()) << cls;
5668        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5669      }
5670      __ j(kEqual, &success);
5671      // /* HeapReference<Class> */ out = out->super_class_
5672      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
5673      __ testl(out, out);
5674      __ j(kNotEqual, &loop);
5675      // If `out` is null, we use it for the result, and jump to `done`.
5676      __ jmp(&done);
5677      __ Bind(&success);
5678      __ movl(out, Immediate(1));
5679      if (zero.IsLinked()) {
5680        __ jmp(&done);
5681      }
5682      break;
5683    }
5684
5685    case TypeCheckKind::kArrayObjectCheck: {
5686      // Do an exact check.
5687      NearLabel exact_check;
5688      if (cls.IsRegister()) {
5689        __ cmpl(out, cls.AsRegister<CpuRegister>());
5690      } else {
5691        DCHECK(cls.IsStackSlot()) << cls;
5692        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5693      }
5694      __ j(kEqual, &exact_check);
5695      // Otherwise, we need to check that the object's class is a non-primitive array.
5696      // /* HeapReference<Class> */ out = out->component_type_
5697      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
5698      __ testl(out, out);
5699      // If `out` is null, we use it for the result, and jump to `done`.
5700      __ j(kEqual, &done);
5701      __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5702      __ j(kNotEqual, &zero);
5703      __ Bind(&exact_check);
5704      __ movl(out, Immediate(1));
5705      __ jmp(&done);
5706      break;
5707    }
5708
5709    case TypeCheckKind::kArrayCheck: {
5710      if (cls.IsRegister()) {
5711        __ cmpl(out, cls.AsRegister<CpuRegister>());
5712      } else {
5713        DCHECK(cls.IsStackSlot()) << cls;
5714        __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5715      }
5716      DCHECK(locations->OnlyCallsOnSlowPath());
5717      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5718                                                                       /* is_fatal */ false);
5719      codegen_->AddSlowPath(slow_path);
5720      __ j(kNotEqual, slow_path->GetEntryLabel());
5721      __ movl(out, Immediate(1));
5722      if (zero.IsLinked()) {
5723        __ jmp(&done);
5724      }
5725      break;
5726    }
5727
5728    case TypeCheckKind::kUnresolvedCheck:
5729    case TypeCheckKind::kInterfaceCheck: {
5730      // Note that we indeed only call on slow path, but we always go
5731      // into the slow path for the unresolved and interface check
5732      // cases.
5733      //
5734      // We cannot directly call the InstanceofNonTrivial runtime
5735      // entry point without resorting to a type checking slow path
5736      // here (i.e. by calling InvokeRuntime directly), as it would
5737      // require us to assign fixed registers for the inputs of this
5738      // HInstanceOf instruction (following the runtime calling
5739      // convention), which might be cluttered by the potential first
5740      // read barrier emission at the beginning of this method.
5741      //
5742      // TODO: Introduce a new runtime entry point taking the object
5743      // to test (instead of its class) as argument, and let it deal
5744      // with the read barrier issues. This will let us refactor this
5745      // case of the `switch` code as it was previously (with a direct
5746      // call to the runtime not using a type checking slow path).
5747      // This should also be beneficial for the other cases above.
5748      DCHECK(locations->OnlyCallsOnSlowPath());
5749      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5750                                                                       /* is_fatal */ false);
5751      codegen_->AddSlowPath(slow_path);
5752      __ jmp(slow_path->GetEntryLabel());
5753      if (zero.IsLinked()) {
5754        __ jmp(&done);
5755      }
5756      break;
5757    }
5758  }
5759
5760  if (zero.IsLinked()) {
5761    __ Bind(&zero);
5762    __ xorl(out, out);
5763  }
5764
5765  if (done.IsLinked()) {
5766    __ Bind(&done);
5767  }
5768
5769  if (slow_path != nullptr) {
5770    __ Bind(slow_path->GetExitLabel());
5771  }
5772}
5773
5774void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
5775  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5776  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
5777  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5778  switch (type_check_kind) {
5779    case TypeCheckKind::kExactCheck:
5780    case TypeCheckKind::kAbstractClassCheck:
5781    case TypeCheckKind::kClassHierarchyCheck:
5782    case TypeCheckKind::kArrayObjectCheck:
5783      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
5784          LocationSummary::kCallOnSlowPath :
5785          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
5786      break;
5787    case TypeCheckKind::kArrayCheck:
5788    case TypeCheckKind::kUnresolvedCheck:
5789    case TypeCheckKind::kInterfaceCheck:
5790      call_kind = LocationSummary::kCallOnSlowPath;
5791      break;
5792  }
5793  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5794  locations->SetInAt(0, Location::RequiresRegister());
5795  locations->SetInAt(1, Location::Any());
5796  // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
5797  locations->AddTemp(Location::RequiresRegister());
5798  // When read barriers are enabled, we need an additional temporary
5799  // register for some cases.
5800  if (TypeCheckNeedsATemporary(type_check_kind)) {
5801    locations->AddTemp(Location::RequiresRegister());
5802  }
5803}
5804
5805void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
5806  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5807  LocationSummary* locations = instruction->GetLocations();
5808  Location obj_loc = locations->InAt(0);
5809  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5810  Location cls = locations->InAt(1);
5811  Location temp_loc = locations->GetTemp(0);
5812  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5813  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
5814      locations->GetTemp(1) :
5815      Location::NoLocation();
5816  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5817  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5818  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5819  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5820
5821  bool is_type_check_slow_path_fatal =
5822      (type_check_kind == TypeCheckKind::kExactCheck ||
5823       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5824       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5825       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
5826      !instruction->CanThrowIntoCatchBlock();
5827  SlowPathCode* type_check_slow_path =
5828      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5829                                                           is_type_check_slow_path_fatal);
5830  codegen_->AddSlowPath(type_check_slow_path);
5831
5832  switch (type_check_kind) {
5833    case TypeCheckKind::kExactCheck:
5834    case TypeCheckKind::kArrayCheck: {
5835      NearLabel done;
5836      // Avoid null check if we know obj is not null.
5837      if (instruction->MustDoNullCheck()) {
5838        __ testl(obj, obj);
5839        __ j(kEqual, &done);
5840      }
5841
5842      // /* HeapReference<Class> */ temp = obj->klass_
5843      GenerateReferenceLoadTwoRegisters(
5844          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5845
5846      if (cls.IsRegister()) {
5847        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5848      } else {
5849        DCHECK(cls.IsStackSlot()) << cls;
5850        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5851      }
5852      // Jump to slow path for throwing the exception or doing a
5853      // more involved array check.
5854      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
5855      __ Bind(&done);
5856      break;
5857    }
5858
5859    case TypeCheckKind::kAbstractClassCheck: {
5860      NearLabel done;
5861      // Avoid null check if we know obj is not null.
5862      if (instruction->MustDoNullCheck()) {
5863        __ testl(obj, obj);
5864        __ j(kEqual, &done);
5865      }
5866
5867      // /* HeapReference<Class> */ temp = obj->klass_
5868      GenerateReferenceLoadTwoRegisters(
5869          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5870
5871      // If the class is abstract, we eagerly fetch the super class of the
5872      // object to avoid doing a comparison we know will fail.
5873      NearLabel loop, compare_classes;
5874      __ Bind(&loop);
5875      // /* HeapReference<Class> */ temp = temp->super_class_
5876      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5877
5878      // If the class reference currently in `temp` is not null, jump
5879      // to the `compare_classes` label to compare it with the checked
5880      // class.
5881      __ testl(temp, temp);
5882      __ j(kNotEqual, &compare_classes);
5883      // Otherwise, jump to the slow path to throw the exception.
5884      //
5885      // But before going into the slow path, move the object's class
5886      // back into `temp`, as it has been overwritten in the
5887      // meantime.
5888      // /* HeapReference<Class> */ temp = obj->klass_
5889      GenerateReferenceLoadTwoRegisters(
5890          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5891      __ jmp(type_check_slow_path->GetEntryLabel());
5892
5893      __ Bind(&compare_classes);
5894      if (cls.IsRegister()) {
5895        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5896      } else {
5897        DCHECK(cls.IsStackSlot()) << cls;
5898        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5899      }
5900      __ j(kNotEqual, &loop);
5901      __ Bind(&done);
5902      break;
5903    }
5904
5905    case TypeCheckKind::kClassHierarchyCheck: {
5906      NearLabel done;
5907      // Avoid null check if we know obj is not null.
5908      if (instruction->MustDoNullCheck()) {
5909        __ testl(obj, obj);
5910        __ j(kEqual, &done);
5911      }
5912
5913      // /* HeapReference<Class> */ temp = obj->klass_
5914      GenerateReferenceLoadTwoRegisters(
5915          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5916
5917      // Walk over the class hierarchy to find a match.
5918      NearLabel loop;
5919      __ Bind(&loop);
5920      if (cls.IsRegister()) {
5921        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5922      } else {
5923        DCHECK(cls.IsStackSlot()) << cls;
5924        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5925      }
5926      __ j(kEqual, &done);
5927
5928      // /* HeapReference<Class> */ temp = temp->super_class_
5929      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
5930
5931      // If the class reference currently in `temp` is not null, jump
5932      // back at the beginning of the loop.
5933      __ testl(temp, temp);
5934      __ j(kNotEqual, &loop);
5935      // Otherwise, jump to the slow path to throw the exception.
5936      //
5937      // But before going into the slow path, move the object's class
5938      // back into `temp`, as it has been overwritten in the
5939      // meantime.
5940      // /* HeapReference<Class> */ temp = obj->klass_
5941      GenerateReferenceLoadTwoRegisters(
5942          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5943      __ jmp(type_check_slow_path->GetEntryLabel());
5944      __ Bind(&done);
5945      break;
5946    }
5947
5948    case TypeCheckKind::kArrayObjectCheck: {
5949      // We cannot use a NearLabel here, as its range might be too
5950      // short in some cases when read barriers are enabled.  This has
5951      // been observed for instance when the code emitted for this
5952      // case uses high x86-64 registers (R8-R15).
5953      Label done;
5954      // Avoid null check if we know obj is not null.
5955      if (instruction->MustDoNullCheck()) {
5956        __ testl(obj, obj);
5957        __ j(kEqual, &done);
5958      }
5959
5960      // /* HeapReference<Class> */ temp = obj->klass_
5961      GenerateReferenceLoadTwoRegisters(
5962          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5963
5964      // Do an exact check.
5965      NearLabel check_non_primitive_component_type;
5966      if (cls.IsRegister()) {
5967        __ cmpl(temp, cls.AsRegister<CpuRegister>());
5968      } else {
5969        DCHECK(cls.IsStackSlot()) << cls;
5970        __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
5971      }
5972      __ j(kEqual, &done);
5973
5974      // Otherwise, we need to check that the object's class is a non-primitive array.
5975      // /* HeapReference<Class> */ temp = temp->component_type_
5976      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
5977
5978      // If the component type is not null (i.e. the object is indeed
5979      // an array), jump to label `check_non_primitive_component_type`
5980      // to further check that this component type is not a primitive
5981      // type.
5982      __ testl(temp, temp);
5983      __ j(kNotEqual, &check_non_primitive_component_type);
5984      // Otherwise, jump to the slow path to throw the exception.
5985      //
5986      // But before going into the slow path, move the object's class
5987      // back into `temp`, as it has been overwritten in the
5988      // meantime.
5989      // /* HeapReference<Class> */ temp = obj->klass_
5990      GenerateReferenceLoadTwoRegisters(
5991          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
5992      __ jmp(type_check_slow_path->GetEntryLabel());
5993
5994      __ Bind(&check_non_primitive_component_type);
5995      __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
5996      __ j(kEqual, &done);
5997      // Same comment as above regarding `temp` and the slow path.
5998      // /* HeapReference<Class> */ temp = obj->klass_
5999      GenerateReferenceLoadTwoRegisters(
6000          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6001      __ jmp(type_check_slow_path->GetEntryLabel());
6002      __ Bind(&done);
6003      break;
6004    }
6005
6006    case TypeCheckKind::kUnresolvedCheck:
6007    case TypeCheckKind::kInterfaceCheck:
6008      NearLabel done;
6009      // Avoid null check if we know obj is not null.
6010      if (instruction->MustDoNullCheck()) {
6011        __ testl(obj, obj);
6012        __ j(kEqual, &done);
6013      }
6014
6015      // /* HeapReference<Class> */ temp = obj->klass_
6016      GenerateReferenceLoadTwoRegisters(
6017          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
6018
6019      // We always go into the type check slow path for the unresolved
6020      // and interface check cases.
6021      //
6022      // We cannot directly call the CheckCast runtime entry point
6023      // without resorting to a type checking slow path here (i.e. by
6024      // calling InvokeRuntime directly), as it would require to
6025      // assign fixed registers for the inputs of this HInstanceOf
6026      // instruction (following the runtime calling convention), which
6027      // might be cluttered by the potential first read barrier
6028      // emission at the beginning of this method.
6029      //
6030      // TODO: Introduce a new runtime entry point taking the object
6031      // to test (instead of its class) as argument, and let it deal
6032      // with the read barrier issues. This will let us refactor this
6033      // case of the `switch` code as it was previously (with a direct
6034      // call to the runtime not using a type checking slow path).
6035      // This should also be beneficial for the other cases above.
6036      __ jmp(type_check_slow_path->GetEntryLabel());
6037      __ Bind(&done);
6038      break;
6039  }
6040
6041  __ Bind(type_check_slow_path->GetExitLabel());
6042}
6043
6044void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6045  LocationSummary* locations =
6046      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
6047  InvokeRuntimeCallingConvention calling_convention;
6048  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6049}
6050
6051void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6052  codegen_->InvokeRuntime(instruction->IsEnter() ? QUICK_ENTRY_POINT(pLockObject)
6053                                                 : QUICK_ENTRY_POINT(pUnlockObject),
6054                          instruction,
6055                          instruction->GetDexPc(),
6056                          nullptr);
6057  if (instruction->IsEnter()) {
6058    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6059  } else {
6060    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6061  }
6062}
6063
6064void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6065void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6066void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6067
6068void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6069  LocationSummary* locations =
6070      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6071  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6072         || instruction->GetResultType() == Primitive::kPrimLong);
6073  locations->SetInAt(0, Location::RequiresRegister());
6074  locations->SetInAt(1, Location::Any());
6075  locations->SetOut(Location::SameAsFirstInput());
6076}
6077
6078void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6079  HandleBitwiseOperation(instruction);
6080}
6081
6082void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6083  HandleBitwiseOperation(instruction);
6084}
6085
6086void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6087  HandleBitwiseOperation(instruction);
6088}
6089
6090void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6091  LocationSummary* locations = instruction->GetLocations();
6092  Location first = locations->InAt(0);
6093  Location second = locations->InAt(1);
6094  DCHECK(first.Equals(locations->Out()));
6095
6096  if (instruction->GetResultType() == Primitive::kPrimInt) {
6097    if (second.IsRegister()) {
6098      if (instruction->IsAnd()) {
6099        __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6100      } else if (instruction->IsOr()) {
6101        __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6102      } else {
6103        DCHECK(instruction->IsXor());
6104        __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6105      }
6106    } else if (second.IsConstant()) {
6107      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6108      if (instruction->IsAnd()) {
6109        __ andl(first.AsRegister<CpuRegister>(), imm);
6110      } else if (instruction->IsOr()) {
6111        __ orl(first.AsRegister<CpuRegister>(), imm);
6112      } else {
6113        DCHECK(instruction->IsXor());
6114        __ xorl(first.AsRegister<CpuRegister>(), imm);
6115      }
6116    } else {
6117      Address address(CpuRegister(RSP), second.GetStackIndex());
6118      if (instruction->IsAnd()) {
6119        __ andl(first.AsRegister<CpuRegister>(), address);
6120      } else if (instruction->IsOr()) {
6121        __ orl(first.AsRegister<CpuRegister>(), address);
6122      } else {
6123        DCHECK(instruction->IsXor());
6124        __ xorl(first.AsRegister<CpuRegister>(), address);
6125      }
6126    }
6127  } else {
6128    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6129    CpuRegister first_reg = first.AsRegister<CpuRegister>();
6130    bool second_is_constant = false;
6131    int64_t value = 0;
6132    if (second.IsConstant()) {
6133      second_is_constant = true;
6134      value = second.GetConstant()->AsLongConstant()->GetValue();
6135    }
6136    bool is_int32_value = IsInt<32>(value);
6137
6138    if (instruction->IsAnd()) {
6139      if (second_is_constant) {
6140        if (is_int32_value) {
6141          __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6142        } else {
6143          __ andq(first_reg, codegen_->LiteralInt64Address(value));
6144        }
6145      } else if (second.IsDoubleStackSlot()) {
6146        __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6147      } else {
6148        __ andq(first_reg, second.AsRegister<CpuRegister>());
6149      }
6150    } else if (instruction->IsOr()) {
6151      if (second_is_constant) {
6152        if (is_int32_value) {
6153          __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6154        } else {
6155          __ orq(first_reg, codegen_->LiteralInt64Address(value));
6156        }
6157      } else if (second.IsDoubleStackSlot()) {
6158        __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6159      } else {
6160        __ orq(first_reg, second.AsRegister<CpuRegister>());
6161      }
6162    } else {
6163      DCHECK(instruction->IsXor());
6164      if (second_is_constant) {
6165        if (is_int32_value) {
6166          __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6167        } else {
6168          __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6169        }
6170      } else if (second.IsDoubleStackSlot()) {
6171        __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6172      } else {
6173        __ xorq(first_reg, second.AsRegister<CpuRegister>());
6174      }
6175    }
6176  }
6177}
6178
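// Loads the heap reference at `out + offset` back into `out`, emitting the
// configured read barrier when needed; `maybe_temp` must hold a register
// when read barriers are enabled and is unused otherwise.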
6179void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
6180                                                                      Location out,
6181                                                                      uint32_t offset,
6182                                                                      Location maybe_temp) {
6183  CpuRegister out_reg = out.AsRegister<CpuRegister>();
6184  if (kEmitCompilerReadBarrier) {
6185    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6186    if (kUseBakerReadBarrier) {
6187      // Load with fast path based Baker's read barrier.
6188      // /* HeapReference<Object> */ out = *(out + offset)
6189      codegen_->GenerateFieldLoadWithBakerReadBarrier(
6190          instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
6191    } else {
6192      // Load with slow path based read barrier.
6193      // Save the value of `out` into `maybe_temp` before overwriting it
6194      // in the following move operation, as we will need it for the
6195      // read barrier below.
6196      __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6197      // /* HeapReference<Object> */ out = *(out + offset)
6198      __ movl(out_reg, Address(out_reg, offset));
6199      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6200    }
6201  } else {
6202    // Plain load with no read barrier.
6203    // /* HeapReference<Object> */ out = *(out + offset)
6204    __ movl(out_reg, Address(out_reg, offset));
6205    __ MaybeUnpoisonHeapReference(out_reg);
6206  }
6207}
6208
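// Same as above, except that the reference is loaded from `obj + offset`
// into the distinct register `out`, so `maybe_temp` is only needed for
// Baker's read barrier fast path.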
6209void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
6210                                                                       Location out,
6211                                                                       Location obj,
6212                                                                       uint32_t offset,
6213                                                                       Location maybe_temp) {
6214  CpuRegister out_reg = out.AsRegister<CpuRegister>();
6215  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6216  if (kEmitCompilerReadBarrier) {
6217    if (kUseBakerReadBarrier) {
6218      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6219      // Load with fast path based Baker's read barrier.
6220      // /* HeapReference<Object> */ out = *(obj + offset)
6221      codegen_->GenerateFieldLoadWithBakerReadBarrier(
6222          instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
6223    } else {
6224      // Load with slow path based read barrier.
6225      // /* HeapReference<Object> */ out = *(obj + offset)
6226      __ movl(out_reg, Address(obj_reg, offset));
6227      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6228    }
6229  } else {
6230    // Plain load with no read barrier.
6231    // /* HeapReference<Object> */ out = *(obj + offset)
6232    __ movl(out_reg, Address(obj_reg, offset));
6233    __ MaybeUnpoisonHeapReference(out_reg);
6234  }
6235}
6236
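// Loads a GC root from `address` into `root`, applying the read barrier
// scheme described in the body; when `fixup_label` is non-null, it is bound
// right after the load instruction so the address can be patched later.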
6237void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
6238                                                             Location root,
6239                                                             const Address& address,
6240                                                             Label* fixup_label) {
6241  CpuRegister root_reg = root.AsRegister<CpuRegister>();
6242  if (kEmitCompilerReadBarrier) {
6243    if (kUseBakerReadBarrier) {
6244      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6245      // Baker's read barriers are used:
6246      //
6247      //   root = *address;
6248      //   if (Thread::Current()->GetIsGcMarking()) {
6249      //     root = ReadBarrier::Mark(root)
6250      //   }
6251
6252      // /* GcRoot<mirror::Object> */ root = *address
6253      __ movl(root_reg, address);
6254      if (fixup_label != nullptr) {
6255        __ Bind(fixup_label);
6256      }
6257      static_assert(
6258          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6259          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6260          "have different sizes.");
6261      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6262                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
6263                    "have different sizes.");
6264
6265      // Slow path used to mark the GC root `root`.
6266      SlowPathCode* slow_path =
6267          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, root, root);
6268      codegen_->AddSlowPath(slow_path);
6269
6270      __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64WordSize>().Int32Value(),
6271                                      /* no_rip */ true),
6272                    Immediate(0));
6273      __ j(kNotEqual, slow_path->GetEntryLabel());
6274      __ Bind(slow_path->GetExitLabel());
6275    } else {
6276      // GC root loaded through a slow path for read barriers other
6277      // than Baker's.
6278      // /* GcRoot<mirror::Object>* */ root = address
6279      __ leaq(root_reg, address);
6280      if (fixup_label != nullptr) {
6281        __ Bind(fixup_label);
6282      }
6283      // /* mirror::Object* */ root = root->Read()
6284      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6285    }
6286  } else {
6287    // Plain GC root load with no read barrier.
6288    // /* GcRoot<mirror::Object> */ root = *address
6289    __ movl(root_reg, address);
6290    if (fixup_label != nullptr) {
6291      __ Bind(fixup_label);
6292    }
6293    // Note that GC roots are not affected by heap poisoning, thus we
6294    // do not have to unpoison `root_reg` here.
6295  }
6296}
6297
6298void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6299                                                                Location ref,
6300                                                                CpuRegister obj,
6301                                                                uint32_t offset,
6302                                                                Location temp,
6303                                                                bool needs_null_check) {
6304  DCHECK(kEmitCompilerReadBarrier);
6305  DCHECK(kUseBakerReadBarrier);
6306
6307  // /* HeapReference<Object> */ ref = *(obj + offset)
6308  Address src(obj, offset);
6309  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6310}
6311
6312void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6313                                                                Location ref,
6314                                                                CpuRegister obj,
6315                                                                uint32_t data_offset,
6316                                                                Location index,
6317                                                                Location temp,
6318                                                                bool needs_null_check) {
6319  DCHECK(kEmitCompilerReadBarrier);
6320  DCHECK(kUseBakerReadBarrier);
6321
6322  // /* HeapReference<Object> */ ref =
6323  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
6324  Address src = index.IsConstant() ?
6325      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset) :
6326      Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset);
6327  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, temp, needs_null_check);
6328}
6329
6330void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6331                                                                    Location ref,
6332                                                                    CpuRegister obj,
6333                                                                    const Address& src,
6334                                                                    Location temp,
6335                                                                    bool needs_null_check) {
6336  DCHECK(kEmitCompilerReadBarrier);
6337  DCHECK(kUseBakerReadBarrier);
6338
6339  // In slow path based read barriers, the read barrier call is
6340  // inserted after the original load. However, in fast path based
6341  // Baker's read barriers, we need to perform the load of
6342  // mirror::Object::monitor_ *before* the original reference load.
6343  // This load-load ordering is required by the read barrier.
6344  // The fast path/slow path (for Baker's algorithm) should look like:
6345  //
6346  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6347  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
6348  //   HeapReference<Object> ref = *src;  // Original reference load.
6349  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
6350  //   if (is_gray) {
6351  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
6352  //   }
6353  //
6354  // Note: the original implementation in ReadBarrier::Barrier is
6355  // slightly more complex as:
6356  // - it implements the load-load fence using a data dependency on
6357  //   the high-bits of rb_state, which are expected to be all zeroes
6358  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6359  //   here, which is a no-op thanks to the x86-64 memory model);
6360  // - it performs additional checks that we do not do here for
6361  //   performance reasons.
6362
6363  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6364  CpuRegister temp_reg = temp.AsRegister<CpuRegister>();
6365  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6366
6367  // /* int32_t */ monitor = obj->monitor_
6368  __ movl(temp_reg, Address(obj, monitor_offset));
6369  if (needs_null_check) {
6370    MaybeRecordImplicitNullCheck(instruction);
6371  }
6372  // /* LockWord */ lock_word = LockWord(monitor)
6373  static_assert(sizeof(LockWord) == sizeof(int32_t),
6374                "art::LockWord and int32_t have different sizes.");
6375  // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
6376  __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift));
6377  __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask));
6378  static_assert(
6379      LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
6380      "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
6381
6382  // Load fence to prevent load-load reordering.
6383  // Note that this is a no-op, thanks to the x86-64 memory model.
6384  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6385
6386  // The actual reference load.
6387  // /* HeapReference<Object> */ ref = *src
6388  __ movl(ref_reg, src);
6389
6390  // Object* ref = ref_addr->AsMirrorPtr()
6391  __ MaybeUnpoisonHeapReference(ref_reg);
6392
6393  // Slow path used to mark the object `ref` when it is gray.
6394  SlowPathCode* slow_path =
6395      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(instruction, ref, ref);
6396  AddSlowPath(slow_path);
6397
6398  // if (rb_state == ReadBarrier::gray_ptr_)
6399  //   ref = ReadBarrier::Mark(ref);
6400  __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_));
6401  __ j(kEqual, slow_path->GetEntryLabel());
6402  __ Bind(slow_path->GetExitLabel());
6403}
6404
6405void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6406                                                  Location out,
6407                                                  Location ref,
6408                                                  Location obj,
6409                                                  uint32_t offset,
6410                                                  Location index) {
6411  DCHECK(kEmitCompilerReadBarrier);
6412
6413  // Insert a slow path based read barrier *after* the reference load.
6414  //
6415  // If heap poisoning is enabled, the unpoisoning of the loaded
6416  // reference will be carried out by the runtime within the slow
6417  // path.
6418  //
6419  // Note that `ref` currently does not get unpoisoned (when heap
6420  // poisoning is enabled), which is alright as the `ref` argument is
6421  // not used by the artReadBarrierSlow entry point.
6422  //
6423  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6424  SlowPathCode* slow_path = new (GetGraph()->GetArena())
6425      ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6426  AddSlowPath(slow_path);
6427
6428  __ jmp(slow_path->GetEntryLabel());
6429  __ Bind(slow_path->GetExitLabel());
6430}
6431
6432void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6433                                                       Location out,
6434                                                       Location ref,
6435                                                       Location obj,
6436                                                       uint32_t offset,
6437                                                       Location index) {
6438  if (kEmitCompilerReadBarrier) {
6439    // Baker's read barriers shall be handled by the fast path
6440    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6441    DCHECK(!kUseBakerReadBarrier);
6442    // If heap poisoning is enabled, unpoisoning will be taken care of
6443    // by the runtime within the slow path.
6444    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6445  } else if (kPoisonHeapReferences) {
6446    __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6447  }
6448}
6449
6450void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6451                                                         Location out,
6452                                                         Location root) {
6453  DCHECK(kEmitCompilerReadBarrier);
6454
6455  // Insert a slow path based read barrier *after* the GC root load.
6456  //
6457  // Note that GC roots are not affected by heap poisoning, so we do
6458  // not need to do anything special for this here.
6459  SlowPathCode* slow_path =
6460      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6461  AddSlowPath(slow_path);
6462
6463  __ jmp(slow_path->GetEntryLabel());
6464  __ Bind(slow_path->GetExitLabel());
6465}
6466
6467void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6468  // Nothing to do, this should be removed during prepare for register allocator.
6469  LOG(FATAL) << "Unreachable";
6470}
6471
6472void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6473  // Nothing to do, this should be removed during prepare for register allocator.
6474  LOG(FATAL) << "Unreachable";
6475}
6476
6477// Simple implementation of packed switch - generate cascaded compare/jumps.
6478void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6479  LocationSummary* locations =
6480      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6481  locations->SetInAt(0, Location::RequiresRegister());
6482  locations->AddTemp(Location::RequiresRegister());
6483  locations->AddTemp(Location::RequiresRegister());
6484}
6485
6486void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6487  int32_t lower_bound = switch_instr->GetStartValue();
6488  uint32_t num_entries = switch_instr->GetNumEntries();
6489  LocationSummary* locations = switch_instr->GetLocations();
6490  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6491  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6492  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6493  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6494
6495  // Should we generate smaller inline compare/jumps?
6496  if (num_entries <= kPackedSwitchJumpTableThreshold) {
6497    // Figure out the correct compare values and jump conditions.
6498    // Handle the first compare/branch as a special case because it might
6499    // jump to the default case.
6500    DCHECK_GT(num_entries, 2u);
6501    Condition first_condition;
6502    uint32_t index;
6503    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6504    if (lower_bound != 0) {
6505      first_condition = kLess;
6506      __ cmpl(value_reg_in, Immediate(lower_bound));
6507      __ j(first_condition, codegen_->GetLabelOf(default_block));
6508      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6509
6510      index = 1;
6511    } else {
6512      // Handle all the compare/jumps below.
6513      first_condition = kBelow;
6514      index = 0;
6515    }
6516
6517    // Handle the rest of the compare/jumps.
6518    for (; index + 1 < num_entries; index += 2) {
6519      int32_t compare_to_value = lower_bound + index + 1;
6520      __ cmpl(value_reg_in, Immediate(compare_to_value));
6521      // Jump to successors[index] if value < case_value[index].
6522      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6523      // Jump to successors[index + 1] if value == case_value[index + 1].
6524      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6525    }
6526
6527    if (index != num_entries) {
6528      // There are an odd number of entries. Handle the last one.
6529      DCHECK_EQ(index + 1, num_entries);
6530      __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6531      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6532    }
6533
6534    // And the default for any other value.
6535    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6536      __ jmp(codegen_->GetLabelOf(default_block));
6537    }
6538    return;
6539  }
6540
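  // The jump-table path below boils down to the following sketch
  // (illustrative only; the exact registers depend on allocation):
  //   temp = value - lower_bound                 // only if lower_bound != 0
  //   if (temp u> num_entries - 1) goto default_block
  //   base = &jump_table                         // RIP-relative constant area load
  //   temp = base + (int32_t) base[temp * 4]     // sign-extended 32-bit entry
  //   goto *temp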
6541  // Remove the bias, if needed.
6542  Register value_reg_out = value_reg_in.AsRegister();
6543  if (lower_bound != 0) {
6544    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6545    value_reg_out = temp_reg.AsRegister();
6546  }
6547  CpuRegister value_reg(value_reg_out);
6548
6549  // Is the value in range?
6550  __ cmpl(value_reg, Immediate(num_entries - 1));
6551  __ j(kAbove, codegen_->GetLabelOf(default_block));
6552
6553  // We are in the range of the table.
6554  // Load the address of the jump table in the constant area.
6555  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6556
6557  // Load the (signed) offset from the jump table.
6558  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6559
6560  // Add the offset to the address of the table base.
6561  __ addq(temp_reg, base_reg);
6562
6563  // And jump.
6564  __ jmp(temp_reg);
6565}
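// Illustrative sketch only (register names and syntax are schematic): for a
// packed switch with lower_bound == 1 and three entries, the cascaded form
// above emits roughly
//   cmp  value, 1
//   jl   default        // below the lower bound
//   je   case_0
//   cmp  value, 3
//   jl   case_1         // value == 2
//   je   case_2
//   jmp  default        // omitted when the default block is the fall-through
// while larger switches take the RIP-relative jump-table path:
//   leaq   base, [rip + jump_table_in_constant_area]
//   movsxd temp, dword ptr [base + value * 4]
//   addq   temp, base
//   jmp    temp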
6566
6567void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6568  if (value == 0) {
6569    __ xorl(dest, dest);
6570  } else {
6571    __ movl(dest, Immediate(value));
6572  }
6573}
6574
6575void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6576  if (value == 0) {
6577    // Clears upper bits too.
6578    __ xorl(dest, dest);
6579  } else if (IsUint<32>(value)) {
6580    // We can use a 32 bit move, as it will zero-extend and is shorter.
6581    __ movl(dest, Immediate(static_cast<int32_t>(value)));
6582  } else {
6583    __ movq(dest, Immediate(value));
6584  }
6585}
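// For example (illustrative): Load64BitValue(reg, 0) emits xorl, which also
// clears the upper 32 bits; Load64BitValue(reg, 0xFFFFFFFF) emits the shorter
// movl, since a 32-bit move zero-extends; only values with bits set above
// bit 31 (e.g. 1LL << 32) need the full 10-byte movq with a 64-bit immediate.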
6586
6587void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6588  if (value == 0) {
6589    __ xorps(dest, dest);
6590  } else {
6591    __ movss(dest, LiteralInt32Address(value));
6592  }
6593}
6594
6595void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6596  if (value == 0) {
6597    __ xorpd(dest, dest);
6598  } else {
6599    __ movsd(dest, LiteralInt64Address(value));
6600  }
6601}
6602
6603void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6604  Load32BitValue(dest, bit_cast<int32_t, float>(value));
6605}
6606
6607void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6608  Load64BitValue(dest, bit_cast<int64_t, double>(value));
6609}
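// Note that only a positive floating-point zero takes the xorps/xorpd path
// above: bit_cast turns -0.0 into a non-zero bit pattern (sign bit set), so
// negative zero is loaded from the constant area and keeps its sign.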
6610
6611void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6612  if (value == 0) {
6613    __ testl(dest, dest);
6614  } else {
6615    __ cmpl(dest, Immediate(value));
6616  }
6617}
6618
6619void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6620  if (IsInt<32>(value)) {
6621    if (value == 0) {
6622      __ testq(dest, dest);
6623    } else {
6624      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6625    }
6626  } else {
6627    // Value won't fit in a 32-bit immediate.
6628    __ cmpq(dest, LiteralInt64Address(value));
6629  }
6630}
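// For example (illustrative): Compare64BitValue(reg, 1) can use cmpq with a
// sign-extended 32-bit immediate, while Compare64BitValue(reg, 1LL << 40)
// must compare against an 8-byte literal placed in the constant area.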
6631
6632void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6633  DCHECK(dest.IsDoubleStackSlot());
6634  if (IsInt<32>(value)) {
6635    // Can store directly as a sign-extended 32-bit immediate.
6636    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6637            Immediate(static_cast<int32_t>(value)));
6638  } else {
6639    Load64BitValue(CpuRegister(TMP), value);
6640    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6641  }
6642}
6643
6644/**
6645 * Class to handle late fixup of offsets into the constant area.
6646 */
6647class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6648 public:
6649  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6650      : codegen_(&codegen), offset_into_constant_area_(offset) {}
6651
6652 protected:
6653  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6654
6655  CodeGeneratorX86_64* codegen_;
6656
6657 private:
6658  void Process(const MemoryRegion& region, int pos) OVERRIDE {
6659    // Patch the correct offset for the instruction.  We use the address of the
6660    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6661    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6662    int32_t relative_position = constant_offset - pos;
6663
6664    // Patch in the right value.
6665    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
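    // Worked example (illustrative): if the constant area starts at code
    // offset 0x400, this literal sits at +0x10 within it, and the referencing
    // instruction ends at pos == 0x120, then 0x410 - 0x120 == 0x2f0 is stored
    // at offset 0x11c; at run time RIP points at the next instruction, so
    // RIP + 0x2f0 is exactly the literal's address.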
6666  }
6667
6668  // Location in constant area that the fixup refers to.
6669  size_t offset_into_constant_area_;
6670};
6671
6672/**
6673 * Class to handle late fixup of offsets to a jump table that will be created in the
6674 * constant area.
6675 */
6676class JumpTableRIPFixup : public RIPFixup {
6677 public:
6678  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
6679      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
6680
6681  void CreateJumpTable() {
6682    X86_64Assembler* assembler = codegen_->GetAssembler();
6683
6684    // Ensure that the reference to the jump table has the correct offset.
6685    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
6686    SetOffset(offset_in_constant_table);
6687
6688    // Compute the offset from the start of the function to this jump table.
6689    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
6690
6691    // Populate the jump table with the offsets to the case targets.
6692    int32_t num_entries = switch_instr_->GetNumEntries();
6693    HBasicBlock* block = switch_instr_->GetBlock();
6694    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
6695    // The value that we want is the target offset - the position of the table.
6696    for (int32_t i = 0; i < num_entries; i++) {
6697      HBasicBlock* b = successors[i];
6698      Label* l = codegen_->GetLabelOf(b);
6699      DCHECK(l->IsBound());
6700      int32_t offset_to_block = l->Position() - current_table_offset;
6701      assembler->AppendInt32(offset_to_block);
6702    }
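    // Each entry is thus a signed 32-bit offset from the start of the table
    // to its target block; the dispatch sequence in VisitPackedSwitch
    // (movsxd + addq) turns it back into an absolute address at run time.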
6703  }
6704
6705 private:
6706  const HPackedSwitch* switch_instr_;
6707};
6708
6709void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
6710  // Generate the constant area if needed.
6711  X86_64Assembler* assembler = GetAssembler();
6712  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
6713    // Align to a 4-byte boundary to reduce cache misses, as the data consists of 4- and 8-byte values.
6714    assembler->Align(4, 0);
6715    constant_area_start_ = assembler->CodeSize();
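    // RIPFixup::Process() reads this value (via ConstantAreaStart()) when the
    // assembler resolves the pending fixups.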
6716
6717    // Populate any jump tables.
6718    for (auto jump_table : fixups_to_jump_tables_) {
6719      jump_table->CreateJumpTable();
6720    }
6721
6722    // And now add the constant area to the generated code.
6723    assembler->AddConstantArea();
6724  }
6725
6726  // And finish up.
6727  CodeGenerator::Finalize(allocator);
6728}
6729
6730Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
6731  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
6732  return Address::RIP(fixup);
6733}
6734
6735Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
6736  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
6737  return Address::RIP(fixup);
6738}
6739
6740Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
6741  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
6742  return Address::RIP(fixup);
6743}
6744
6745Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
6746  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
6747  return Address::RIP(fixup);
6748}
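// Typical use of these helpers (see Load64BitValue above):
//   __ movsd(dest, LiteralInt64Address(v));
// The returned RIP-relative Address carries a fixup whose displacement is
// patched once the constant area has been placed at the end of the code.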
6749
6750// TODO: trg as memory.
6751void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
6752  if (!trg.IsValid()) {
6753    DCHECK_EQ(type, Primitive::kPrimVoid);
6754    return;
6755  }
6756
6757  DCHECK_NE(type, Primitive::kPrimVoid);
6758
6759  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
6760  if (trg.Equals(return_loc)) {
6761    return;
6762  }
6763
6764  // Let the parallel move resolver take care of all of this.
6765  HParallelMove parallel_move(GetGraph()->GetArena());
6766  parallel_move.AddMove(return_loc, trg, type, nullptr);
6767  GetMoveResolver()->EmitNativeCode(&parallel_move);
6768}
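// For example, a float call result arrives in the floating-point return
// register of the calling convention (XMM0); if the register allocator put
// trg somewhere else, the single parallel move above materializes it there.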
6769
6770Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
6771  // Create a fixup that is used both to create the jump table and to address it.
6772  JumpTableRIPFixup* table_fixup =
6773      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
6774
6775  // Remember the fixup so that the jump table gets populated in Finalize().
6776  fixups_to_jump_tables_.push_back(table_fixup);
6777  return Address::RIP(table_fixup);
6778}
6779
6780void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
6781                                             const Address& addr_high,
6782                                             int64_t v,
6783                                             HInstruction* instruction) {
6784  if (IsInt<32>(v)) {
6785    int32_t v_32 = v;
6786    __ movq(addr_low, Immediate(v_32));
6787    MaybeRecordImplicitNullCheck(instruction);
6788  } else {
6789    // Didn't fit in a 32-bit immediate.  Do it in two 32-bit pieces.
6790    int32_t low_v = Low32Bits(v);
6791    int32_t high_v = High32Bits(v);
6792    __ movl(addr_low, Immediate(low_v));
6793    MaybeRecordImplicitNullCheck(instruction);
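    // Only this first store is recorded for the implicit null check: if the
    // object is null, the low-word access is the one that faults.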
6794    __ movl(addr_high, Immediate(high_v));
6795  }
6796}
6797
6798#undef __
6799
6800}  // namespace x86_64
6801}  // namespace art
6802