code_generator_x86_64.h revision a4f1220c1518074db18ca1044e9201492975750b
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
18#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
19
20#include "code_generator.h"
21#include "dex/compiler_enums.h"
22#include "driver/compiler_options.h"
23#include "nodes.h"
24#include "parallel_move_resolver.h"
25#include "utils/x86_64/assembler_x86_64.h"
26
27namespace art {
28namespace x86_64 {
29
30// Use a local definition to prevent copying mistakes.
31static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
32
33// Some x86_64 instructions require a register to be available as temp.
34static constexpr Register TMP = R11;
35
36static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
37static constexpr FloatRegister kParameterFloatRegisters[] =
38    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
39
40static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
41static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
42
43static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
44static constexpr size_t kRuntimeParameterCoreRegistersLength =
45    arraysize(kRuntimeParameterCoreRegisters);
46static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
47static constexpr size_t kRuntimeParameterFpuRegistersLength =
48    arraysize(kRuntimeParameterFpuRegisters);
49
50// These XMM registers are non-volatile in ART ABI, but volatile in native ABI.
51// If the ART ABI changes, this list must be updated.  It is used to ensure that
52// these are not clobbered by any direct call to native code (such as math intrinsics).
53static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
54
55
56class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
57 public:
58  InvokeRuntimeCallingConvention()
59      : CallingConvention(kRuntimeParameterCoreRegisters,
60                          kRuntimeParameterCoreRegistersLength,
61                          kRuntimeParameterFpuRegisters,
62                          kRuntimeParameterFpuRegistersLength,
63                          kX86_64PointerSize) {}
64
65 private:
66  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
67};
68
69class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
70 public:
71  InvokeDexCallingConvention() : CallingConvention(
72      kParameterCoreRegisters,
73      kParameterCoreRegistersLength,
74      kParameterFloatRegisters,
75      kParameterFloatRegistersLength,
76      kX86_64PointerSize) {}
77
78 private:
79  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
80};
81
82class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
83 public:
84  FieldAccessCallingConventionX86_64() {}
85
86  Location GetObjectLocation() const OVERRIDE {
87    return Location::RegisterLocation(RSI);
88  }
89  Location GetFieldIndexLocation() const OVERRIDE {
90    return Location::RegisterLocation(RDI);
91  }
92  Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
93    return Location::RegisterLocation(RAX);
94  }
95  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
96    return Primitive::Is64BitType(type)
97        ? Location::RegisterLocation(RDX)
98        : (is_instance
99            ? Location::RegisterLocation(RDX)
100            : Location::RegisterLocation(RSI));
101  }
102  Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
103    return Location::FpuRegisterLocation(XMM0);
104  }
105
106 private:
107  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64);
108};
109
110
111class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
112 public:
113  InvokeDexCallingConventionVisitorX86_64() {}
114  virtual ~InvokeDexCallingConventionVisitorX86_64() {}
115
116  Location GetNextLocation(Primitive::Type type) OVERRIDE;
117  Location GetReturnLocation(Primitive::Type type) const OVERRIDE;
118  Location GetMethodLocation() const OVERRIDE;
119
120 private:
121  InvokeDexCallingConvention calling_convention;
122
123  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
124};
125
126class CodeGeneratorX86_64;
127
128class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
129 public:
130  ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
131      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
132
133  void EmitMove(size_t index) OVERRIDE;
134  void EmitSwap(size_t index) OVERRIDE;
135  void SpillScratch(int reg) OVERRIDE;
136  void RestoreScratch(int reg) OVERRIDE;
137
138  X86_64Assembler* GetAssembler() const;
139
140 private:
141  void Exchange32(CpuRegister reg, int mem);
142  void Exchange32(XmmRegister reg, int mem);
143  void Exchange32(int mem1, int mem2);
144  void Exchange64(CpuRegister reg, int mem);
145  void Exchange64(XmmRegister reg, int mem);
146  void Exchange64(int mem1, int mem2);
147
148  CodeGeneratorX86_64* const codegen_;
149
150  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64);
151};
152
153class LocationsBuilderX86_64 : public HGraphVisitor {
154 public:
155  LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen)
156      : HGraphVisitor(graph), codegen_(codegen) {}
157
158#define DECLARE_VISIT_INSTRUCTION(name, super)     \
159  void Visit##name(H##name* instr) OVERRIDE;
160
161  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
162  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
163
164#undef DECLARE_VISIT_INSTRUCTION
165
166  void VisitInstruction(HInstruction* instruction) OVERRIDE {
167    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
168               << " (id " << instruction->GetId() << ")";
169  }
170
171 private:
172  void HandleInvoke(HInvoke* invoke);
173  void HandleBitwiseOperation(HBinaryOperation* operation);
174  void HandleShift(HBinaryOperation* operation);
175  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
176  void HandleFieldGet(HInstruction* instruction);
177
178  CodeGeneratorX86_64* const codegen_;
179  InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
180
181  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
182};
183
184class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
185 public:
186  InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
187
188#define DECLARE_VISIT_INSTRUCTION(name, super)     \
189  void Visit##name(H##name* instr) OVERRIDE;
190
191  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
192  FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
193
194#undef DECLARE_VISIT_INSTRUCTION
195
196  void VisitInstruction(HInstruction* instruction) OVERRIDE {
197    LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
198               << " (id " << instruction->GetId() << ")";
199  }
200
201  X86_64Assembler* GetAssembler() const { return assembler_; }
202
203 private:
204  // Generate code for the given suspend check. If not null, `successor`
205  // is the block to branch to if the suspend check is not needed, and after
206  // the suspend call.
207  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
208  void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
209  void HandleBitwiseOperation(HBinaryOperation* operation);
210  void GenerateRemFP(HRem* rem);
211  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
212  void DivByPowerOfTwo(HDiv* instruction);
213  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
214  void GenerateDivRemIntegral(HBinaryOperation* instruction);
215  void HandleShift(HBinaryOperation* operation);
216  void GenerateMemoryBarrier(MemBarrierKind kind);
217  void HandleFieldSet(HInstruction* instruction,
218                      const FieldInfo& field_info,
219                      bool value_can_be_null);
220  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
221  void GenerateImplicitNullCheck(HNullCheck* instruction);
222  void GenerateExplicitNullCheck(HNullCheck* instruction);
223  void PushOntoFPStack(Location source, uint32_t temp_offset,
224                       uint32_t stack_adjustment, bool is_float);
225  void GenerateTestAndBranch(HInstruction* instruction,
226                             size_t condition_input_index,
227                             Label* true_target,
228                             Label* false_target);
229  void GenerateCompareTestAndBranch(HCondition* condition,
230                                    Label* true_target,
231                                    Label* false_target);
232  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
233  void HandleGoto(HInstruction* got, HBasicBlock* successor);
234
235  X86_64Assembler* const assembler_;
236  CodeGeneratorX86_64* const codegen_;
237
238  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
239};
240
241// Class for fixups to jump tables.
242class JumpTableRIPFixup;
243
244class CodeGeneratorX86_64 : public CodeGenerator {
245 public:
246  CodeGeneratorX86_64(HGraph* graph,
247                  const X86_64InstructionSetFeatures& isa_features,
248                  const CompilerOptions& compiler_options,
249                  OptimizingCompilerStats* stats = nullptr);
250  virtual ~CodeGeneratorX86_64() {}
251
252  void GenerateFrameEntry() OVERRIDE;
253  void GenerateFrameExit() OVERRIDE;
254  void Bind(HBasicBlock* block) OVERRIDE;
255  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
256  void MoveConstant(Location destination, int32_t value) OVERRIDE;
257  void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
258  void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
259
260  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
261  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
262  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
263  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
264
265  // Generate code to invoke a runtime entry point.
266  void InvokeRuntime(QuickEntrypointEnum entrypoint,
267                     HInstruction* instruction,
268                     uint32_t dex_pc,
269                     SlowPathCode* slow_path) OVERRIDE;
270
271  void InvokeRuntime(int32_t entry_point_offset,
272                     HInstruction* instruction,
273                     uint32_t dex_pc,
274                     SlowPathCode* slow_path);
275
276  size_t GetWordSize() const OVERRIDE {
277    return kX86_64WordSize;
278  }
279
280  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
281    return kX86_64WordSize;
282  }
283
284  HGraphVisitor* GetLocationBuilder() OVERRIDE {
285    return &location_builder_;
286  }
287
288  HGraphVisitor* GetInstructionVisitor() OVERRIDE {
289    return &instruction_visitor_;
290  }
291
292  X86_64Assembler* GetAssembler() OVERRIDE {
293    return &assembler_;
294  }
295
296  const X86_64Assembler& GetAssembler() const OVERRIDE {
297    return assembler_;
298  }
299
300  ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE {
301    return &move_resolver_;
302  }
303
304  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
305    return GetLabelOf(block)->Position();
306  }
307
308  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
309
310  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
311  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
312  void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
313  void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
314  void Finalize(CodeAllocator* allocator) OVERRIDE;
315
316  InstructionSet GetInstructionSet() const OVERRIDE {
317    return InstructionSet::kX86_64;
318  }
319
320  // Emit a write barrier.
321  void MarkGCCard(CpuRegister temp,
322                  CpuRegister card,
323                  CpuRegister object,
324                  CpuRegister value,
325                  bool value_can_be_null);
326
327  // Helper method to move a value between two locations.
328  void Move(Location destination, Location source);
329
330  Label* GetLabelOf(HBasicBlock* block) const {
331    return CommonGetLabelOf<Label>(block_labels_, block);
332  }
333
334  void Initialize() OVERRIDE {
335    block_labels_ = CommonInitializeLabels<Label>();
336  }
337
338  bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
339    return false;
340  }
341
342  // Check if the desired_dispatch_info is supported. If it is, return it,
343  // otherwise return a fall-back info that should be used instead.
344  HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
345      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
346      MethodReference target_method) OVERRIDE;
347
348  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
349  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
350
351  void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
352
353  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
354
355  const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
356    return isa_features_;
357  }
358
359  // Generate a read barrier for a heap reference within `instruction`.
360  //
361  // A read barrier for an object reference read from the heap is
362  // implemented as a call to the artReadBarrierSlow runtime entry
363  // point, which is passed the values in locations `ref`, `obj`, and
364  // `offset`:
365  //
366  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
367  //                                      mirror::Object* obj,
368  //                                      uint32_t offset);
369  //
370  // The `out` location contains the value returned by
371  // artReadBarrierSlow.
372  //
373  // When `index` provided (i.e., when it is different from
374  // Location::NoLocation()), the offset value passed to
375  // artReadBarrierSlow is adjusted to take `index` into account.
376  void GenerateReadBarrier(HInstruction* instruction,
377                           Location out,
378                           Location ref,
379                           Location obj,
380                           uint32_t offset,
381                           Location index = Location::NoLocation());
382
383  // If read barriers are enabled, generate a read barrier for a heap reference.
384  // If heap poisoning is enabled, also unpoison the reference in `out`.
385  void MaybeGenerateReadBarrier(HInstruction* instruction,
386                                Location out,
387                                Location ref,
388                                Location obj,
389                                uint32_t offset,
390                                Location index = Location::NoLocation());
391
392  // Generate a read barrier for a GC root within `instruction`.
393  //
394  // A read barrier for an object reference GC root is implemented as
395  // a call to the artReadBarrierForRootSlow runtime entry point,
396  // which is passed the value in location `root`:
397  //
398  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
399  //
400  // The `out` location contains the value returned by
401  // artReadBarrierForRootSlow.
402  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
403
404  int ConstantAreaStart() const {
405    return constant_area_start_;
406  }
407
408  Address LiteralDoubleAddress(double v);
409  Address LiteralFloatAddress(float v);
410  Address LiteralInt32Address(int32_t v);
411  Address LiteralInt64Address(int64_t v);
412
413  // Load a 64 bit value into a register in the most efficient manner.
414  void Load64BitValue(CpuRegister dest, int64_t value);
415  Address LiteralCaseTable(HPackedSwitch* switch_instr);
416
417  // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
418  void Store64BitValueToStack(Location dest, int64_t value);
419
420  // Assign a 64 bit constant to an address.
421  void MoveInt64ToAddress(const Address& addr_low,
422                          const Address& addr_high,
423                          int64_t v,
424                          HInstruction* instruction);
425
426 private:
427  struct PcRelativeDexCacheAccessInfo {
428    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
429        : target_dex_file(dex_file), element_offset(element_off), label() { }
430
431    const DexFile& target_dex_file;
432    uint32_t element_offset;
433    Label label;
434  };
435
436  // Labels for each block that will be compiled.
437  Label* block_labels_;  // Indexed by block id.
438  Label frame_entry_label_;
439  LocationsBuilderX86_64 location_builder_;
440  InstructionCodeGeneratorX86_64 instruction_visitor_;
441  ParallelMoveResolverX86_64 move_resolver_;
442  X86_64Assembler assembler_;
443  const X86_64InstructionSetFeatures& isa_features_;
444
445  // Offset to the start of the constant area in the assembled code.
446  // Used for fixups to the constant area.
447  int constant_area_start_;
448
449  // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
450  ArenaDeque<MethodPatchInfo<Label>> method_patches_;
451  ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
452  // PC-relative DexCache access info.
453  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
454
455  // When we don't know the proper offset for the value, we use kDummy32BitOffset.
456  // We will fix this up in the linker later to have the right value.
457  static constexpr int32_t kDummy32BitOffset = 256;
458
459  // Fixups for jump tables need to be handled specially.
460  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
461
462  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
463};
464
465}  // namespace x86_64
466}  // namespace art
467
468#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
469