code_generator.h revision b022fa1300e6d78639b3b910af0cf85c43df44bb
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class LinkerPatch;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

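// Interface used by the code generator to obtain the buffer that will hold the
// generated native code.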
class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

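// Out-of-line code emitted for the slow (uncommon) case of an instruction, typically
// ending in a runtime call. Keeps track of the stack offsets at which live registers
// are saved around that call.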
class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

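// Assigns locations (registers or stack slots) to the arguments and return value
// of a dex method invocation, following the target's calling convention.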
class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

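// Base class of the architecture-specific code generators. Drives code generation
// for an HGraph and keeps track of frame size, spilled registers and stack maps.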
class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

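  // Returns a mask with one bit set for each register id in `registers`.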
  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  // Record native to dex mapping for a suspend point.  Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Record additional native to dex mappings for native debugging/profiling tools.
  void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }

  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(ArenaVector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

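  // Returns whether storing `value` of the given type requires a GC write barrier,
  // i.e. whether it is a reference store of a possibly non-null value.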
  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, either having a size of zero or
  // containing only the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

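  // Returns the raw 32-bit value of a constant that fits in a core register:
  // an int, a null reference (0), or the bit pattern of a float.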
  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

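  // Returns the raw 64-bit value of a constant: a long, the bit pattern of a
  // double, or a widened 32-bit constant.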
  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

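  // Size, in bytes, of the register spill area written in the frame entry
  // (core and floating-point callee-save registers).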
  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes: patching
  // the address of the method, patching the code pointer, or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        is_baseline_(false),
        disasm_info_(nullptr),
        graph_(graph),
        compiler_options_(compiler_options),
        src_map_(nullptr),
        slow_paths_(graph->GetArena(), 8),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

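  // Returns whether a call on the target instruction set implicitly pushes the
  // return PC on the stack (x86 and x86-64).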
  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // Whether we are using baseline.
  bool is_baseline_;

  DisassemblyInformation* disasm_info_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  // Native to dex_pc map used for native debugging/profiling tools.
  DefaultSrcMap* src_map_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

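// Describes a calling convention: the core (C) and floating-point (F) registers
// used to pass arguments, and the stack layout of arguments passed in memory.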
template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_