code_generator.h revision 8158f28b6689314213eb4dbbe14166073be71f7e
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
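// These are the raw IEEE-754 bit patterns of the corresponding double values
// (sign 0, biased exponent 1023 + 32 resp. 1023 + 31, zero mantissa); conceptually,
// bit_cast<int64_t, double>(4294967296.0) == k2Pow32EncodingForDouble.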

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
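// Backends write the generated code into a buffer obtained from a CodeAllocator.
// A minimal implementation (an illustrative sketch only, not part of this header)
// could simply hand out storage owned by a std::vector<uint8_t>:
//   class VectorCodeAllocator : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) OVERRIDE {
//       memory_.resize(size);
//       return &memory_[0];
//     }
//    private:
//     std::vector<uint8_t> memory_;
//   };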

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
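// Each backend defines concrete SlowPathCode subclasses (see code_generator_<arch>.cc).
// A typical EmitNativeCode() binds the slow path entry label, saves live registers,
// calls the runtime entrypoint, records the PC, restores registers and jumps back.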

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
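// Each backend provides a subclass of this visitor. When building locations for an
// invoke, successive calls to GetNextLocation() walk the managed calling convention,
// advancing gp_index_/float_index_/stack_index_ as arguments are assigned to
// registers or stack slots.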

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
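  // E.g. on a 32-bit target with a 64-byte frame and kVRegSize == 4, the
  // parameter at index 1 lives at 64 + 4 + 1 * 4 = 72 bytes above SP
  // (illustrative numbers only).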

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
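  // E.g. ComputeRegisterMask of registers {0, 3, 5} yields 0b101001, i.e. 0x29.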

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that a null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
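  // In other words, only reference (kPrimNot) stores whose value is not the
  // null constant require a write barrier.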

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is considered
  // "empty", that is, it either has a size of zero or only contains the
  // saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
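  // On x86 and x86-64 the call instruction pushes the return PC, so an "empty"
  // frame still occupies GetWordSize() bytes; on the other architectures it is
  // literally zero bytes (see CallPushesPC()).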

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
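  // Note that a float constant is widened from its 32-bit bit pattern, so e.g.
  // GetInt64ValueOf() of the float constant 1.0f yields 0x3f800000, not the
  // bit pattern of the double 1.0.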

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        is_baseline_(false),
        disasm_info_(nullptr),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena(), 8),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }
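  // E.g. with a 96-byte frame, two core spills of 4 bytes each and two FPU
  // spills of 8 bytes each, GetCoreSpillSize() is 8, GetFpuSpillSize() is 16
  // and the FPU spill area starts at offset 96 - (8 + 16) = 72 (illustrative
  // numbers only).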

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because the set always includes the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // Whether we are using baseline.
  bool is_baseline_;

  DisassemblyInformation* disasm_info_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<SlowPathCode*> slow_paths_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
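  // E.g. with a 4-byte method pointer (pointer_size_ == 4) and kVRegSize == 4,
  // the argument at index 2 maps to stack offset 4 + 2 * 4 = 12 (illustrative
  // numbers only).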

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
