/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
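// (In IEEE-754 double layout these have sign 0, a zero mantissa, and biased
// exponents 0x41F = 1023 + 32 and 0x41E = 1023 + 31 respectively, i.e. the
// values 1.0 * 2^32 and 1.0 * 2^31.)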

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

 private:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
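
// Note: the entry and exit labels of a slow path live in the
// architecture-specific subclasses. A typical EmitNativeCode() override binds
// the entry label, calls SaveLiveRegisters(), invokes a runtime entry point,
// calls RecordPcInfo() for the instruction that required the slow path, then
// calls RestoreLiveRegisters() and branches back to the fast path.
// (Illustrative summary only; the exact sequence is defined per backend.)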

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
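
// Concrete InvokeDexCallingConventionVisitor subclasses map successive
// argument types to locations: typically the next free core register
// (advancing gp_index_) for integral and reference arguments, the next free
// floating-point register (advancing float_index_) for float/double
// arguments, and a stack slot derived from stack_index_ once the
// corresponding register file is exhausted. (Illustrative summary only; the
// exact mapping is defined by each architecture's subclass.)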

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
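  // For instance, with an 8-byte ArtMethod pointer and kVRegSize == 4, the
  // formula above places the parameter at index 2 of a method with a 64-byte
  // frame at stack offset 64 + 8 + 2 * 4 = 80 (illustrative numbers only).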

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
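  // For example, ComputeRegisterMask() over registers {0, 1, 5} yields
  // 0b100011 (0x23).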

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
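  // (A reference store into the heap needs a card-marking write barrier so
  // the garbage collector notices the updated reference; storing the null
  // constant can never introduce a reference to trace, so no barrier is
  // required.)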

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty": it either actually has a size of zero, or only
  // contains the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
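  // For example, on x86/x86-64 the call instruction pushes the return PC, so
  // an "empty" frame still has a size of GetWordSize(); on ARM the return
  // address stays in a register, so an empty frame has a size of zero.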

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
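  // For example, a float constant 1.0f maps to the bit pattern 0x3F800000 and
  // a double constant 1.0 maps to INT64_C(0x3FF0000000000000); the null
  // constant maps to 0 in both cases.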

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        is_baseline_(false),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }
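  // For example, with three bits set in core_spill_mask_ on a 32-bit target
  // (4-byte words) and two bits set in fpu_spill_mask_ with 8-byte FP spill
  // slots, FrameEntrySpillSize() is 3 * 4 + 2 * 8 = 28 bytes and the FP
  // spills start at GetFrameSize() - 28. (Illustrative numbers only.)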

  bool HasAllocatedCalleeSaveRegisters() const {
    // We compare the core register count against 1 because the allocated
    // callee-save core registers always comprise the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }
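  // A backend typically keeps one label per basic block and wraps this as,
  // for instance:
  //
  //   Label* GetLabelOf(HBasicBlock* block) const {
  //     return CommonGetLabelOf<Label>(block_labels_, block);
  //   }
  //
  // where block_labels_ is a hypothetical raw array with one label per block
  // id. (Illustrative sketch only; member names vary per backend.)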

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // Whether we are using baseline.
  bool is_baseline_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
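  // For example, with a 4-byte method pointer and kVRegSize == 4, the
  // argument at index 0 sits at stack offset 4 and the argument at index 3
  // at offset 16 (illustrative numbers only).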

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_