code_generator.h revision fead4e4f397455aa31905b2982d4d861126ab89d
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
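// (For reference: in the IEEE-754 double format, 0x41F0000000000000 has sign 0, a biased
// exponent of 0x41F = 1055, i.e. 1055 - 1023 = 32, and a zero mantissa, which encodes
// 1.0 * 2^32; 0x41E0000000000000 likewise encodes 2^31.)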

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
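
// A minimal sketch of a concrete allocator (illustration only; the class name and the
// vector backing store below are hypothetical, not part of this interface):
//
//   class VectorCodeAllocator : public CodeAllocator {
//    public:
//     uint8_t* Allocate(size_t size) { memory_.resize(size); return memory_.data(); }
//    private:
//     std::vector<uint8_t> memory_;
//   };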

// A (dex pc, native pc) pair recorded while generating code, later used to build the
// method's mapping tables.
struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

// Out-of-line code generated for the uncommon case of an instruction (for example the
// throwing path of a check).
class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {}
  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions, writing them out through the
  // given CodeAllocator.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
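  // Typical driver flow (sketch only; `code_allocator` is a hypothetical caller-provided
  // CodeAllocator):
  //
  //   CodeGenerator* codegen = CodeGenerator::Create(graph, instruction_set,
  //                                                  isa_features, compiler_options);
  //   codegen->CompileOptimized(&code_allocator);  // or CompileBaseline(&code_allocator).
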
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + kVRegSize  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
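  // Worked example (illustrative values): with a 64-byte frame and kVRegSize == 4, the
  // parameter at index 2 is at 64 + 4 + 2 * 4 = 76 from the stack pointer; the extra
  // kVRegSize skips the slot reserved for the ArtMethod, as in the expression above.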

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }
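  // For instance, if the allocator touched core registers 0x33 and the core callee-save
  // mask is 0x30, the core spill mask becomes 0x30: only allocated callee-save registers
  // need to be preserved in the frame entry.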

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
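  // Example: ComputeRegisterMask on {0, 1, 5} yields (1 << 0) | (1 << 1) | (1 << 5) = 0x23.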

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  // Same as above, for floating point registers.
  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);
  void SaveLiveRegisters(LocationSummary* locations);
  void RestoreLiveRegisters(LocationSummary* locations);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);

  void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    if (kIsDebugBuild) {
      if (type == Primitive::kPrimNot && value->IsIntConstant()) {
        CHECK_EQ(value->AsIntConstant()->GetValue(), 0);
      }
    }
    return type == Primitive::kPrimNot && !value->IsIntConstant();
  }
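  // In other words: storing a reference (kPrimNot) needs a write barrier unless the value
  // is the null constant, which is the only IntConstant a reference store may see (the
  // debug check above enforces that its value is 0).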

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is considered "empty",
  // that is, it either has a size of zero or contains only the saved return
  // address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
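  // For example, on x86/x86-64 the call instruction itself pushes the return PC, so an
  // "empty" frame still has GetFrameSize() == GetWordSize(); on the other architectures
  // it is 0 (see CallPushesPC() below).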

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    }
  }
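  // For example, a float constant holding 1.0f comes back as its raw bit pattern
  // 0x3F800000, not as the truncated integer 1.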

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
    }
  }

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual ParallelMoveResolver* GetMoveResolver() = 0;
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }
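  // Worked example (illustrative numbers): with three bits set in core_spill_mask_, a
  // 4-byte word size, two bits set in fpu_spill_mask_ and 8-byte FP spill slots, the
  // frame entry spill area is 3 * 4 + 2 * 8 = 28 bytes, so the FP spills start at
  // GetFrameSize() - 28.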

  bool HasAllocatedCalleeSaveRegisters() const {
    // We compare the core registers against 1 rather than 0 because the allocated
    // core callee saves always comprise the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }
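  // A backend would typically wrap this helper as follows (sketch only; `block_labels_`
  // is a hypothetical per-backend array of labels indexed by block id):
  //
  //   Label* GetLabelOf(HBasicBlock* block) const {
  //     return CommonGetLabelOf<Label>(block_labels_, block);
  //   }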

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add one for the method pointer.
    return (index + 1) * kVRegSize;
  }
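  // For example, with a kVRegSize of 4, the argument at index 0 lives at stack offset 4
  // and index 1 at offset 8; offset 0 is the slot reserved for the method pointer.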

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_