code_generator.h revision 5f8741860d465410bfed495dbb5f794590d338da
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

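// Size in bytes of a dex virtual register (vreg) slot.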
static size_t constexpr kVRegSize = 4;

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

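// Interface through which the code generator obtains the buffer that receives
// the generated native code.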
class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

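// Associates a dex pc with its corresponding native pc in the generated code.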
struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

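// Out-of-line code emitted after the main body of a method, typically for
// uncommon cases; subclasses emit the actual instructions in EmitNativeCode().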
class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {}
  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

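// Base class of the architecture-specific code generators. It drives the
// emission of native code for a method's HGraph and records the associated
// metadata (mapping tables, stack maps, GC maps).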
class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

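  // For example, the parameter at index 0 is at stack slot
  // GetFrameSize() + kVRegSize, right after the slot reserved for the ArtMethod.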
  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + kVRegSize  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

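  // For example, registers {0, 2, 5} produce the mask 0b100101 (0x25).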
  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
    UNUSED(stack_index, reg_id);
    UNIMPLEMENTED(FATAL);
    UNREACHABLE();
  }
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
    UNUSED(stack_index, reg_id);
    UNIMPLEMENTED(FATAL);
    UNREACHABLE();
  }
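  // Whether values of the given type need a pair of registers on this
  // architecture, for instance long values on 32-bit targets.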
  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);
  void SaveLiveRegisters(LocationSummary* locations);
  void RestoreLiveRegisters(LocationSummary* locations);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);

  void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);

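  // A store of a reference needs a GC write barrier unless the stored value is
  // known to be null (null is represented as the integer constant 0).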
  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    if (kIsDebugBuild) {
      if (type == Primitive::kPrimNot && value->IsIntConstant()) {
        CHECK_EQ(value->AsIntConstant()->GetValue(), 0);
      }
    }
    return type == Primitive::kPrimNot && !value->IsIntConstant();
  }

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is considered
  // "empty", that is, it either has a size of zero or contains only the saved
  // return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

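  // Returns the 32-bit value of `constant`: int constants as-is, the null
  // constant as 0, and float constants as their raw bit pattern.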
  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
    }
  }

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend-specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual ParallelMoveResolver* GetMoveResolver() = 0;
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because the allocated callee-save
    // core registers always include the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

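  // Whether the target's call instruction implicitly pushes the return PC on
  // the stack (true for x86 and x86-64).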
  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

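// Describes where a calling convention passes arguments: in core registers of
// type `C`, in floating-point registers of type `F`, or on the stack.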
template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add one for the method pointer.
    return (index + 1) * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_