code_generator.h revision 2ae48182573da7087bffc2873730bc758ec29696
1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
18#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
19
20#include "arch/instruction_set.h"
21#include "arch/instruction_set_features.h"
22#include "base/arena_containers.h"
23#include "base/arena_object.h"
24#include "base/bit_field.h"
25#include "compiled_method.h"
26#include "driver/compiler_options.h"
27#include "globals.h"
28#include "graph_visualizer.h"
29#include "locations.h"
30#include "memory_region.h"
31#include "nodes.h"
32#include "optimizing_compiler_stats.h"
33#include "stack_map_stream.h"
34#include "utils/label.h"
35
36namespace art {
37
// Binary encoding of 2^32 for type double (sign 0, biased exponent 0x41F, zero mantissa).
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double (sign 0, biased exponent 0x41E, zero mantissa).
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
// Spelled with the <cstdint> limit macro instead of the hex literal 0x80000000: that
// literal does not fit in `int`, so it has an unsigned type and initializing a signed
// variable from it relies on an out-of-range narrowing conversion.
static int32_t constexpr kPrimIntMin = INT32_MIN;
// Minimum value for a primitive long (limit macro used for the same reason as above).
static int64_t constexpr kPrimLongMin = INT64_MIN;

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = INT32_MAX;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_MAX;
52
53class Assembler;
54class CodeGenerator;
55class CompilerDriver;
56class LinkerPatch;
57class ParallelMoveResolver;
58
59class CodeAllocator {
60 public:
61  CodeAllocator() {}
62  virtual ~CodeAllocator() {}
63
64  virtual uint8_t* Allocate(size_t size) = 0;
65
66 private:
67  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
68};
69
70class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
71 public:
72  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
73    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
74      saved_core_stack_offsets_[i] = kRegisterNotSaved;
75      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
76    }
77  }
78
79  virtual ~SlowPathCode() {}
80
81  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
82
83  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
84  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
85
86  bool IsCoreRegisterSaved(int reg) const {
87    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
88  }
89
90  bool IsFpuRegisterSaved(int reg) const {
91    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
92  }
93
94  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
95    return saved_core_stack_offsets_[reg];
96  }
97
98  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
99    return saved_fpu_stack_offsets_[reg];
100  }
101
102  virtual bool IsFatal() const { return false; }
103
104  virtual const char* GetDescription() const = 0;
105
106  Label* GetEntryLabel() { return &entry_label_; }
107  Label* GetExitLabel() { return &exit_label_; }
108
109  uint32_t GetDexPc() const {
110    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
111  }
112
113 protected:
114  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
115  static constexpr uint32_t kRegisterNotSaved = -1;
116  // The instruction where this slow path is happening.
117  HInstruction* instruction_;
118  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
119  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
120
121 private:
122  Label entry_label_;
123  Label exit_label_;
124
125  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
126};
127
128class InvokeDexCallingConventionVisitor {
129 public:
130  virtual Location GetNextLocation(Primitive::Type type) = 0;
131  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
132  virtual Location GetMethodLocation() const = 0;
133
134 protected:
135  InvokeDexCallingConventionVisitor() {}
136  virtual ~InvokeDexCallingConventionVisitor() {}
137
138  // The current index for core registers.
139  uint32_t gp_index_ = 0u;
140  // The current index for floating-point registers.
141  uint32_t float_index_ = 0u;
142  // The current stack index.
143  uint32_t stack_index_ = 0u;
144
145 private:
146  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
147};
148
149class FieldAccessCallingConvention {
150 public:
151  virtual Location GetObjectLocation() const = 0;
152  virtual Location GetFieldIndexLocation() const = 0;
153  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
154  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
155  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
156  virtual ~FieldAccessCallingConvention() {}
157
158 protected:
159  FieldAccessCallingConvention() {}
160
161 private:
162  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
163};
164
165class CodeGenerator {
166 public:
167  // Compiles the graph to executable instructions.
168  void Compile(CodeAllocator* allocator);
169  static CodeGenerator* Create(HGraph* graph,
170                               InstructionSet instruction_set,
171                               const InstructionSetFeatures& isa_features,
172                               const CompilerOptions& compiler_options,
173                               OptimizingCompilerStats* stats = nullptr);
174  virtual ~CodeGenerator() {}
175
176  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
177  HGraph* GetGraph() const { return graph_; }
178
179  HBasicBlock* GetNextBlockToEmit() const;
180  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
181  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
182
183  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
184    // Note that this follows the current calling convention.
185    return GetFrameSize()
186        + InstructionSetPointerSize(GetInstructionSet())  // Art method
187        + parameter->GetIndex() * kVRegSize;
188  }
189
190  virtual void Initialize() = 0;
191  virtual void Finalize(CodeAllocator* allocator);
192  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
193  virtual void GenerateFrameEntry() = 0;
194  virtual void GenerateFrameExit() = 0;
195  virtual void Bind(HBasicBlock* block) = 0;
196  virtual void MoveConstant(Location destination, int32_t value) = 0;
197  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
198  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
199
200  virtual Assembler* GetAssembler() = 0;
201  virtual const Assembler& GetAssembler() const = 0;
202  virtual size_t GetWordSize() const = 0;
203  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
204  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
205  void InitializeCodeGeneration(size_t number_of_spill_slots,
206                                size_t maximum_number_of_live_core_registers,
207                                size_t maximum_number_of_live_fpu_registers,
208                                size_t number_of_out_slots,
209                                const ArenaVector<HBasicBlock*>& block_order);
210  int32_t GetStackSlot(HLocal* local) const;
211
212  uint32_t GetFrameSize() const { return frame_size_; }
213  void SetFrameSize(uint32_t size) { frame_size_ = size; }
214  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
215  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
216
217  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
218  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
219  virtual void SetupBlockedRegisters() const = 0;
220
221  virtual void ComputeSpillMask() {
222    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
223    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
224    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
225  }
226
227  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
228    uint32_t mask = 0;
229    for (size_t i = 0, e = length; i < e; ++i) {
230      mask |= (1 << registers[i]);
231    }
232    return mask;
233  }
234
235  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
236  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
237  virtual InstructionSet GetInstructionSet() const = 0;
238
239  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
240
241  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
242
243  // Saves the register in the stack. Returns the size taken on stack.
244  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
245  // Restores the register from the stack. Returns the size taken on stack.
246  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
247
248  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
249  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
250
251  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
252  // Returns whether we should split long moves in parallel moves.
253  virtual bool ShouldSplitLongMoves() const { return false; }
254
255  size_t GetNumberOfCoreCalleeSaveRegisters() const {
256    return POPCOUNT(core_callee_save_mask_);
257  }
258
259  size_t GetNumberOfCoreCallerSaveRegisters() const {
260    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
261    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
262  }
263
264  bool IsCoreCalleeSaveRegister(int reg) const {
265    return (core_callee_save_mask_ & (1 << reg)) != 0;
266  }
267
268  bool IsFloatingPointCalleeSaveRegister(int reg) const {
269    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
270  }
271
272  // Record native to dex mapping for a suspend point.  Required by runtime.
273  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
274  // Check whether we have already recorded mapping at this PC.
275  bool HasStackMapAtCurrentPc();
276  // Record extra stack maps if we support native debugging.
277  void MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc);
278
279  bool CanMoveNullCheckToUser(HNullCheck* null_check);
280  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
281  void GenerateNullCheck(HNullCheck* null_check);
282  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
283  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
284
285  // Records a stack map which the runtime might use to set catch phi values
286  // during exception delivery.
287  // TODO: Replace with a catch-entering instruction that records the environment.
288  void RecordCatchBlockInfo();
289
290  // Returns true if implicit null checks are allowed in the compiler options
291  // and if the null check is not inside a try block. We currently cannot do
292  // implicit null checks in that case because we need the NullCheckSlowPath to
293  // save live registers, which may be needed by the runtime to set catch phis.
294  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
295
296  void AddSlowPath(SlowPathCode* slow_path) {
297    slow_paths_.push_back(slow_path);
298  }
299
300  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
301  size_t ComputeStackMapsSize();
302
303  bool IsLeafMethod() const {
304    return is_leaf_;
305  }
306
307  void MarkNotLeaf() {
308    is_leaf_ = false;
309    requires_current_method_ = true;
310  }
311
312  void SetRequiresCurrentMethod() {
313    requires_current_method_ = true;
314  }
315
316  bool RequiresCurrentMethod() const {
317    return requires_current_method_;
318  }
319
320  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
321  // suspend check. This is called when the code generator generates code
322  // for the suspend check at the back edge (instead of where the suspend check
323  // is, which is the loop entry). At this point, the spill slots for the phis
324  // have not been written to.
325  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
326
327  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
328  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
329
330  // Helper that returns the pointer offset of an index in an object array.
331  // Note: this method assumes we always have the same pointer size, regardless
332  // of the architecture.
333  static size_t GetCacheOffset(uint32_t index);
334  // Pointer variant for ArtMethod and ArtField arrays.
335  size_t GetCachePointerOffset(uint32_t index);
336
337  void EmitParallelMoves(Location from1,
338                         Location to1,
339                         Primitive::Type type1,
340                         Location from2,
341                         Location to2,
342                         Primitive::Type type2);
343
344  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
345    // Check that null value is not represented as an integer constant.
346    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
347    return type == Primitive::kPrimNot && !value->IsNullConstant();
348  }
349
350  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
351
352  void AddAllocatedRegister(Location location) {
353    allocated_registers_.Add(location);
354  }
355
356  bool HasAllocatedRegister(bool is_core, int reg) const {
357    return is_core
358        ? allocated_registers_.ContainsCoreRegister(reg)
359        : allocated_registers_.ContainsFloatingPointRegister(reg);
360  }
361
362  void AllocateLocations(HInstruction* instruction);
363
364  // Tells whether the stack frame of the compiled method is
365  // considered "empty", that is either actually having a size of zero,
366  // or just containing the saved return address register.
367  bool HasEmptyFrame() const {
368    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
369  }
370
371  static int32_t GetInt32ValueOf(HConstant* constant) {
372    if (constant->IsIntConstant()) {
373      return constant->AsIntConstant()->GetValue();
374    } else if (constant->IsNullConstant()) {
375      return 0;
376    } else {
377      DCHECK(constant->IsFloatConstant());
378      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
379    }
380  }
381
382  static int64_t GetInt64ValueOf(HConstant* constant) {
383    if (constant->IsIntConstant()) {
384      return constant->AsIntConstant()->GetValue();
385    } else if (constant->IsNullConstant()) {
386      return 0;
387    } else if (constant->IsFloatConstant()) {
388      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
389    } else if (constant->IsLongConstant()) {
390      return constant->AsLongConstant()->GetValue();
391    } else {
392      DCHECK(constant->IsDoubleConstant());
393      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
394    }
395  }
396
397  size_t GetFirstRegisterSlotInSlowPath() const {
398    return first_register_slot_in_slow_path_;
399  }
400
401  uint32_t FrameEntrySpillSize() const {
402    return GetFpuSpillSize() + GetCoreSpillSize();
403  }
404
405  virtual ParallelMoveResolver* GetMoveResolver() = 0;
406
407  static void CreateCommonInvokeLocationSummary(
408      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
409
410  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
411
412  void CreateUnresolvedFieldLocationSummary(
413      HInstruction* field_access,
414      Primitive::Type field_type,
415      const FieldAccessCallingConvention& calling_convention);
416
417  void GenerateUnresolvedFieldAccess(
418      HInstruction* field_access,
419      Primitive::Type field_type,
420      uint32_t field_index,
421      uint32_t dex_pc,
422      const FieldAccessCallingConvention& calling_convention);
423
424  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
425  static void CreateLoadClassLocationSummary(HLoadClass* cls,
426                                             Location runtime_type_index_location,
427                                             Location runtime_return_location,
428                                             bool code_generator_supports_read_barrier = false);
429
430  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
431
432  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
433  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
434
435  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
436                             HInstruction* instruction,
437                             uint32_t dex_pc,
438                             SlowPathCode* slow_path) = 0;
439
440  // Check if the desired_dispatch_info is supported. If it is, return it,
441  // otherwise return a fall-back info that should be used instead.
442  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
443      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
444      MethodReference target_method) = 0;
445
446  // Generate a call to a static or direct method.
447  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
448  // Generate a call to a virtual method.
449  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
450
451  // Copy the result of a call into the given target.
452  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
453
454  virtual void GenerateNop() = 0;
455
456 protected:
457  // Method patch info used for recording locations of required linker patches and
458  // target methods. The target method can be used for various purposes, whether for
459  // patching the address of the method or the code pointer or a PC-relative call.
460  template <typename LabelType>
461  struct MethodPatchInfo {
462    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }
463
464    MethodReference target_method;
465    LabelType label;
466  };
467
  // Protected constructor, meant to be called by subclasses.
  //
  // graph:                    graph being compiled; its arena provides all storage
  //                           allocated here.
  // number_of_core_registers, number_of_fpu_registers, number_of_register_pairs:
  //                           sizes of the three "blocked register" arrays.
  // core_callee_save_mask, fpu_callee_save_mask:
  //                           bit masks of the callee-save registers.
  // compiler_options:         stored by reference.
  // stats:                    stored as a pointer; Create() defaults it to nullptr.
  //
  // The generator starts with a zero frame size, empty spill masks, no block order, no
  // disassembly information, flagged as a leaf method that does not require the current
  // method.
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        // One bool per register / register pair, allocated from the graph's arena.
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
                                                                    kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    // Reserve room for a few slow paths up front.
    slow_paths_.reserve(8);
  }
504
505  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
506
507  virtual HGraphVisitor* GetLocationBuilder() = 0;
508  virtual HGraphVisitor* GetInstructionVisitor() = 0;
509
510  // Returns the location of the first spilled entry for floating point registers,
511  // relative to the stack pointer.
512  uint32_t GetFpuSpillStart() const {
513    return GetFrameSize() - FrameEntrySpillSize();
514  }
515
516  uint32_t GetFpuSpillSize() const {
517    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
518  }
519
520  uint32_t GetCoreSpillSize() const {
521    return POPCOUNT(core_spill_mask_) * GetWordSize();
522  }
523
524  bool HasAllocatedCalleeSaveRegisters() const {
525    // We check the core registers against 1 because it always comprises the return PC.
526    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
527      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
528  }
529
530  bool CallPushesPC() const {
531    InstructionSet instruction_set = GetInstructionSet();
532    return instruction_set == kX86 || instruction_set == kX86_64;
533  }
534
535  // Arm64 has its own type for a label, so we need to templatize these methods
536  // to share the logic.
537
538  template <typename LabelType>
539  LabelType* CommonInitializeLabels() {
540    // We use raw array allocations instead of ArenaVector<> because Labels are
541    // non-constructible and non-movable and as such cannot be held in a vector.
542    size_t size = GetGraph()->GetBlocks().size();
543    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
544                                                                      kArenaAllocCodeGenerator);
545    for (size_t i = 0; i != size; ++i) {
546      new(labels + i) LabelType();
547    }
548    return labels;
549  }
550
551  template <typename LabelType>
552  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
553    block = FirstNonEmptyBlock(block);
554    return raw_pointer_to_labels_array + block->GetBlockId();
555  }
556
557  SlowPathCode* GetCurrentSlowPath() {
558    return current_slow_path_;
559  }
560
561  // Frame size required for this method.
562  uint32_t frame_size_;
563  uint32_t core_spill_mask_;
564  uint32_t fpu_spill_mask_;
565  uint32_t first_register_slot_in_slow_path_;
566
567  // Registers that were allocated during linear scan.
568  RegisterSet allocated_registers_;
569
570  // Arrays used when doing register allocation to know which
571  // registers we can allocate. `SetupBlockedRegisters` updates the
572  // arrays.
573  bool* const blocked_core_registers_;
574  bool* const blocked_fpu_registers_;
575  bool* const blocked_register_pairs_;
576  size_t number_of_core_registers_;
577  size_t number_of_fpu_registers_;
578  size_t number_of_register_pairs_;
579  const uint32_t core_callee_save_mask_;
580  const uint32_t fpu_callee_save_mask_;
581
582  StackMapStream stack_map_stream_;
583
584  // The order to use for code generation.
585  const ArenaVector<HBasicBlock*>* block_order_;
586
587  DisassemblyInformation* disasm_info_;
588
589 private:
590  size_t GetStackOffsetOfSavedRegister(size_t index);
591  void GenerateSlowPaths();
592  void BlockIfInRegister(Location location, bool is_out = false) const;
593  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
594
595  OptimizingCompilerStats* stats_;
596
597  HGraph* const graph_;
598  const CompilerOptions& compiler_options_;
599
600  ArenaVector<SlowPathCode*> slow_paths_;
601
602  // The current slow-path that we're generating code for.
603  SlowPathCode* current_slow_path_;
604
605  // The current block index in `block_order_` of the block
606  // we are generating code for.
607  size_t current_block_index_;
608
609  // Whether the method is a leaf method.
610  bool is_leaf_;
611
612  // Whether an instruction in the graph accesses the current method.
613  bool requires_current_method_;
614
615  friend class OptimizingCFITest;
616
617  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
618};
619
620template <typename C, typename F>
621class CallingConvention {
622 public:
623  CallingConvention(const C* registers,
624                    size_t number_of_registers,
625                    const F* fpu_registers,
626                    size_t number_of_fpu_registers,
627                    size_t pointer_size)
628      : registers_(registers),
629        number_of_registers_(number_of_registers),
630        fpu_registers_(fpu_registers),
631        number_of_fpu_registers_(number_of_fpu_registers),
632        pointer_size_(pointer_size) {}
633
634  size_t GetNumberOfRegisters() const { return number_of_registers_; }
635  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
636
637  C GetRegisterAt(size_t index) const {
638    DCHECK_LT(index, number_of_registers_);
639    return registers_[index];
640  }
641
642  F GetFpuRegisterAt(size_t index) const {
643    DCHECK_LT(index, number_of_fpu_registers_);
644    return fpu_registers_[index];
645  }
646
647  size_t GetStackOffsetOf(size_t index) const {
648    // We still reserve the space for parameters passed by registers.
649    // Add space for the method pointer.
650    return pointer_size_ + index * kVRegSize;
651  }
652
653 private:
654  const C* registers_;
655  const size_t number_of_registers_;
656  const F* fpu_registers_;
657  const size_t number_of_fpu_registers_;
658  const size_t pointer_size_;
659
660  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
661};
662
663/**
664 * A templated class SlowPathGenerator with a templated method NewSlowPath()
665 * that can be used by any code generator to share equivalent slow-paths with
666 * the objective of reducing generated code size.
667 *
668 * InstructionType:  instruction that requires SlowPathCodeType
669 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
670 */
671template <typename InstructionType>
672class SlowPathGenerator {
673  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
674                "InstructionType is not a subclass of art::HInstruction");
675
676 public:
677  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
678      : graph_(graph),
679        codegen_(codegen),
680        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
681
682  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
683  // Templating the method (rather than the whole class) on the slow-path type enables
684  // keeping this code at a generic, non architecture-specific place.
685  //
686  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
687  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
688  //       or template the class as a whole on SlowPathType.
689  template <typename SlowPathCodeType>
690  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
691    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
692                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
693    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
694                  "SlowPathCodeType is not constructible from InstructionType*");
695    // Iterate over potential candidates for sharing. Currently, only same-typed
696    // slow-paths with exactly the same dex-pc are viable candidates.
697    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
698    const uint32_t dex_pc = instruction->GetDexPc();
699    auto iter = slow_path_map_.find(dex_pc);
700    if (iter != slow_path_map_.end()) {
701      auto candidates = iter->second;
702      for (const auto& it : candidates) {
703        InstructionType* other_instruction = it.first;
704        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
705        // Determine if the instructions allow for slow-path sharing.
706        if (HaveSameLiveRegisters(instruction, other_instruction) &&
707            HaveSameStackMap(instruction, other_instruction)) {
708          // Can share: reuse existing one.
709          return other_slow_path;
710        }
711      }
712    } else {
713      // First time this dex-pc is seen.
714      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
715    }
716    // Cannot share: create and add new slow-path for this particular dex-pc.
717    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
718    iter->second.emplace_back(std::make_pair(instruction, slow_path));
719    codegen_->AddSlowPath(slow_path);
720    return slow_path;
721  }
722
723 private:
724  // Tests if both instructions have same set of live physical registers. This ensures
725  // the slow-path has exactly the same preamble on saving these registers to stack.
726  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
727    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
728    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
729    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
730    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
731    return (((live1->GetCoreRegisters() & core_spill) ==
732             (live2->GetCoreRegisters() & core_spill)) &&
733            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
734             (live2->GetFloatingPointRegisters() & fpu_spill)));
735  }
736
737  // Tests if both instructions have the same stack map. This ensures the interpreter
738  // will find exactly the same dex-registers at the same entries.
739  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
740    DCHECK(i1->HasEnvironment());
741    DCHECK(i2->HasEnvironment());
742    // We conservatively test if the two instructions find exactly the same instructions
743    // and location in each dex-register. This guarantees they will have the same stack map.
744    HEnvironment* e1 = i1->GetEnvironment();
745    HEnvironment* e2 = i2->GetEnvironment();
746    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
747      return false;
748    }
749    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
750      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
751          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
752        return false;
753      }
754    }
755    return true;
756  }
757
758  HGraph* const graph_;
759  CodeGenerator* const codegen_;
760
761  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
762  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
763
764  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
765};
766
// Graph visitor base class for instruction code generators. It adds the shared slow-path
// generator for HDeoptimize instructions, made available to subclasses.
class InstructionCodeGenerator : public HGraphVisitor {
 public:
  // Visits `graph`; `codegen` is the code generator that slow paths created through
  // `deopt_slow_paths_` are registered with.
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing make sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};
778
779}  // namespace art
780
781#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
782