code_generator.h revision d28f4a00933a4a3b8d5e9db73b8532924d0f989d
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
18#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
19
20#include "arch/instruction_set.h"
21#include "arch/instruction_set_features.h"
22#include "base/arena_containers.h"
23#include "base/arena_object.h"
24#include "base/bit_field.h"
25#include "compiled_method.h"
26#include "driver/compiler_options.h"
27#include "globals.h"
28#include "graph_visualizer.h"
29#include "locations.h"
30#include "memory_region.h"
31#include "nodes.h"
32#include "optimizing_compiler_stats.h"
33#include "stack_map_stream.h"
34#include "utils/label.h"
35
36namespace art {
37
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
// Spelled with INT32_MIN: the literal 0x80000000 has an unsigned type and does not
// fit in int32_t, so initializing from it relies on an out-of-range conversion.
static int32_t constexpr kPrimIntMin = INT32_MIN;
// Minimum value for a primitive long.
// Spelled with INT64_MIN for the same reason: 0x8000000000000000 is not
// representable as a signed 64-bit literal.
static int64_t constexpr kPrimLongMin = INT64_MIN;

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = INT32_MAX;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_MAX;
52
53class Assembler;
54class CodeGenerator;
55class CompilerDriver;
56class LinkerPatch;
57class ParallelMoveResolver;
58
59class CodeAllocator {
60 public:
61  CodeAllocator() {}
62  virtual ~CodeAllocator() {}
63
64  virtual uint8_t* Allocate(size_t size) = 0;
65
66 private:
67  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
68};
69
70class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
71 public:
72  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
73    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
74      saved_core_stack_offsets_[i] = kRegisterNotSaved;
75      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
76    }
77  }
78
79  virtual ~SlowPathCode() {}
80
81  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
82
83  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
84  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
85
86  bool IsCoreRegisterSaved(int reg) const {
87    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
88  }
89
90  bool IsFpuRegisterSaved(int reg) const {
91    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
92  }
93
94  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
95    return saved_core_stack_offsets_[reg];
96  }
97
98  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
99    return saved_fpu_stack_offsets_[reg];
100  }
101
102  virtual bool IsFatal() const { return false; }
103
104  virtual const char* GetDescription() const = 0;
105
106  Label* GetEntryLabel() { return &entry_label_; }
107  Label* GetExitLabel() { return &exit_label_; }
108
109  HInstruction* GetInstruction() const {
110    return instruction_;
111  }
112
113  uint32_t GetDexPc() const {
114    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
115  }
116
117 protected:
118  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
119  static constexpr uint32_t kRegisterNotSaved = -1;
120  // The instruction where this slow path is happening.
121  HInstruction* instruction_;
122  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
123  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
124
125 private:
126  Label entry_label_;
127  Label exit_label_;
128
129  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
130};
131
132class InvokeDexCallingConventionVisitor {
133 public:
134  virtual Location GetNextLocation(Primitive::Type type) = 0;
135  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
136  virtual Location GetMethodLocation() const = 0;
137
138 protected:
139  InvokeDexCallingConventionVisitor() {}
140  virtual ~InvokeDexCallingConventionVisitor() {}
141
142  // The current index for core registers.
143  uint32_t gp_index_ = 0u;
144  // The current index for floating-point registers.
145  uint32_t float_index_ = 0u;
146  // The current stack index.
147  uint32_t stack_index_ = 0u;
148
149 private:
150  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
151};
152
153class FieldAccessCallingConvention {
154 public:
155  virtual Location GetObjectLocation() const = 0;
156  virtual Location GetFieldIndexLocation() const = 0;
157  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
158  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
159  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
160  virtual ~FieldAccessCallingConvention() {}
161
162 protected:
163  FieldAccessCallingConvention() {}
164
165 private:
166  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
167};
168
169class CodeGenerator {
170 public:
171  // Compiles the graph to executable instructions.
172  void Compile(CodeAllocator* allocator);
173  static CodeGenerator* Create(HGraph* graph,
174                               InstructionSet instruction_set,
175                               const InstructionSetFeatures& isa_features,
176                               const CompilerOptions& compiler_options,
177                               OptimizingCompilerStats* stats = nullptr);
178  virtual ~CodeGenerator() {}
179
180  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
181  HGraph* GetGraph() const { return graph_; }
182
183  HBasicBlock* GetNextBlockToEmit() const;
184  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
185  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
186
187  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
188    // Note that this follows the current calling convention.
189    return GetFrameSize()
190        + InstructionSetPointerSize(GetInstructionSet())  // Art method
191        + parameter->GetIndex() * kVRegSize;
192  }
193
194  virtual void Initialize() = 0;
195  virtual void Finalize(CodeAllocator* allocator);
196  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
197  virtual void GenerateFrameEntry() = 0;
198  virtual void GenerateFrameExit() = 0;
199  virtual void Bind(HBasicBlock* block) = 0;
200  virtual void MoveConstant(Location destination, int32_t value) = 0;
201  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
202  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
203
204  virtual Assembler* GetAssembler() = 0;
205  virtual const Assembler& GetAssembler() const = 0;
206  virtual size_t GetWordSize() const = 0;
207  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
208  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
209  void InitializeCodeGeneration(size_t number_of_spill_slots,
210                                size_t maximum_number_of_live_core_registers,
211                                size_t maximum_number_of_live_fpu_registers,
212                                size_t number_of_out_slots,
213                                const ArenaVector<HBasicBlock*>& block_order);
214  int32_t GetStackSlot(HLocal* local) const;
215
216  uint32_t GetFrameSize() const { return frame_size_; }
217  void SetFrameSize(uint32_t size) { frame_size_ = size; }
218  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
219  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
220
221  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
222  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
223  virtual void SetupBlockedRegisters() const = 0;
224
225  virtual void ComputeSpillMask() {
226    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
227    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
228    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
229  }
230
231  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
232    uint32_t mask = 0;
233    for (size_t i = 0, e = length; i < e; ++i) {
234      mask |= (1 << registers[i]);
235    }
236    return mask;
237  }
238
239  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
240  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
241  virtual InstructionSet GetInstructionSet() const = 0;
242
243  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
244
245  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
246
247  // Saves the register in the stack. Returns the size taken on stack.
248  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
249  // Restores the register from the stack. Returns the size taken on stack.
250  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
251
252  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
253  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
254
255  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
256  // Returns whether we should split long moves in parallel moves.
257  virtual bool ShouldSplitLongMoves() const { return false; }
258
259  size_t GetNumberOfCoreCalleeSaveRegisters() const {
260    return POPCOUNT(core_callee_save_mask_);
261  }
262
263  size_t GetNumberOfCoreCallerSaveRegisters() const {
264    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
265    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
266  }
267
268  bool IsCoreCalleeSaveRegister(int reg) const {
269    return (core_callee_save_mask_ & (1 << reg)) != 0;
270  }
271
272  bool IsFloatingPointCalleeSaveRegister(int reg) const {
273    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
274  }
275
276  // Record native to dex mapping for a suspend point.  Required by runtime.
277  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
278  // Check whether we have already recorded mapping at this PC.
279  bool HasStackMapAtCurrentPc();
280  // Record extra stack maps if we support native debugging.
281  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
282                                  uint32_t dex_pc,
283                                  SlowPathCode* slow_path = nullptr);
284
285  bool CanMoveNullCheckToUser(HNullCheck* null_check);
286  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
287
288  // Records a stack map which the runtime might use to set catch phi values
289  // during exception delivery.
290  // TODO: Replace with a catch-entering instruction that records the environment.
291  void RecordCatchBlockInfo();
292
293  // Returns true if implicit null checks are allowed in the compiler options
294  // and if the null check is not inside a try block. We currently cannot do
295  // implicit null checks in that case because we need the NullCheckSlowPath to
296  // save live registers, which may be needed by the runtime to set catch phis.
297  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;
298
299  void AddSlowPath(SlowPathCode* slow_path) {
300    slow_paths_.push_back(slow_path);
301  }
302
303  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
304  size_t ComputeStackMapsSize();
305
306  bool IsLeafMethod() const {
307    return is_leaf_;
308  }
309
310  void MarkNotLeaf() {
311    is_leaf_ = false;
312    requires_current_method_ = true;
313  }
314
315  void SetRequiresCurrentMethod() {
316    requires_current_method_ = true;
317  }
318
319  bool RequiresCurrentMethod() const {
320    return requires_current_method_;
321  }
322
323  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
324  // suspend check. This is called when the code generator generates code
325  // for the suspend check at the back edge (instead of where the suspend check
326  // is, which is the loop entry). At this point, the spill slots for the phis
327  // have not been written to.
328  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
329
330  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
331  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
332
333  // Helper that returns the pointer offset of an index in an object array.
334  // Note: this method assumes we always have the same pointer size, regardless
335  // of the architecture.
336  static size_t GetCacheOffset(uint32_t index);
337  // Pointer variant for ArtMethod and ArtField arrays.
338  size_t GetCachePointerOffset(uint32_t index);
339
340  void EmitParallelMoves(Location from1,
341                         Location to1,
342                         Primitive::Type type1,
343                         Location from2,
344                         Location to2,
345                         Primitive::Type type2);
346
347  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
348    // Check that null value is not represented as an integer constant.
349    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
350    return type == Primitive::kPrimNot && !value->IsNullConstant();
351  }
352
353  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);
354
355  void AddAllocatedRegister(Location location) {
356    allocated_registers_.Add(location);
357  }
358
359  bool HasAllocatedRegister(bool is_core, int reg) const {
360    return is_core
361        ? allocated_registers_.ContainsCoreRegister(reg)
362        : allocated_registers_.ContainsFloatingPointRegister(reg);
363  }
364
365  void AllocateLocations(HInstruction* instruction);
366
367  // Tells whether the stack frame of the compiled method is
368  // considered "empty", that is either actually having a size of zero,
369  // or just containing the saved return address register.
370  bool HasEmptyFrame() const {
371    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
372  }
373
374  static int32_t GetInt32ValueOf(HConstant* constant) {
375    if (constant->IsIntConstant()) {
376      return constant->AsIntConstant()->GetValue();
377    } else if (constant->IsNullConstant()) {
378      return 0;
379    } else {
380      DCHECK(constant->IsFloatConstant());
381      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
382    }
383  }
384
385  static int64_t GetInt64ValueOf(HConstant* constant) {
386    if (constant->IsIntConstant()) {
387      return constant->AsIntConstant()->GetValue();
388    } else if (constant->IsNullConstant()) {
389      return 0;
390    } else if (constant->IsFloatConstant()) {
391      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
392    } else if (constant->IsLongConstant()) {
393      return constant->AsLongConstant()->GetValue();
394    } else {
395      DCHECK(constant->IsDoubleConstant());
396      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
397    }
398  }
399
400  size_t GetFirstRegisterSlotInSlowPath() const {
401    return first_register_slot_in_slow_path_;
402  }
403
404  uint32_t FrameEntrySpillSize() const {
405    return GetFpuSpillSize() + GetCoreSpillSize();
406  }
407
408  virtual ParallelMoveResolver* GetMoveResolver() = 0;
409
410  static void CreateCommonInvokeLocationSummary(
411      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
412
413  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
414
415  void CreateUnresolvedFieldLocationSummary(
416      HInstruction* field_access,
417      Primitive::Type field_type,
418      const FieldAccessCallingConvention& calling_convention);
419
420  void GenerateUnresolvedFieldAccess(
421      HInstruction* field_access,
422      Primitive::Type field_type,
423      uint32_t field_index,
424      uint32_t dex_pc,
425      const FieldAccessCallingConvention& calling_convention);
426
427  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
428  static void CreateLoadClassLocationSummary(HLoadClass* cls,
429                                             Location runtime_type_index_location,
430                                             Location runtime_return_location,
431                                             bool code_generator_supports_read_barrier = false);
432
433  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
434
435  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
436  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
437
438  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
439                             HInstruction* instruction,
440                             uint32_t dex_pc,
441                             SlowPathCode* slow_path) = 0;
442
443  // Check if the desired_dispatch_info is supported. If it is, return it,
444  // otherwise return a fall-back info that should be used instead.
445  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
446      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
447      MethodReference target_method) = 0;
448
449  // Generate a call to a static or direct method.
450  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
451  // Generate a call to a virtual method.
452  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
453
454  // Copy the result of a call into the given target.
455  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
456
457  virtual void GenerateNop() = 0;
458
459 protected:
460  // Method patch info used for recording locations of required linker patches and
461  // target methods. The target method can be used for various purposes, whether for
462  // patching the address of the method or the code pointer or a PC-relative call.
463  template <typename LabelType>
464  struct MethodPatchInfo {
465    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }
466
467    MethodReference target_method;
468    LabelType label;
469  };
470
471  CodeGenerator(HGraph* graph,
472                size_t number_of_core_registers,
473                size_t number_of_fpu_registers,
474                size_t number_of_register_pairs,
475                uint32_t core_callee_save_mask,
476                uint32_t fpu_callee_save_mask,
477                const CompilerOptions& compiler_options,
478                OptimizingCompilerStats* stats)
479      : frame_size_(0),
480        core_spill_mask_(0),
481        fpu_spill_mask_(0),
482        first_register_slot_in_slow_path_(0),
483        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
484                                                                    kArenaAllocCodeGenerator)),
485        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
486                                                                   kArenaAllocCodeGenerator)),
487        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
488                                                                    kArenaAllocCodeGenerator)),
489        number_of_core_registers_(number_of_core_registers),
490        number_of_fpu_registers_(number_of_fpu_registers),
491        number_of_register_pairs_(number_of_register_pairs),
492        core_callee_save_mask_(core_callee_save_mask),
493        fpu_callee_save_mask_(fpu_callee_save_mask),
494        stack_map_stream_(graph->GetArena()),
495        block_order_(nullptr),
496        disasm_info_(nullptr),
497        stats_(stats),
498        graph_(graph),
499        compiler_options_(compiler_options),
500        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
501        current_slow_path_(nullptr),
502        current_block_index_(0),
503        is_leaf_(true),
504        requires_current_method_(false) {
505    slow_paths_.reserve(8);
506  }
507
508  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
509
510  virtual HGraphVisitor* GetLocationBuilder() = 0;
511  virtual HGraphVisitor* GetInstructionVisitor() = 0;
512
513  // Returns the location of the first spilled entry for floating point registers,
514  // relative to the stack pointer.
515  uint32_t GetFpuSpillStart() const {
516    return GetFrameSize() - FrameEntrySpillSize();
517  }
518
519  uint32_t GetFpuSpillSize() const {
520    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
521  }
522
523  uint32_t GetCoreSpillSize() const {
524    return POPCOUNT(core_spill_mask_) * GetWordSize();
525  }
526
527  bool HasAllocatedCalleeSaveRegisters() const {
528    // We check the core registers against 1 because it always comprises the return PC.
529    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
530      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
531  }
532
533  bool CallPushesPC() const {
534    InstructionSet instruction_set = GetInstructionSet();
535    return instruction_set == kX86 || instruction_set == kX86_64;
536  }
537
538  // Arm64 has its own type for a label, so we need to templatize these methods
539  // to share the logic.
540
541  template <typename LabelType>
542  LabelType* CommonInitializeLabels() {
543    // We use raw array allocations instead of ArenaVector<> because Labels are
544    // non-constructible and non-movable and as such cannot be held in a vector.
545    size_t size = GetGraph()->GetBlocks().size();
546    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
547                                                                      kArenaAllocCodeGenerator);
548    for (size_t i = 0; i != size; ++i) {
549      new(labels + i) LabelType();
550    }
551    return labels;
552  }
553
554  template <typename LabelType>
555  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
556    block = FirstNonEmptyBlock(block);
557    return raw_pointer_to_labels_array + block->GetBlockId();
558  }
559
560  SlowPathCode* GetCurrentSlowPath() {
561    return current_slow_path_;
562  }
563
564  // Frame size required for this method.
565  uint32_t frame_size_;
566  uint32_t core_spill_mask_;
567  uint32_t fpu_spill_mask_;
568  uint32_t first_register_slot_in_slow_path_;
569
570  // Registers that were allocated during linear scan.
571  RegisterSet allocated_registers_;
572
573  // Arrays used when doing register allocation to know which
574  // registers we can allocate. `SetupBlockedRegisters` updates the
575  // arrays.
576  bool* const blocked_core_registers_;
577  bool* const blocked_fpu_registers_;
578  bool* const blocked_register_pairs_;
579  size_t number_of_core_registers_;
580  size_t number_of_fpu_registers_;
581  size_t number_of_register_pairs_;
582  const uint32_t core_callee_save_mask_;
583  const uint32_t fpu_callee_save_mask_;
584
585  StackMapStream stack_map_stream_;
586
587  // The order to use for code generation.
588  const ArenaVector<HBasicBlock*>* block_order_;
589
590  DisassemblyInformation* disasm_info_;
591
592 private:
593  size_t GetStackOffsetOfSavedRegister(size_t index);
594  void GenerateSlowPaths();
595  void BlockIfInRegister(Location location, bool is_out = false) const;
596  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
597
598  OptimizingCompilerStats* stats_;
599
600  HGraph* const graph_;
601  const CompilerOptions& compiler_options_;
602
603  ArenaVector<SlowPathCode*> slow_paths_;
604
605  // The current slow-path that we're generating code for.
606  SlowPathCode* current_slow_path_;
607
608  // The current block index in `block_order_` of the block
609  // we are generating code for.
610  size_t current_block_index_;
611
612  // Whether the method is a leaf method.
613  bool is_leaf_;
614
615  // Whether an instruction in the graph accesses the current method.
616  bool requires_current_method_;
617
618  friend class OptimizingCFITest;
619
620  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
621};
622
623template <typename C, typename F>
624class CallingConvention {
625 public:
626  CallingConvention(const C* registers,
627                    size_t number_of_registers,
628                    const F* fpu_registers,
629                    size_t number_of_fpu_registers,
630                    size_t pointer_size)
631      : registers_(registers),
632        number_of_registers_(number_of_registers),
633        fpu_registers_(fpu_registers),
634        number_of_fpu_registers_(number_of_fpu_registers),
635        pointer_size_(pointer_size) {}
636
637  size_t GetNumberOfRegisters() const { return number_of_registers_; }
638  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
639
640  C GetRegisterAt(size_t index) const {
641    DCHECK_LT(index, number_of_registers_);
642    return registers_[index];
643  }
644
645  F GetFpuRegisterAt(size_t index) const {
646    DCHECK_LT(index, number_of_fpu_registers_);
647    return fpu_registers_[index];
648  }
649
650  size_t GetStackOffsetOf(size_t index) const {
651    // We still reserve the space for parameters passed by registers.
652    // Add space for the method pointer.
653    return pointer_size_ + index * kVRegSize;
654  }
655
656 private:
657  const C* registers_;
658  const size_t number_of_registers_;
659  const F* fpu_registers_;
660  const size_t number_of_fpu_registers_;
661  const size_t pointer_size_;
662
663  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
664};
665
666/**
667 * A templated class SlowPathGenerator with a templated method NewSlowPath()
668 * that can be used by any code generator to share equivalent slow-paths with
669 * the objective of reducing generated code size.
670 *
671 * InstructionType:  instruction that requires SlowPathCodeType
672 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
673 */
674template <typename InstructionType>
675class SlowPathGenerator {
676  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
677                "InstructionType is not a subclass of art::HInstruction");
678
679 public:
680  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
681      : graph_(graph),
682        codegen_(codegen),
683        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
684
685  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
686  // Templating the method (rather than the whole class) on the slow-path type enables
687  // keeping this code at a generic, non architecture-specific place.
688  //
689  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
690  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
691  //       or template the class as a whole on SlowPathType.
692  template <typename SlowPathCodeType>
693  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
694    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
695                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
696    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
697                  "SlowPathCodeType is not constructible from InstructionType*");
698    // Iterate over potential candidates for sharing. Currently, only same-typed
699    // slow-paths with exactly the same dex-pc are viable candidates.
700    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
701    const uint32_t dex_pc = instruction->GetDexPc();
702    auto iter = slow_path_map_.find(dex_pc);
703    if (iter != slow_path_map_.end()) {
704      auto candidates = iter->second;
705      for (const auto& it : candidates) {
706        InstructionType* other_instruction = it.first;
707        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
708        // Determine if the instructions allow for slow-path sharing.
709        if (HaveSameLiveRegisters(instruction, other_instruction) &&
710            HaveSameStackMap(instruction, other_instruction)) {
711          // Can share: reuse existing one.
712          return other_slow_path;
713        }
714      }
715    } else {
716      // First time this dex-pc is seen.
717      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
718    }
719    // Cannot share: create and add new slow-path for this particular dex-pc.
720    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
721    iter->second.emplace_back(std::make_pair(instruction, slow_path));
722    codegen_->AddSlowPath(slow_path);
723    return slow_path;
724  }
725
726 private:
727  // Tests if both instructions have same set of live physical registers. This ensures
728  // the slow-path has exactly the same preamble on saving these registers to stack.
729  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
730    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
731    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
732    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
733    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
734    return (((live1->GetCoreRegisters() & core_spill) ==
735             (live2->GetCoreRegisters() & core_spill)) &&
736            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
737             (live2->GetFloatingPointRegisters() & fpu_spill)));
738  }
739
740  // Tests if both instructions have the same stack map. This ensures the interpreter
741  // will find exactly the same dex-registers at the same entries.
742  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
743    DCHECK(i1->HasEnvironment());
744    DCHECK(i2->HasEnvironment());
745    // We conservatively test if the two instructions find exactly the same instructions
746    // and location in each dex-register. This guarantees they will have the same stack map.
747    HEnvironment* e1 = i1->GetEnvironment();
748    HEnvironment* e2 = i2->GetEnvironment();
749    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
750      return false;
751    }
752    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
753      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
754          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
755        return false;
756      }
757    }
758    return true;
759  }
760
761  HGraph* const graph_;
762  CodeGenerator* const codegen_;
763
764  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
765  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
766
767  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
768};
769
770class InstructionCodeGenerator : public HGraphVisitor {
771 public:
772  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
773      : HGraphVisitor(graph),
774        deopt_slow_paths_(graph, codegen) {}
775
776 protected:
777  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
778  // TODO: under current regime, only deopt sharing make sense; extend later.
779  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
780};
781
782}  // namespace art
783
784#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
785