/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "stack_map_stream.h"
#include "utils/label.h"
namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
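// A quick sanity check of the encodings above (IEEE-754 double: 1 sign bit,
// 11 exponent bits with bias 1023, 52 mantissa bits): 2^32 has exponent field
// 1023 + 32 = 1055 = 0x41F and an all-zero mantissa, giving 0x41F0000000000000;
// 2^31 has exponent field 1023 + 31 = 1054 = 0x41E, giving 0x41E0000000000000.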

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class CompilerDriver;
class LinkerPatch;
class ParallelMoveResolver;
class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction for which this slow path was created.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
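
// A minimal sketch of a concrete slow path, for orientation only. The
// DivZeroSlowPathSketch class below is hypothetical; real subclasses live in
// the per-architecture code generators (e.g. code_generator_x86.cc):
//
//   class DivZeroSlowPathSketch : public SlowPathCode {
//    public:
//     explicit DivZeroSlowPathSketch(HDivZeroCheck* check) : SlowPathCode(check) {}
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       // Bind GetEntryLabel(), then call the runtime entrypoint that throws
//       // ArithmeticException; this path never falls through.
//     }
//     const char* GetDescription() const OVERRIDE { return "DivZeroSlowPathSketch"; }
//   };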

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               InstructionSet instruction_set,
                                               const InstructionSetFeatures& isa_features,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // ArtMethod* slot.
        + parameter->GetIndex() * kVRegSize;
  }
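  // For illustration only (assuming a 64-bit target, a 96-byte frame and
  // 4-byte vregs): GetStackSlotOfParameter would place parameter 0 at offset
  // 96 + 8 + 0 * 4 = 104 and parameter 1 at 96 + 8 + 1 * 4 = 108.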

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fpu_registers,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
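  // For example, registers {0, 2, 5} yield the mask 0b100101 (bits 0, 2
  // and 5 set), i.e. 0x25.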

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register in the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  // Records the native-to-dex mapping for a suspend point. Required by the runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Checks whether a mapping has already been recorded at this PC.
  bool HasStackMapAtCurrentPc();
  // Records extra stack maps if native debugging is supported.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  // TODO: Avoid creating the `std::unique_ptr` here.
  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
  }

  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
  size_t ComputeStackMapsSize();

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
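  // In other words, only stores of a possibly non-null object reference need a
  // write barrier; stores of the null constant or of primitive values never do.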

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is considered
  // "empty", that is, it either actually has a size of zero, or contains
  // only the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }
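  // Concretely: on x86/x86-64 the call instruction pushes the return PC, so an
  // "empty" frame still measures GetWordSize() bytes; on architectures where
  // calls do not push the PC, an empty frame has size zero.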

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }
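  // For example, the float constant 1.0f is returned as its raw IEEE-754 bit
  // pattern 0x3F800000.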

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
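  // Similarly, the double constant 1.0 is returned as its raw IEEE-754 bit
  // pattern 0x3FF0000000000000.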

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
  static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                             Location runtime_type_index_location,
                                             Location runtime_return_location,
                                             bool code_generator_supports_read_barrier = false);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it;
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_dispatch_info is supported. If it is, return it;
  // otherwise return fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes, whether for
  // patching the method address, the code pointer, or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  // String patch info used for recording locations of required linker patches and
  // target strings. The actual string address can be absolute or PC-relative.
  template <typename LabelType>
  struct StringPatchInfo {
    StringPatchInfo(const DexFile& df, uint32_t index)
        : dex_file(df), string_index(index), label() { }

    const DexFile& dex_file;
    uint32_t string_index;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
                                                                    kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We compare the core registers against 1 because the core spill mask
    // always contains at least the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }
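  // For instance, on a 32-bit target (4-byte method pointer, and assuming
  // kVRegSize is 4 bytes), index 0 maps to stack offset 4 and index 1 to 8.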

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths, with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns an existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non-architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      // Use a reference to avoid copying the candidate vector.
      const auto& candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have the same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble for saving these registers to the stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test whether the two environments record exactly the same
    // instruction and location for each dex register. This guarantees they will
    // have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
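
// A minimal usage sketch, for orientation only: the DeoptSketchSlowPath type
// and the `deopt` instruction are hypothetical stand-ins. Equivalent deopts at
// the same dex-pc (same live registers, same stack map) reuse one slow path:
//
//   SlowPathGenerator<HDeoptimize> generator(graph, codegen);
//   DeoptSketchSlowPath* slow_path =
//       generator.NewSlowPath<DeoptSketchSlowPath>(deopt);
//   __ jmp(slow_path->GetEntryLabel());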

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add a slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under the current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_