code_generator.h revision 2ae48182573da7087bffc2873730bc758ec29696
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "stack_map_stream.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
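
// For instance, k2Pow32EncodingForDouble above is the raw IEEE-754 bit pattern of the
// double value 2^32 (4294967296.0): sign bit 0, biased exponent 0x41F (1023 + 32) and a
// zero mantissa. Likewise, 0x41E0000000000000 uses biased exponent 1023 + 31 and so
// encodes 2^31. Keeping the encodings as int64_t lets a backend materialize the constants
// directly, e.g. when lowering long-to-floating-point conversions.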

class Assembler;
class CodeGenerator;
class CompilerDriver;
class LinkerPatch;
class ParallelMoveResolver;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(Primitive::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options,
                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator() {}

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fpu_registers,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ =
        allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

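  // Builds a bit mask out of an explicit register number list. For example, assuming a
  // register array {1, 5}, the result is (1 << 1) | (1 << 5), i.e. 0x22. Backends
  // typically use this to turn their callee-save register lists into the core/fpu
  // callee-save masks handed to the CodeGenerator constructor.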
  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;

  // Saves the register to the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  // Records the native-to-dex mapping for a suspend point. Required by the runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Checks whether we have already recorded a mapping at this PC.
  bool HasStackMapAtCurrentPc();
  // Records extra stack maps if we support native debugging.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction, uint32_t dex_pc);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

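  // Rough flow for implicit null checks: when CanMoveNullCheckToUser() allows it, no
  // explicit compare-and-branch is emitted for the HNullCheck; the check is folded into
  // the memory access of the user instruction, and the backend calls
  // MaybeRecordImplicitNullCheck() at that access so the runtime can map a fault at this
  // PC back to a NullPointerException at the right dex PC.
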
  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.push_back(slow_path);
  }

  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
  size_t ComputeStackMapsSize();

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that a null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, it either has a size of zero
  // or only contains the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

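  // The two helpers below reinterpret a constant's bits for the code generator. For
  // example, for a float constant 1.0f GetInt32ValueOf() returns the IEEE-754 bit pattern
  // 0x3f800000, while a null constant is simply encoded as 0.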
  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      Primitive::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      Primitive::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
  static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                             Location runtime_type_index_location,
                                             Location runtime_return_location,
                                             bool code_generator_supports_read_barrier = false);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path) = 0;

  // Check if the desired_dispatch_info is supported. If it is, return it;
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      MethodReference target_method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

  virtual void GenerateNop() = 0;

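  // A typical backend slow path, roughly sketched: its EmitNativeCode() binds
  // GetEntryLabel(), calls SaveLiveRegisters() since the runtime call may clobber
  // caller-save registers, invokes the runtime through
  // InvokeRuntime(entrypoint, instruction, dex_pc, this) -- which also records the PC
  // info -- then calls RestoreLiveRegisters() and jumps back to GetExitLabel().
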
 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes, whether for
  // patching the address of the method or the code pointer or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
                                                                    kArenaAllocCodeGenerator)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                   kArenaAllocCodeGenerator)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
                                                                    kArenaAllocCodeGenerator)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        disasm_info_(nullptr),
        stats_(stats),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
        current_slow_path_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {
    slow_paths_.reserve(8);
  }

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                      kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  ArenaVector<SlowPathCode*> slow_paths_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

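  // For example, assuming a 64-bit target (pointer_size_ == 8) and a 4-byte kVRegSize,
  // the argument at index 3 lives at stack offset 8 + 3 * 4 == 20: the first
  // pointer_size_ bytes cover the slot reserved for the method pointer.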
  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType: instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns the existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non-architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      auto candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have the same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble when saving these registers to the stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and locations in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add a slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under the current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_