code_generator.h revision b022fa1300e6d78639b3b910af0cf85c43df44bb
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class LinkerPatch;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
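
// Illustrative sketch only (not part of the original header): a concrete slow
// path typically overrides EmitNativeCode() to emit the out-of-line code and
// GetDescription() for debugging output. The class name and member below are
// hypothetical.
//
//   class NullCheckSlowPathExample : public SlowPathCode {
//    public:
//     explicit NullCheckSlowPathExample(HNullCheck* instruction)
//         : instruction_(instruction) {}
//
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       // Bind the slow path entry label, call into the runtime, and (for a
//       // non-fatal path) jump back to the fast path.
//     }
//
//     bool IsFatal() const OVERRIDE { return true; }  // A throwing path never returns.
//     const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathExample"; }
//
//    private:
//     HNullCheck* const instruction_;
//     DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathExample);
//   };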

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
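
  // Example (illustrative): for registers {0, 5, 7},
  //   ComputeRegisterMask(regs, 3) == (1 << 0) | (1 << 5) | (1 << 7) == 0xa1,
  // i.e. one bit set per register number. This is the same encoding used by
  // core_spill_mask_ and fpu_spill_mask_ above.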

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }
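
  // Example (illustrative): with a hypothetical core_callee_save_mask_ of
  // 0x0ff0 (registers 4 through 11), IsCoreCalleeSaveRegister(5) is true
  // because bit 5 is set, while IsCoreCalleeSaveRegister(0) is false.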

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  // Record additional native to dex mappings for native debugging/profiling tools.
  void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Returns true if implicit null checks are allowed in the compiler options
  // and if the null check is not inside a try block. We currently cannot do
  // implicit null checks in that case because we need the NullCheckSlowPath to
  // save live registers, which may be needed by the runtime to set catch phis.
  bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const;

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }

  void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
  void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(ArenaVector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
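
  // Example (illustrative): storing an object reference into a field or array
  // element needs a GC write barrier unless the stored value is known to be
  // null. So for `obj.field = other` the barrier is required, while for
  // `obj.field = null` StoreNeedsWriteBarrier() returns false and the barrier
  // can be elided. Primitive stores (kPrimInt, kPrimFloat, ...) never need one.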

  void ValidateInvokeRuntime(HInstruction* instruction, SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, either actually of size zero, or
  // containing only the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
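
  // Example (illustrative): these helpers return the raw bit pattern of a
  // constant, not a numeric conversion. For a float constant 1.0f,
  // GetInt32ValueOf() yields 0x3f800000 (its IEEE-754 encoding); for a double
  // constant 2^32, GetInt64ValueOf() yields INT64_C(0x41F0000000000000),
  // matching k2Pow32EncodingForDouble at the top of this file.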

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

 protected:
  // Method patch info used for recording locations of required linker patches and
  // target methods. The target method can be used for various purposes, whether for
  // patching the address of the method or the code pointer or a PC-relative call.
  template <typename LabelType>
  struct MethodPatchInfo {
    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }

    MethodReference target_method;
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        is_baseline_(false),
        disasm_info_(nullptr),
        graph_(graph),
        compiler_options_(compiler_options),
        src_map_(nullptr),
        slow_paths_(graph->GetArena(), 8),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always includes the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // Whether we are using baseline.
  bool is_baseline_;

  DisassemblyInformation* disasm_info_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  // Native to dex_pc map used for native debugging/profiling tools.
  DefaultSrcMap* src_map_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
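
// Example (illustrative): assuming a 32-bit target where pointer_size_ == 4
// and kVRegSize == 4, GetStackOffsetOf(0) returns 4 and GetStackOffsetOf(2)
// returns 12: the first 4 bytes hold the method pointer, and each argument
// vreg occupies one 4-byte slot, including arguments also passed in registers.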

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_