code_generator.h revision 8158f28b6689314213eb4dbbe14166073be71f7e
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
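
// A minimal sketch of a concrete slow path, assuming a hypothetical backend;
// `MySlowPath` is an illustrative name, but real subclasses (such as the
// per-architecture null check slow paths) follow this shape:
//
//   class MySlowPath : public SlowPathCode {  // hypothetical
//    public:
//     explicit MySlowPath(HInstruction* at) : instruction_(at) {}
//
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       // Bind the slow-path entry label, save the live registers recorded
//       // in instruction_->GetLocations(), call into the runtime, record the
//       // PC, restore the registers, and jump back to the fast path.
//     }
//
//     const char* GetDescription() const OVERRIDE { return "MySlowPath"; }
//
//    private:
//     HInstruction* const instruction_;
//     DISALLOW_COPY_AND_ASSIGN(MySlowPath);
//   };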

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;
  virtual Location GetReturnLocation(Primitive::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + InstructionSetPointerSize(GetInstructionSet())  // ArtMethod
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
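  // For illustration: with registers = {0, 2, 5} and length = 3, the result is
  // (1 << 0) | (1 << 2) | (1 << 5) = 0b100101 = 0x25, i.e. one mask bit per
  // register number.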

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsBaseline() const {
    return is_baseline_;
  }

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);
  // Pointer variant for ArtMethod and ArtField arrays.
  size_t GetCachePointerOffset(uint32_t index);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         Primitive::Type type1,
                         Location from2,
                         Location to2,
                         Primitive::Type type2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
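  // Worked example (illustrative): a reference store needs a write barrier so
  // the GC can track the new inter-object edge, but storing a known null does
  // not, since null creates no edge:
  //   StoreNeedsWriteBarrier(Primitive::kPrimNot, reference)     -> true
  //   StoreNeedsWriteBarrier(Primitive::kPrimNot, null_constant) -> false
  //   StoreNeedsWriteBarrier(Primitive::kPrimInt, int_constant)  -> false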

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is considered
  // "empty", that is, it either actually has a size of zero, or contains
  // only the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
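  // For example (illustrative values): a float constant 1.0f yields its raw
  // IEEE-754 bits, bit_cast<int32_t, float>(1.0f) == 0x3F800000, and a double
  // constant 1.0 yields bit_cast<int64_t, double>(1.0) ==
  // INT64_C(0x3FF0000000000000). The k2Pow32EncodingForDouble and
  // k2Pow31EncodingForDouble constants above are the same idea: the raw bits
  // of 2^32 and 2^31 as doubles.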

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        stack_map_stream_(graph->GetArena()),
        block_order_(nullptr),
        is_baseline_(false),
        disasm_info_(nullptr),
        graph_(graph),
        compiler_options_(compiler_options),
        slow_paths_(graph->GetArena(), 8),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because the allocated callee-save
    // set always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }
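  // Illustrative note: on x86/x86-64 the `call` instruction itself pushes the
  // return address, so HasEmptyFrame() above treats a frame of exactly
  // GetWordSize() bytes (holding just the pushed PC) as empty, while on other
  // architectures an empty frame has size zero.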

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  StackMapStream stack_map_stream_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // Whether we are using baseline.
  bool is_baseline_;

  DisassemblyInformation* disasm_info_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<SlowPathCode*> slow_paths_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    size_t pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return pointer_size_ + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const size_t pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
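
// A minimal usage sketch, assuming a prepared `graph`, `isa_features`,
// `compiler_options`, and a concrete CodeAllocator `allocator` (the real
// driver lives in optimizing_compiler.cc; names here are illustrative):
//
//   std::unique_ptr<CodeGenerator> codegen(CodeGenerator::Create(
//       graph, kThumb2, isa_features, compiler_options));
//   codegen->CompileOptimized(&allocator);  // or CompileBaseline(&allocator)
//   std::vector<uint8_t> stack_maps;
//   codegen->BuildStackMaps(&stack_maps);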