code_generator.h revision c6b4dd8980350aaf250f0185f73e9c42ec17cd57
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  void RecordPcInfo(CodeGenerator* codegen, HInstruction* instruction, uint32_t dex_pc);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

 private:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
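
// A minimal sketch of how a backend typically subclasses SlowPathCode,
// using a hypothetical HNullCheck-based example; the concrete subclasses
// live in the per-architecture code generators and typically also manage
// backend-specific entry/exit labels:
//
//   class NullCheckSlowPathSketch : public SlowPathCode {
//    public:
//     explicit NullCheckSlowPathSketch(HNullCheck* instruction)
//         : instruction_(instruction) {}
//
//     void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
//       // Emit the runtime call that throws NullPointerException, then
//       // record the PC so stack maps resolve it to the faulting dex pc.
//       RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
//     }
//
//    private:
//     HNullCheck* const instruction_;
//     DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathSketch);
//   };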

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + kVRegSize  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }
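
  // For example, ComputeRegisterMask() on registers {0, 5, 6} returns
  // (1 << 0) | (1 << 5) | (1 << 6) = 0x61.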

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register to the stack. Returns the size taken on the stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on the stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildSourceMap(DefaultSrcMap* src_map) const;
  void BuildMappingTable(std::vector<uint8_t>* vector) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);

  void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
    return type == Primitive::kPrimNot && !value->IsNullConstant();
  }
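
  // For example, per StoreNeedsWriteBarrier(), storing an HNullConstant into
  // a kPrimNot field or array element needs no write barrier (there is no
  // new reference for the GC to track), while storing any other reference
  // value does.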

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is, either having a size of zero
  // or containing only the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }
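
  // For example, GetInt64ValueOf() on a double constant 2.0 returns the raw
  // IEEE-754 encoding bit_cast<int64_t, double>(2.0) = 0x4000000000000000,
  // not a numeric conversion to the integer 2.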

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend-specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual ParallelMoveResolver* GetMoveResolver() = 0;
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point
  // registers, relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }
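
  // Worked example, assuming a 4-byte word size and 8-byte FP spill slots:
  // with two core registers and one FP register in the spill masks,
  // FrameEntrySpillSize() is 2 * 4 + 1 * 8 = 16, so for a 64-byte frame
  // GetFpuSpillStart() is 64 - 16 = 48; the spills occupy the top of the
  // frame, with the FP registers just below the core registers.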

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because the mask always
    // comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  friend class OptimizingCFITest;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add one for the method pointer.
    return (index + 1) * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_