code_generator.h revision fead4e4f397455aa31905b2982d4d861126ab89d
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
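
// For illustration only: a minimal sketch of a concrete allocator, assuming a
// std::vector-backed buffer (the class name and the storage strategy are
// assumptions of this sketch, not part of this header):
//
//   class VectorCodeAllocator FINAL : public CodeAllocator {
//    public:
//     VectorCodeAllocator() {}
//
//     // Grows the buffer to `size` bytes and hands it to the code generator.
//     uint8_t* Allocate(size_t size) OVERRIDE {
//       memory_.resize(size);
//       return &memory_[0];
//     }
//
//     const std::vector<uint8_t>& GetMemory() const { return memory_; }
//
//    private:
//     std::vector<uint8_t> memory_;
//
//     DISALLOW_COPY_AND_ASSIGN(VectorCodeAllocator);
//   };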

struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {}
  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class CodeGenerator {
 public:
  // Compiles the graph to executable instructions.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + kVRegSize  // Art method
        + parameter->GetIndex() * kVRegSize;
  }
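
  // A worked example of the slot computation above, with assumed numbers (no
  // particular backend): for a frame size of 64 bytes and kVRegSize == 4, the
  // parameter at index 2 lives at 64 + 4 + 2 * 4 = 76 bytes above the stack
  // pointer: past the whole frame (64), past the Art method slot (4), plus
  // two 4-byte vreg slots.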

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }
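
  // A worked example with hypothetical register numbers: for
  // int regs[] = {0, 2, 5}, ComputeRegisterMask(regs, arraysize(regs)) yields
  // 0b100101 (0x25). If that value were core_callee_save_mask_,
  // IsCoreCalleeSaveRegister(2) would return true and
  // IsCoreCalleeSaveRegister(1) would return false.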

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);
  void SaveLiveRegisters(LocationSummary* locations);
  void RestoreLiveRegisters(LocationSummary* locations);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);

  void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);

  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    if (kIsDebugBuild) {
      if (type == Primitive::kPrimNot && value->IsIntConstant()) {
        CHECK_EQ(value->AsIntConstant()->GetValue(), 0);
      }
    }
    return type == Primitive::kPrimNot && !value->IsIntConstant();
  }

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
    }
  }
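
  // For illustration: floating point constants are returned as raw IEEE-754
  // bit patterns, so GetInt32ValueOf on a float constant 1.0f yields
  // 0x3f800000, and GetInt64ValueOf on a double constant 1.0 yields
  // INT64_C(0x3ff0000000000000). The null constant is encoded as 0 in both
  // widths.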

 protected:
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual ParallelMoveResolver* GetMoveResolver() = 0;
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }
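
  // A sketch of the frame layout the spill helpers above assume (higher
  // addresses at the top; the contents below the spill areas vary per
  // backend):
  //
  //   +--------------------------+  <-- SP + GetFrameSize()
  //   | saved core registers     |  GetCoreSpillSize() bytes
  //   +--------------------------+
  //   | saved FP registers       |  GetFpuSpillSize() bytes
  //   +--------------------------+  <-- SP + GetFpuSpillStart()
  //   | spill slots, outgoing    |
  //   | arguments, current       |
  //   | method pointer, ...      |
  //   +--------------------------+  <-- SP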

  bool HasAllocatedCalleeSaveRegisters() const {
    // Check the core registers against 1, because the core spill mask always
    // comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add one for the method pointer.
    return (index + 1) * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_