code_generator.h revision 5f8741860d465410bfed495dbb5f794590d338da
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "stack_map_stream.h"

namespace art {

// Size in bytes of a virtual register slot; also used as the stack slot unit
// when computing parameter and argument offsets below.
static size_t constexpr kVRegSize = 4;

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff;

class Assembler;
class CodeGenerator;
class DexCompilationUnit;
class ParallelMoveResolver;
class SrcMapElem;
template <class Alloc>
class SrcMap;
using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;

// Abstract interface through which the code generator obtains the buffer
// that the final machine code is emitted into.
class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  // Returns a buffer of at least `size` bytes owned by the allocator.
  virtual uint8_t* Allocate(size_t size) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

// Association of a dex PC with its corresponding native PC, recorded while
// emitting code (see CodeGenerator::RecordPcInfo).
struct PcInfo {
  uint32_t dex_pc;
  uintptr_t native_pc;
};

// Arena-allocated out-of-line code fragment for an uncommon case; concrete
// subclasses emit the actual instructions in EmitNativeCode.
class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
 public:
  SlowPathCode() {}
  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

// Base class of the architecture-specific code generators. Holds the state
// shared by all backends (frame size, spill masks, blocked registers, the
// block emission order, PC/stack-map bookkeeping) and declares the pure
// virtual hooks each backend must implement.
class CodeGenerator {
 public:
  // Compiles the graph to executable instructions. Returns whether the compilation
  // succeeded.
  void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
  void CompileOptimized(CodeAllocator* allocator);
  // Factory: instantiates the backend matching `instruction_set`.
  static CodeGenerator* Create(HGraph* graph,
                               InstructionSet instruction_set,
                               const InstructionSetFeatures& isa_features,
                               const CompilerOptions& compiler_options);
  virtual ~CodeGenerator() {}

  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  // Whether `next` is emitted immediately after `current`, in which case a
  // jump between them can be elided.
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + kVRegSize  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
  virtual Assembler* GetAssembler() = 0;
  virtual size_t GetWordSize() const = 0;
  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
  // Computes the final frame size from the spill/out slot counts and records
  // the block emission order.
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_number_of_live_core_registers,
                                size_t maximum_number_of_live_fp_registers,
                                size_t number_of_out_slots,
                                const GrowableArray<HBasicBlock*>& block_order);
  int32_t GetStackSlot(HLocal* local) const;
  Location GetTemporaryLocation(HTemporary* temp) const;

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters(bool is_baseline) const = 0;

  // Intersects the registers the allocator actually used with the callee-save
  // sets to determine what the frame entry must spill.
  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  // Builds a bit mask with one bit set per register number in `registers`.
  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // FP spill/restore default to FATAL: backends without FP register support
  // are not expected to reach these.
  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
    UNUSED(stack_index, reg_id);
    UNIMPLEMENTED(FATAL);
    UNREACHABLE();
  }
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
    UNUSED(stack_index, reg_id);
    UNIMPLEMENTED(FATAL);
    UNREACHABLE();
  }
  // Backend query: whether a value of `type` occupies a register pair.
  virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc);
  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  void MaybeRecordImplicitNullCheck(HInstruction* instruction);

  void AddSlowPath(SlowPathCode* slow_path) {
    slow_paths_.Add(slow_path);
  }

  // Emitters for the various side tables consumed by the runtime.
  void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
  void BuildVMapTable(std::vector<uint8_t>* vector) const;
  void BuildNativeGCMap(
      std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
  void BuildStackMaps(std::vector<uint8_t>* vector);
  void SaveLiveRegisters(LocationSummary* locations);
  void RestoreLiveRegisters(LocationSummary* locations);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  // A non-leaf method makes calls, so it also needs access to the current
  // ART method.
  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  // Helper that returns the pointer offset of an index in an object array.
  // Note: this method assumes we always have the same pointer size, regardless
  // of the architecture.
  static size_t GetCacheOffset(uint32_t index);

  void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);

  // A reference store needs a write barrier unless the value is known to be
  // null (the only int constant a reference slot can hold).
  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
    if (kIsDebugBuild) {
      if (type == Primitive::kPrimNot && value->IsIntConstant()) {
        CHECK_EQ(value->AsIntConstant()->GetValue(), 0);
      }
    }
    return type == Primitive::kPrimNot && !value->IsIntConstant();
  }

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  // 32-bit representation of a constant: its int value, 0 for null, or the
  // raw bits of a float.
  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    }
  }

  // 64-bit representation of a constant: int/long value, 0 for null, or the
  // raw bits of a float/double.
  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
    }
  }

 protected:
  // NOTE: the blocked-register arrays are arena-allocated and zero-filled;
  // `SetupBlockedRegisters` populates them before register allocation.
  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options)
      : frame_size_(0),
        core_spill_mask_(0),
        fpu_spill_mask_(0),
        first_register_slot_in_slow_path_(0),
        blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
        blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)),
        number_of_core_registers_(number_of_core_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        number_of_register_pairs_(number_of_register_pairs),
        core_callee_save_mask_(core_callee_save_mask),
        fpu_callee_save_mask_(fpu_callee_save_mask),
        graph_(graph),
        compiler_options_(compiler_options),
        pc_infos_(graph->GetArena(), 32),
        slow_paths_(graph->GetArena(), 8),
        block_order_(nullptr),
        current_block_index_(0),
        is_leaf_(true),
        requires_current_method_(false),
        stack_map_stream_(graph->GetArena()) {}

  // Register allocation logic.
  void AllocateRegistersLocally(HInstruction* instruction) const;

  // Backend specific implementation for allocating a register.
  virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;

  static size_t FindFreeEntry(bool* array, size_t length);
  static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);

  virtual Location GetStackLocation(HLoadLocal* load) const = 0;

  virtual ParallelMoveResolver* GetMoveResolver() = 0;
  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  // Total bytes the frame entry spills (core + FP callee saves).
  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
        || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  // Whether a call instruction pushes the return PC on the stack (x86 family).
  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == kX86 || instruction_set == kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize this method
  // to share the logic.
  template <typename T>
  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  bool* const blocked_register_pairs_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

 private:
  void InitLocationsBaseline(HInstruction* instruction);
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void CompileInternal(CodeAllocator* allocator, bool is_baseline);
  void BlockIfInRegister(Location location, bool is_out = false) const;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  GrowableArray<PcInfo> pc_infos_;
  GrowableArray<SlowPathCode*> slow_paths_;

  // The order to use for code generation.
  const GrowableArray<HBasicBlock*>* block_order_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  bool requires_current_method_;

  StackMapStream stack_map_stream_;

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

// Non-owning view over the register and stack layout a calling convention
// uses to pass arguments. `C` is the core register type, `F` the FP register
// type of the backend.
template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add one for the method pointer.
    return (index + 1) * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_