// code_generator_x86_64.h revision 0da3b9117706760e8722029f407da6d0297cc943
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 18#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 19 20#include "code_generator.h" 21#include "dex/compiler_enums.h" 22#include "driver/compiler_options.h" 23#include "nodes.h" 24#include "parallel_move_resolver.h" 25#include "utils/x86_64/assembler_x86_64.h" 26 27namespace art { 28namespace x86_64 { 29 30// Use a local definition to prevent copying mistakes. 31static constexpr size_t kX86_64WordSize = kX86_64PointerSize; 32 33// Some x86_64 instructions require a register to be available as temp. 
34static constexpr Register TMP = R11; 35 36static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 }; 37static constexpr FloatRegister kParameterFloatRegisters[] = 38 { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 }; 39 40static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); 41static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); 42 43static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX }; 44static constexpr size_t kRuntimeParameterCoreRegistersLength = 45 arraysize(kRuntimeParameterCoreRegisters); 46static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; 47static constexpr size_t kRuntimeParameterFpuRegistersLength = 48 arraysize(kRuntimeParameterFpuRegisters); 49 50// These XMM registers are non-volatile in ART ABI, but volatile in native ABI. 51// If the ART ABI changes, this list must be updated. It is used to ensure that 52// these are not clobbered by any direct call to native code (such as math intrinsics). 
53static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; 54 55 56class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { 57 public: 58 InvokeRuntimeCallingConvention() 59 : CallingConvention(kRuntimeParameterCoreRegisters, 60 kRuntimeParameterCoreRegistersLength, 61 kRuntimeParameterFpuRegisters, 62 kRuntimeParameterFpuRegistersLength, 63 kX86_64PointerSize) {} 64 65 private: 66 DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); 67}; 68 69class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { 70 public: 71 InvokeDexCallingConvention() : CallingConvention( 72 kParameterCoreRegisters, 73 kParameterCoreRegistersLength, 74 kParameterFloatRegisters, 75 kParameterFloatRegistersLength, 76 kX86_64PointerSize) {} 77 78 private: 79 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); 80}; 81 82class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { 83 public: 84 FieldAccessCallingConventionX86_64() {} 85 86 Location GetObjectLocation() const OVERRIDE { 87 return Location::RegisterLocation(RSI); 88 } 89 Location GetFieldIndexLocation() const OVERRIDE { 90 return Location::RegisterLocation(RDI); 91 } 92 Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { 93 return Location::RegisterLocation(RAX); 94 } 95 Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { 96 return Primitive::Is64BitType(type) 97 ? Location::RegisterLocation(RDX) 98 : (is_instance 99 ? 
Location::RegisterLocation(RDX) 100 : Location::RegisterLocation(RSI)); 101 } 102 Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { 103 return Location::FpuRegisterLocation(XMM0); 104 } 105 106 private: 107 DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64); 108}; 109 110 111class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { 112 public: 113 InvokeDexCallingConventionVisitorX86_64() {} 114 virtual ~InvokeDexCallingConventionVisitorX86_64() {} 115 116 Location GetNextLocation(Primitive::Type type) OVERRIDE; 117 Location GetReturnLocation(Primitive::Type type) const OVERRIDE; 118 Location GetMethodLocation() const OVERRIDE; 119 120 private: 121 InvokeDexCallingConvention calling_convention; 122 123 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64); 124}; 125 126class CodeGeneratorX86_64; 127 128class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { 129 public: 130 ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) 131 : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} 132 133 void EmitMove(size_t index) OVERRIDE; 134 void EmitSwap(size_t index) OVERRIDE; 135 void SpillScratch(int reg) OVERRIDE; 136 void RestoreScratch(int reg) OVERRIDE; 137 138 X86_64Assembler* GetAssembler() const; 139 140 private: 141 void Exchange32(CpuRegister reg, int mem); 142 void Exchange32(XmmRegister reg, int mem); 143 void Exchange32(int mem1, int mem2); 144 void Exchange64(CpuRegister reg, int mem); 145 void Exchange64(XmmRegister reg, int mem); 146 void Exchange64(int mem1, int mem2); 147 148 CodeGeneratorX86_64* const codegen_; 149 150 DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64); 151}; 152 153class LocationsBuilderX86_64 : public HGraphVisitor { 154 public: 155 LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) 156 : HGraphVisitor(graph), codegen_(codegen) {} 157 158#define DECLARE_VISIT_INSTRUCTION(name, 
super) \ 159 void Visit##name(H##name* instr) OVERRIDE; 160 161 FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) 162 FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) 163 164#undef DECLARE_VISIT_INSTRUCTION 165 166 void VisitInstruction(HInstruction* instruction) OVERRIDE { 167 LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() 168 << " (id " << instruction->GetId() << ")"; 169 } 170 171 private: 172 void HandleInvoke(HInvoke* invoke); 173 void HandleBitwiseOperation(HBinaryOperation* operation); 174 void HandleShift(HBinaryOperation* operation); 175 void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); 176 void HandleFieldGet(HInstruction* instruction); 177 178 CodeGeneratorX86_64* const codegen_; 179 InvokeDexCallingConventionVisitorX86_64 parameter_visitor_; 180 181 DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); 182}; 183 184class InstructionCodeGeneratorX86_64 : public HGraphVisitor { 185 public: 186 InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); 187 188#define DECLARE_VISIT_INSTRUCTION(name, super) \ 189 void Visit##name(H##name* instr) OVERRIDE; 190 191 FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) 192 FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) 193 194#undef DECLARE_VISIT_INSTRUCTION 195 196 void VisitInstruction(HInstruction* instruction) OVERRIDE { 197 LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() 198 << " (id " << instruction->GetId() << ")"; 199 } 200 201 X86_64Assembler* GetAssembler() const { return assembler_; } 202 203 private: 204 // Generate code for the given suspend check. If not null, `successor` 205 // is the block to branch to if the suspend check is not needed, and after 206 // the suspend call. 
207 void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); 208 void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); 209 void HandleBitwiseOperation(HBinaryOperation* operation); 210 void GenerateRemFP(HRem* rem); 211 void DivRemOneOrMinusOne(HBinaryOperation* instruction); 212 void DivByPowerOfTwo(HDiv* instruction); 213 void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); 214 void GenerateDivRemIntegral(HBinaryOperation* instruction); 215 void HandleShift(HBinaryOperation* operation); 216 void GenerateMemoryBarrier(MemBarrierKind kind); 217 void HandleFieldSet(HInstruction* instruction, 218 const FieldInfo& field_info, 219 bool value_can_be_null); 220 void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); 221 void GenerateImplicitNullCheck(HNullCheck* instruction); 222 void GenerateExplicitNullCheck(HNullCheck* instruction); 223 void PushOntoFPStack(Location source, uint32_t temp_offset, 224 uint32_t stack_adjustment, bool is_float); 225 void GenerateTestAndBranch(HInstruction* instruction, 226 size_t condition_input_index, 227 Label* true_target, 228 Label* false_target); 229 void GenerateCompareTestAndBranch(HCondition* condition, 230 Label* true_target, 231 Label* false_target); 232 void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); 233 void HandleGoto(HInstruction* got, HBasicBlock* successor); 234 235 X86_64Assembler* const assembler_; 236 CodeGeneratorX86_64* const codegen_; 237 238 DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64); 239}; 240 241// Class for fixups to jump tables. 
242class JumpTableRIPFixup; 243 244class CodeGeneratorX86_64 : public CodeGenerator { 245 public: 246 CodeGeneratorX86_64(HGraph* graph, 247 const X86_64InstructionSetFeatures& isa_features, 248 const CompilerOptions& compiler_options, 249 OptimizingCompilerStats* stats = nullptr); 250 virtual ~CodeGeneratorX86_64() {} 251 252 void GenerateFrameEntry() OVERRIDE; 253 void GenerateFrameExit() OVERRIDE; 254 void Bind(HBasicBlock* block) OVERRIDE; 255 void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; 256 void MoveConstant(Location destination, int32_t value) OVERRIDE; 257 void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; 258 void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; 259 260 size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 261 size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 262 size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 263 size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 264 265 // Generate code to invoke a runtime entry point. 
266 void InvokeRuntime(QuickEntrypointEnum entrypoint, 267 HInstruction* instruction, 268 uint32_t dex_pc, 269 SlowPathCode* slow_path) OVERRIDE; 270 271 void InvokeRuntime(int32_t entry_point_offset, 272 HInstruction* instruction, 273 uint32_t dex_pc, 274 SlowPathCode* slow_path); 275 276 size_t GetWordSize() const OVERRIDE { 277 return kX86_64WordSize; 278 } 279 280 size_t GetFloatingPointSpillSlotSize() const OVERRIDE { 281 return kX86_64WordSize; 282 } 283 284 HGraphVisitor* GetLocationBuilder() OVERRIDE { 285 return &location_builder_; 286 } 287 288 HGraphVisitor* GetInstructionVisitor() OVERRIDE { 289 return &instruction_visitor_; 290 } 291 292 X86_64Assembler* GetAssembler() OVERRIDE { 293 return &assembler_; 294 } 295 296 const X86_64Assembler& GetAssembler() const OVERRIDE { 297 return assembler_; 298 } 299 300 ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE { 301 return &move_resolver_; 302 } 303 304 uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { 305 return GetLabelOf(block)->Position(); 306 } 307 308 Location GetStackLocation(HLoadLocal* load) const OVERRIDE; 309 310 void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; 311 Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; 312 void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; 313 void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; 314 void Finalize(CodeAllocator* allocator) OVERRIDE; 315 316 InstructionSet GetInstructionSet() const OVERRIDE { 317 return InstructionSet::kX86_64; 318 } 319 320 // Emit a write barrier. 321 void MarkGCCard(CpuRegister temp, 322 CpuRegister card, 323 CpuRegister object, 324 CpuRegister value, 325 bool value_can_be_null); 326 327 // Helper method to move a value between two locations. 
328 void Move(Location destination, Location source); 329 330 Label* GetLabelOf(HBasicBlock* block) const { 331 return CommonGetLabelOf<Label>(block_labels_, block); 332 } 333 334 void Initialize() OVERRIDE { 335 block_labels_ = CommonInitializeLabels<Label>(); 336 } 337 338 bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { 339 return false; 340 } 341 342 // Check if the desired_dispatch_info is supported. If it is, return it, 343 // otherwise return a fall-back info that should be used instead. 344 HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( 345 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 346 MethodReference target_method) OVERRIDE; 347 348 void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; 349 void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; 350 351 void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; 352 353 void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; 354 355 const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { 356 return isa_features_; 357 } 358 359 // Generate a read barrier for a heap reference within `instruction`. 360 // 361 // A read barrier for an object reference read from the heap is 362 // implemented as a call to the artReadBarrierSlow runtime entry 363 // point, which is passed the values in locations `ref`, `obj`, and 364 // `offset`: 365 // 366 // mirror::Object* artReadBarrierSlow(mirror::Object* ref, 367 // mirror::Object* obj, 368 // uint32_t offset); 369 // 370 // The `out` location contains the value returned by 371 // artReadBarrierSlow. 372 // 373 // When `index` provided (i.e., when it is different from 374 // Location::NoLocation()), the offset value passed to 375 // artReadBarrierSlow is adjusted to take `index` into account. 
376 void GenerateReadBarrier(HInstruction* instruction, 377 Location out, 378 Location ref, 379 Location obj, 380 uint32_t offset, 381 Location index = Location::NoLocation()); 382 383 // If read barriers are enabled, generate a read barrier for a heap reference. 384 // If heap poisoning is enabled, also unpoison the reference in `out`. 385 void MaybeGenerateReadBarrier(HInstruction* instruction, 386 Location out, 387 Location ref, 388 Location obj, 389 uint32_t offset, 390 Location index = Location::NoLocation()); 391 392 // Generate a read barrier for a GC root within `instruction`. 393 // 394 // A read barrier for an object reference GC root is implemented as 395 // a call to the artReadBarrierForRootSlow runtime entry point, 396 // which is passed the value in location `root`: 397 // 398 // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); 399 // 400 // The `out` location contains the value returned by 401 // artReadBarrierForRootSlow. 402 void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); 403 404 int ConstantAreaStart() const { 405 return constant_area_start_; 406 } 407 408 Address LiteralDoubleAddress(double v); 409 Address LiteralFloatAddress(float v); 410 Address LiteralInt32Address(int32_t v); 411 Address LiteralInt64Address(int64_t v); 412 413 // Load a 64 bit value into a register in the most efficient manner. 414 void Load64BitValue(CpuRegister dest, int64_t value); 415 Address LiteralCaseTable(HPackedSwitch* switch_instr); 416 417 // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. 418 void Store64BitValueToStack(Location dest, int64_t value); 419 420 // Assign a 64 bit constant to an address. 
421 void MoveInt64ToAddress(const Address& addr_low, 422 const Address& addr_high, 423 int64_t v, 424 HInstruction* instruction); 425 426 private: 427 struct PcRelativeDexCacheAccessInfo { 428 PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) 429 : target_dex_file(dex_file), element_offset(element_off), label() { } 430 431 const DexFile& target_dex_file; 432 uint32_t element_offset; 433 Label label; 434 }; 435 436 // Labels for each block that will be compiled. 437 Label* block_labels_; // Indexed by block id. 438 Label frame_entry_label_; 439 LocationsBuilderX86_64 location_builder_; 440 InstructionCodeGeneratorX86_64 instruction_visitor_; 441 ParallelMoveResolverX86_64 move_resolver_; 442 X86_64Assembler assembler_; 443 const X86_64InstructionSetFeatures& isa_features_; 444 445 // Offset to the start of the constant area in the assembled code. 446 // Used for fixups to the constant area. 447 int constant_area_start_; 448 449 // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). 450 ArenaDeque<MethodPatchInfo<Label>> method_patches_; 451 ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; 452 // PC-relative DexCache access info. 453 ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; 454 455 // When we don't know the proper offset for the value, we use kDummy32BitOffset. 456 // We will fix this up in the linker later to have the right value. 457 static constexpr int32_t kDummy32BitOffset = 256; 458 459 // Fixups for jump tables need to be handled specially. 460 ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; 461 462 DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); 463}; 464 465} // namespace x86_64 466} // namespace art 467 468#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 469