code_generator_x86_64.h revision 2ae48182573da7087bffc2873730bc758ec29696
1/* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 18#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 19 20#include "arch/x86_64/instruction_set_features_x86_64.h" 21#include "code_generator.h" 22#include "dex/compiler_enums.h" 23#include "driver/compiler_options.h" 24#include "nodes.h" 25#include "parallel_move_resolver.h" 26#include "utils/x86_64/assembler_x86_64.h" 27 28namespace art { 29namespace x86_64 { 30 31// Use a local definition to prevent copying mistakes. 32static constexpr size_t kX86_64WordSize = kX86_64PointerSize; 33 34// Some x86_64 instructions require a register to be available as temp. 35static constexpr Register TMP = R11; 36 37static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 }; 38static constexpr FloatRegister kParameterFloatRegisters[] = 39 { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 }; 40 41static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); 42static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); 43 44static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX }; 45static constexpr size_t kRuntimeParameterCoreRegistersLength = 46 arraysize(kRuntimeParameterCoreRegisters); 47static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; 48static constexpr size_t kRuntimeParameterFpuRegistersLength = 49 arraysize(kRuntimeParameterFpuRegisters); 50 51// These XMM registers are non-volatile in ART ABI, but volatile in native ABI. 52// If the ART ABI changes, this list must be updated. It is used to ensure that 53// these are not clobbered by any direct call to native code (such as math intrinsics). 54static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; 55 56 57class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { 58 public: 59 InvokeRuntimeCallingConvention() 60 : CallingConvention(kRuntimeParameterCoreRegisters, 61 kRuntimeParameterCoreRegistersLength, 62 kRuntimeParameterFpuRegisters, 63 kRuntimeParameterFpuRegistersLength, 64 kX86_64PointerSize) {} 65 66 private: 67 DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); 68}; 69 70class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> { 71 public: 72 InvokeDexCallingConvention() : CallingConvention( 73 kParameterCoreRegisters, 74 kParameterCoreRegistersLength, 75 kParameterFloatRegisters, 76 kParameterFloatRegistersLength, 77 kX86_64PointerSize) {} 78 79 private: 80 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); 81}; 82 83class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { 84 public: 85 FieldAccessCallingConventionX86_64() {} 86 87 Location GetObjectLocation() const OVERRIDE { 88 return Location::RegisterLocation(RSI); 89 } 90 Location GetFieldIndexLocation() const OVERRIDE { 91 return Location::RegisterLocation(RDI); 92 } 93 Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { 94 return Location::RegisterLocation(RAX); 95 } 96 Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { 97 return Primitive::Is64BitType(type) 98 ? Location::RegisterLocation(RDX) 99 : (is_instance 100 ? Location::RegisterLocation(RDX) 101 : Location::RegisterLocation(RSI)); 102 } 103 Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { 104 return Location::FpuRegisterLocation(XMM0); 105 } 106 107 private: 108 DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64); 109}; 110 111 112class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { 113 public: 114 InvokeDexCallingConventionVisitorX86_64() {} 115 virtual ~InvokeDexCallingConventionVisitorX86_64() {} 116 117 Location GetNextLocation(Primitive::Type type) OVERRIDE; 118 Location GetReturnLocation(Primitive::Type type) const OVERRIDE; 119 Location GetMethodLocation() const OVERRIDE; 120 121 private: 122 InvokeDexCallingConvention calling_convention; 123 124 DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64); 125}; 126 127class CodeGeneratorX86_64; 128 129class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { 130 public: 131 ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) 132 : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} 133 134 void EmitMove(size_t index) OVERRIDE; 135 void EmitSwap(size_t index) OVERRIDE; 136 void SpillScratch(int reg) OVERRIDE; 137 void RestoreScratch(int reg) OVERRIDE; 138 139 X86_64Assembler* GetAssembler() const; 140 141 private: 142 void Exchange32(CpuRegister reg, int mem); 143 void Exchange32(XmmRegister reg, int mem); 144 void Exchange32(int mem1, int mem2); 145 void Exchange64(CpuRegister reg, int mem); 146 void Exchange64(XmmRegister reg, int mem); 147 void Exchange64(int mem1, int mem2); 148 149 CodeGeneratorX86_64* const codegen_; 150 151 DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverX86_64); 152}; 153 154class LocationsBuilderX86_64 : public HGraphVisitor { 155 public: 156 LocationsBuilderX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) 157 : HGraphVisitor(graph), codegen_(codegen) {} 158 159#define DECLARE_VISIT_INSTRUCTION(name, super) \ 160 void Visit##name(H##name* instr) OVERRIDE; 161 162 FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) 163 FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) 164 165#undef DECLARE_VISIT_INSTRUCTION 166 167 void VisitInstruction(HInstruction* instruction) OVERRIDE { 168 LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() 169 << " (id " << instruction->GetId() << ")"; 170 } 171 172 private: 173 void HandleInvoke(HInvoke* invoke); 174 void HandleBitwiseOperation(HBinaryOperation* operation); 175 void HandleCondition(HCondition* condition); 176 void HandleShift(HBinaryOperation* operation); 177 void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); 178 void HandleFieldGet(HInstruction* instruction); 179 180 CodeGeneratorX86_64* const codegen_; 181 InvokeDexCallingConventionVisitorX86_64 parameter_visitor_; 182 183 DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); 184}; 185 186class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { 187 public: 188 InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); 189 190#define DECLARE_VISIT_INSTRUCTION(name, super) \ 191 void Visit##name(H##name* instr) OVERRIDE; 192 193 FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) 194 FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) 195 196#undef DECLARE_VISIT_INSTRUCTION 197 198 void VisitInstruction(HInstruction* instruction) OVERRIDE { 199 LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() 200 << " (id " << instruction->GetId() << ")"; 201 } 202 203 X86_64Assembler* GetAssembler() const { return assembler_; } 204 205 private: 206 // Generate code for the given suspend check. If not null, `successor` 207 // is the block to branch to if the suspend check is not needed, and after 208 // the suspend call. 209 void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); 210 void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); 211 void HandleBitwiseOperation(HBinaryOperation* operation); 212 void GenerateRemFP(HRem* rem); 213 void DivRemOneOrMinusOne(HBinaryOperation* instruction); 214 void DivByPowerOfTwo(HDiv* instruction); 215 void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); 216 void GenerateDivRemIntegral(HBinaryOperation* instruction); 217 void HandleCondition(HCondition* condition); 218 void HandleShift(HBinaryOperation* operation); 219 220 void HandleFieldSet(HInstruction* instruction, 221 const FieldInfo& field_info, 222 bool value_can_be_null); 223 void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); 224 225 // Generate a heap reference load using one register `out`: 226 // 227 // out <- *(out + offset) 228 // 229 // while honoring heap poisoning and/or read barriers (if any). 230 // 231 // Location `maybe_temp` is used when generating a read barrier and 232 // shall be a register in that case; it may be an invalid location 233 // otherwise. 234 void GenerateReferenceLoadOneRegister(HInstruction* instruction, 235 Location out, 236 uint32_t offset, 237 Location maybe_temp); 238 // Generate a heap reference load using two different registers 239 // `out` and `obj`: 240 // 241 // out <- *(obj + offset) 242 // 243 // while honoring heap poisoning and/or read barriers (if any). 244 // 245 // Location `maybe_temp` is used when generating a Baker's (fast 246 // path) read barrier and shall be a register in that case; it may 247 // be an invalid location otherwise. 248 void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, 249 Location out, 250 Location obj, 251 uint32_t offset, 252 Location maybe_temp); 253 // Generate a GC root reference load: 254 // 255 // root <- *(obj + offset) 256 // 257 // while honoring read barriers (if any). 258 void GenerateGcRootFieldLoad(HInstruction* instruction, 259 Location root, 260 CpuRegister obj, 261 uint32_t offset); 262 263 void PushOntoFPStack(Location source, uint32_t temp_offset, 264 uint32_t stack_adjustment, bool is_float); 265 void GenerateCompareTest(HCondition* condition); 266 template<class LabelType> 267 void GenerateTestAndBranch(HInstruction* instruction, 268 size_t condition_input_index, 269 LabelType* true_target, 270 LabelType* false_target); 271 template<class LabelType> 272 void GenerateCompareTestAndBranch(HCondition* condition, 273 LabelType* true_target, 274 LabelType* false_target); 275 template<class LabelType> 276 void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label); 277 278 void HandleGoto(HInstruction* got, HBasicBlock* successor); 279 280 X86_64Assembler* const assembler_; 281 CodeGeneratorX86_64* const codegen_; 282 283 DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64); 284}; 285 286// Class for fixups to jump tables. 287class JumpTableRIPFixup; 288 289class CodeGeneratorX86_64 : public CodeGenerator { 290 public: 291 CodeGeneratorX86_64(HGraph* graph, 292 const X86_64InstructionSetFeatures& isa_features, 293 const CompilerOptions& compiler_options, 294 OptimizingCompilerStats* stats = nullptr); 295 virtual ~CodeGeneratorX86_64() {} 296 297 void GenerateFrameEntry() OVERRIDE; 298 void GenerateFrameExit() OVERRIDE; 299 void Bind(HBasicBlock* block) OVERRIDE; 300 void MoveConstant(Location destination, int32_t value) OVERRIDE; 301 void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; 302 void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; 303 304 size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 305 size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 306 size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 307 size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; 308 309 // Generate code to invoke a runtime entry point. 310 void InvokeRuntime(QuickEntrypointEnum entrypoint, 311 HInstruction* instruction, 312 uint32_t dex_pc, 313 SlowPathCode* slow_path) OVERRIDE; 314 315 void InvokeRuntime(int32_t entry_point_offset, 316 HInstruction* instruction, 317 uint32_t dex_pc, 318 SlowPathCode* slow_path); 319 320 size_t GetWordSize() const OVERRIDE { 321 return kX86_64WordSize; 322 } 323 324 size_t GetFloatingPointSpillSlotSize() const OVERRIDE { 325 return kX86_64WordSize; 326 } 327 328 HGraphVisitor* GetLocationBuilder() OVERRIDE { 329 return &location_builder_; 330 } 331 332 HGraphVisitor* GetInstructionVisitor() OVERRIDE { 333 return &instruction_visitor_; 334 } 335 336 X86_64Assembler* GetAssembler() OVERRIDE { 337 return &assembler_; 338 } 339 340 const X86_64Assembler& GetAssembler() const OVERRIDE { 341 return assembler_; 342 } 343 344 ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE { 345 return &move_resolver_; 346 } 347 348 uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { 349 return GetLabelOf(block)->Position(); 350 } 351 352 Location GetStackLocation(HLoadLocal* load) const OVERRIDE; 353 354 void SetupBlockedRegisters() const OVERRIDE; 355 void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; 356 void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; 357 void Finalize(CodeAllocator* allocator) OVERRIDE; 358 359 InstructionSet GetInstructionSet() const OVERRIDE { 360 return InstructionSet::kX86_64; 361 } 362 363 // Emit a write barrier. 364 void MarkGCCard(CpuRegister temp, 365 CpuRegister card, 366 CpuRegister object, 367 CpuRegister value, 368 bool value_can_be_null); 369 370 void GenerateMemoryBarrier(MemBarrierKind kind); 371 372 // Helper method to move a value between two locations. 373 void Move(Location destination, Location source); 374 375 Label* GetLabelOf(HBasicBlock* block) const { 376 return CommonGetLabelOf<Label>(block_labels_, block); 377 } 378 379 void Initialize() OVERRIDE { 380 block_labels_ = CommonInitializeLabels<Label>(); 381 } 382 383 bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { 384 return false; 385 } 386 387 // Check if the desired_dispatch_info is supported. If it is, return it, 388 // otherwise return a fall-back info that should be used instead. 389 HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( 390 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 391 MethodReference target_method) OVERRIDE; 392 393 void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; 394 void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; 395 396 void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; 397 398 void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; 399 400 const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { 401 return isa_features_; 402 } 403 404 // Fast path implementation of ReadBarrier::Barrier for a heap 405 // reference field load when Baker's read barriers are used. 406 void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 407 Location ref, 408 CpuRegister obj, 409 uint32_t offset, 410 Location temp, 411 bool needs_null_check); 412 // Fast path implementation of ReadBarrier::Barrier for a heap 413 // reference array load when Baker's read barriers are used. 414 void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 415 Location ref, 416 CpuRegister obj, 417 uint32_t data_offset, 418 Location index, 419 Location temp, 420 bool needs_null_check); 421 422 // Generate a read barrier for a heap reference within `instruction` 423 // using a slow path. 424 // 425 // A read barrier for an object reference read from the heap is 426 // implemented as a call to the artReadBarrierSlow runtime entry 427 // point, which is passed the values in locations `ref`, `obj`, and 428 // `offset`: 429 // 430 // mirror::Object* artReadBarrierSlow(mirror::Object* ref, 431 // mirror::Object* obj, 432 // uint32_t offset); 433 // 434 // The `out` location contains the value returned by 435 // artReadBarrierSlow. 436 // 437 // When `index` provided (i.e., when it is different from 438 // Location::NoLocation()), the offset value passed to 439 // artReadBarrierSlow is adjusted to take `index` into account. 440 void GenerateReadBarrierSlow(HInstruction* instruction, 441 Location out, 442 Location ref, 443 Location obj, 444 uint32_t offset, 445 Location index = Location::NoLocation()); 446 447 // If read barriers are enabled, generate a read barrier for a heap 448 // reference using a slow path. If heap poisoning is enabled, also 449 // unpoison the reference in `out`. 450 void MaybeGenerateReadBarrierSlow(HInstruction* instruction, 451 Location out, 452 Location ref, 453 Location obj, 454 uint32_t offset, 455 Location index = Location::NoLocation()); 456 457 // Generate a read barrier for a GC root within `instruction` using 458 // a slow path. 459 // 460 // A read barrier for an object reference GC root is implemented as 461 // a call to the artReadBarrierForRootSlow runtime entry point, 462 // which is passed the value in location `root`: 463 // 464 // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); 465 // 466 // The `out` location contains the value returned by 467 // artReadBarrierForRootSlow. 468 void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); 469 470 int ConstantAreaStart() const { 471 return constant_area_start_; 472 } 473 474 Address LiteralDoubleAddress(double v); 475 Address LiteralFloatAddress(float v); 476 Address LiteralInt32Address(int32_t v); 477 Address LiteralInt64Address(int64_t v); 478 479 // Load a 32/64-bit value into a register in the most efficient manner. 480 void Load32BitValue(CpuRegister dest, int32_t value); 481 void Load64BitValue(CpuRegister dest, int64_t value); 482 void Load32BitValue(XmmRegister dest, int32_t value); 483 void Load64BitValue(XmmRegister dest, int64_t value); 484 void Load32BitValue(XmmRegister dest, float value); 485 void Load64BitValue(XmmRegister dest, double value); 486 487 // Compare a register with a 32/64-bit value in the most efficient manner. 488 void Compare32BitValue(CpuRegister dest, int32_t value); 489 void Compare64BitValue(CpuRegister dest, int64_t value); 490 491 Address LiteralCaseTable(HPackedSwitch* switch_instr); 492 493 // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. 494 void Store64BitValueToStack(Location dest, int64_t value); 495 496 // Assign a 64 bit constant to an address. 497 void MoveInt64ToAddress(const Address& addr_low, 498 const Address& addr_high, 499 int64_t v, 500 HInstruction* instruction); 501 502 // Ensure that prior stores complete to memory before subsequent loads. 503 // The locked add implementation will avoid serializing device memory, but will 504 // touch (but not change) the top of the stack. The locked add should not be used for 505 // ordering non-temporal stores. 506 void MemoryFence(bool force_mfence = false) { 507 if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) { 508 assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0)); 509 } else { 510 assembler_.mfence(); 511 } 512 } 513 514 void GenerateNop(); 515 void GenerateImplicitNullCheck(HNullCheck* instruction); 516 void GenerateExplicitNullCheck(HNullCheck* instruction); 517 518 private: 519 // Factored implementation of GenerateFieldLoadWithBakerReadBarrier 520 // and GenerateArrayLoadWithBakerReadBarrier. 521 void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 522 Location ref, 523 CpuRegister obj, 524 const Address& src, 525 Location temp, 526 bool needs_null_check); 527 528 struct PcRelativeDexCacheAccessInfo { 529 PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) 530 : target_dex_file(dex_file), element_offset(element_off), label() { } 531 532 const DexFile& target_dex_file; 533 uint32_t element_offset; 534 Label label; 535 }; 536 537 // Labels for each block that will be compiled. 538 Label* block_labels_; // Indexed by block id. 539 Label frame_entry_label_; 540 LocationsBuilderX86_64 location_builder_; 541 InstructionCodeGeneratorX86_64 instruction_visitor_; 542 ParallelMoveResolverX86_64 move_resolver_; 543 X86_64Assembler assembler_; 544 const X86_64InstructionSetFeatures& isa_features_; 545 546 // Offset to the start of the constant area in the assembled code. 547 // Used for fixups to the constant area. 548 int constant_area_start_; 549 550 // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). 551 ArenaDeque<MethodPatchInfo<Label>> method_patches_; 552 ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; 553 // PC-relative DexCache access info. 554 ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; 555 556 // When we don't know the proper offset for the value, we use kDummy32BitOffset. 557 // We will fix this up in the linker later to have the right value. 558 static constexpr int32_t kDummy32BitOffset = 256; 559 560 // Fixups for jump tables need to be handled specially. 561 ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; 562 563 DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); 564}; 565 566} // namespace x86_64 567} // namespace art 568 569#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ 570