1//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief The AMDGPU target machine contains all of the hardware specific 12/// information needed to emit code for R600 and SI GPUs. 13// 14//===----------------------------------------------------------------------===// 15 16#include "AMDGPUTargetMachine.h" 17#include "AMDGPU.h" 18#include "AMDGPUTargetTransformInfo.h" 19#include "R600ISelLowering.h" 20#include "R600InstrInfo.h" 21#include "R600MachineScheduler.h" 22#include "SIISelLowering.h" 23#include "SIInstrInfo.h" 24#include "llvm/Analysis/Passes.h" 25#include "llvm/CodeGen/MachineFunctionAnalysis.h" 26#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 27#include "llvm/CodeGen/MachineModuleInfo.h" 28#include "llvm/CodeGen/Passes.h" 29#include "llvm/IR/Verifier.h" 30#include "llvm/MC/MCAsmInfo.h" 31#include "llvm/IR/LegacyPassManager.h" 32#include "llvm/Support/TargetRegistry.h" 33#include "llvm/Support/raw_os_ostream.h" 34#include "llvm/Transforms/IPO.h" 35#include "llvm/Transforms/Scalar.h" 36#include <llvm/CodeGen/Passes.h> 37 38using namespace llvm; 39 40extern "C" void LLVMInitializeR600Target() { 41 // Register the target 42 RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); 43 RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget); 44} 45 46static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { 47 return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>()); 48} 49 50static MachineSchedRegistry 51SchedCustomRegistry("r600", "Run R600's custom scheduler", 52 createR600MachineScheduler); 53 54static std::string computeDataLayout(StringRef TT) { 55 Triple Triple(TT); 56 std::string Ret = "e-p:32:32"; 57 58 if (Triple.getArch() == Triple::amdgcn) { 59 // 32-bit private, local, and region pointers. 64-bit global and constant. 60 Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; 61 } 62 63 Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" 64 "-v512:512-v1024:1024-v2048:2048-n32:64"; 65 66 return Ret; 67} 68 69AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, 70 StringRef CPU, StringRef FS, 71 TargetOptions Options, Reloc::Model RM, 72 CodeModel::Model CM, 73 CodeGenOpt::Level OptLevel) 74 : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, 75 OptLevel), 76 TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this), 77 IntrinsicInfo() { 78 setRequiresStructuredCFG(true); 79 initAsmInfo(); 80} 81 82AMDGPUTargetMachine::~AMDGPUTargetMachine() { 83 delete TLOF; 84} 85 86//===----------------------------------------------------------------------===// 87// R600 Target Machine (R600 -> Cayman) 88//===----------------------------------------------------------------------===// 89 90R600TargetMachine::R600TargetMachine(const Target &T, StringRef TT, StringRef FS, 91 StringRef CPU, TargetOptions Options, Reloc::Model RM, 92 CodeModel::Model CM, CodeGenOpt::Level OL) : 93 AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { } 94 95 96//===----------------------------------------------------------------------===// 97// GCN Target Machine (SI+) 98//===----------------------------------------------------------------------===// 99 100GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS, 101 StringRef CPU, TargetOptions Options, Reloc::Model RM, 102 CodeModel::Model CM, CodeGenOpt::Level OL) : 103 AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { } 104 105//===----------------------------------------------------------------------===// 106// AMDGPU Pass Setup 107//===----------------------------------------------------------------------===// 108 109namespace { 110class AMDGPUPassConfig : public TargetPassConfig { 111public: 112 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) 113 : TargetPassConfig(TM, PM) {} 114 115 AMDGPUTargetMachine &getAMDGPUTargetMachine() const { 116 return getTM<AMDGPUTargetMachine>(); 117 } 118 119 ScheduleDAGInstrs * 120 createMachineScheduler(MachineSchedContext *C) const override { 121 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 122 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 123 return createR600MachineScheduler(C); 124 return nullptr; 125 } 126 127 void addIRPasses() override; 128 void addCodeGenPrepare() override; 129 virtual bool addPreISel() override; 130 virtual bool addInstSelector() override; 131}; 132 133class R600PassConfig : public AMDGPUPassConfig { 134public: 135 R600PassConfig(TargetMachine *TM, PassManagerBase &PM) 136 : AMDGPUPassConfig(TM, PM) { } 137 138 bool addPreISel() override; 139 void addPreRegAlloc() override; 140 void addPreSched2() override; 141 void addPreEmitPass() override; 142}; 143 144class GCNPassConfig : public AMDGPUPassConfig { 145public: 146 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) 147 : AMDGPUPassConfig(TM, PM) { } 148 bool addPreISel() override; 149 bool addInstSelector() override; 150 void addPreRegAlloc() override; 151 void addPostRegAlloc() override; 152 void addPreSched2() override; 153 void addPreEmitPass() override; 154}; 155 156} // End of anonymous namespace 157 158TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { 159 return TargetIRAnalysis( 160 [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); }); 161} 162 163void AMDGPUPassConfig::addIRPasses() { 164 // Function calls are not supported, so make sure we inline everything. 165 addPass(createAMDGPUAlwaysInlinePass()); 166 addPass(createAlwaysInlinerPass()); 167 // We need to add the barrier noop pass, otherwise adding the function 168 // inlining pass will cause all of the PassConfigs passes to be run 169 // one function at a time, which means if we have a nodule with two 170 // functions, then we will generate code for the first function 171 // without ever running any passes on the second. 172 addPass(createBarrierNoopPass()); 173 TargetPassConfig::addIRPasses(); 174} 175 176void AMDGPUPassConfig::addCodeGenPrepare() { 177 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 178 if (ST.isPromoteAllocaEnabled()) { 179 addPass(createAMDGPUPromoteAlloca(ST)); 180 addPass(createSROAPass()); 181 } 182 TargetPassConfig::addCodeGenPrepare(); 183} 184 185bool 186AMDGPUPassConfig::addPreISel() { 187 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 188 addPass(createFlattenCFGPass()); 189 if (ST.IsIRStructurizerEnabled()) 190 addPass(createStructurizeCFGPass()); 191 return false; 192} 193 194bool AMDGPUPassConfig::addInstSelector() { 195 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); 196 return false; 197} 198 199//===----------------------------------------------------------------------===// 200// R600 Pass Setup 201//===----------------------------------------------------------------------===// 202 203bool R600PassConfig::addPreISel() { 204 AMDGPUPassConfig::addPreISel(); 205 addPass(createR600TextureIntrinsicsReplacer()); 206 return false; 207} 208 209void R600PassConfig::addPreRegAlloc() { 210 addPass(createR600VectorRegMerger(*TM)); 211} 212 213void R600PassConfig::addPreSched2() { 214 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 215 addPass(createR600EmitClauseMarkers(), false); 216 if (ST.isIfCvtEnabled()) 217 addPass(&IfConverterID, false); 218 addPass(createR600ClauseMergePass(*TM), false); 219} 220 221void R600PassConfig::addPreEmitPass() { 222 addPass(createAMDGPUCFGStructurizerPass(), false); 223 addPass(createR600ExpandSpecialInstrsPass(*TM), false); 224 addPass(&FinalizeMachineBundlesID, false); 225 addPass(createR600Packetizer(*TM), false); 226 addPass(createR600ControlFlowFinalizer(*TM), false); 227} 228 229TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { 230 return new R600PassConfig(this, PM); 231} 232 233//===----------------------------------------------------------------------===// 234// GCN Pass Setup 235//===----------------------------------------------------------------------===// 236 237bool GCNPassConfig::addPreISel() { 238 AMDGPUPassConfig::addPreISel(); 239 addPass(createSinkingPass()); 240 addPass(createSITypeRewriter()); 241 addPass(createSIAnnotateControlFlowPass()); 242 return false; 243} 244 245bool GCNPassConfig::addInstSelector() { 246 AMDGPUPassConfig::addInstSelector(); 247 addPass(createSILowerI1CopiesPass()); 248 addPass(createSIFixSGPRCopiesPass(*TM)); 249 addPass(createSIFoldOperandsPass()); 250 return false; 251} 252 253void GCNPassConfig::addPreRegAlloc() { 254 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 255 if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { 256 // Don't do this with no optimizations since it throws away debug info by 257 // merging nonadjacent loads. 258 259 // This should be run after scheduling, but before register allocation. It 260 // also need extra copies to the address operand to be eliminated. 261 initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); 262 insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); 263 } 264 addPass(createSIShrinkInstructionsPass(), false); 265 addPass(createSIFixSGPRLiveRangesPass(), false); 266} 267 268void GCNPassConfig::addPostRegAlloc() { 269 addPass(createSIPrepareScratchRegs(), false); 270 addPass(createSIShrinkInstructionsPass(), false); 271} 272 273void GCNPassConfig::addPreSched2() { 274 addPass(createSIInsertWaits(*TM), false); 275} 276 277void GCNPassConfig::addPreEmitPass() { 278 addPass(createSILowerControlFlowPass(*TM), false); 279} 280 281TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { 282 return new GCNPassConfig(this, PM); 283} 284