// NVPTXTargetMachine.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
1//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Top-level implementation for the NVPTX target. 11// 12//===----------------------------------------------------------------------===// 13 14#include "NVPTXTargetMachine.h" 15#include "MCTargetDesc/NVPTXMCAsmInfo.h" 16#include "NVPTX.h" 17#include "NVPTXAllocaHoisting.h" 18#include "NVPTXLowerAggrCopies.h" 19#include "llvm/Analysis/Passes.h" 20#include "llvm/CodeGen/AsmPrinter.h" 21#include "llvm/CodeGen/MachineFunctionAnalysis.h" 22#include "llvm/CodeGen/MachineModuleInfo.h" 23#include "llvm/CodeGen/Passes.h" 24#include "llvm/IR/DataLayout.h" 25#include "llvm/IR/IRPrintingPasses.h" 26#include "llvm/IR/Verifier.h" 27#include "llvm/MC/MCAsmInfo.h" 28#include "llvm/MC/MCInstrInfo.h" 29#include "llvm/MC/MCStreamer.h" 30#include "llvm/MC/MCSubtargetInfo.h" 31#include "llvm/PassManager.h" 32#include "llvm/Support/CommandLine.h" 33#include "llvm/Support/Debug.h" 34#include "llvm/Support/FormattedStream.h" 35#include "llvm/Support/TargetRegistry.h" 36#include "llvm/Support/raw_ostream.h" 37#include "llvm/Target/TargetInstrInfo.h" 38#include "llvm/Target/TargetLowering.h" 39#include "llvm/Target/TargetLoweringObjectFile.h" 40#include "llvm/Target/TargetMachine.h" 41#include "llvm/Target/TargetOptions.h" 42#include "llvm/Target/TargetRegisterInfo.h" 43#include "llvm/Target/TargetSubtargetInfo.h" 44#include "llvm/Transforms/Scalar.h" 45 46using namespace llvm; 47 48namespace llvm { 49void initializeNVVMReflectPass(PassRegistry&); 50void initializeGenericToNVVMPass(PassRegistry&); 51void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 52void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 53} 54 55extern "C" void 
LLVMInitializeNVPTXTarget() { 56 // Register the target. 57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 58 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 59 60 // FIXME: This pass is really intended to be invoked during IR optimization, 61 // but it's very NVPTX-specific. 62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 64 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 65 initializeNVPTXFavorNonGenericAddrSpacesPass( 66 *PassRegistry::getPassRegistry()); 67} 68 69NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 70 StringRef CPU, StringRef FS, 71 const TargetOptions &Options, 72 Reloc::Model RM, CodeModel::Model CM, 73 CodeGenOpt::Level OL, bool is64bit) 74 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 75 Subtarget(TT, CPU, FS, *this, is64bit) { 76 initAsmInfo(); 77} 78 79void NVPTXTargetMachine32::anchor() {} 80 81NVPTXTargetMachine32::NVPTXTargetMachine32( 82 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 83 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 84 CodeGenOpt::Level OL) 85 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 86 87void NVPTXTargetMachine64::anchor() {} 88 89NVPTXTargetMachine64::NVPTXTargetMachine64( 90 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 91 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 92 CodeGenOpt::Level OL) 93 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 94 95namespace { 96class NVPTXPassConfig : public TargetPassConfig { 97public: 98 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 99 : TargetPassConfig(TM, PM) {} 100 101 NVPTXTargetMachine &getNVPTXTargetMachine() const { 102 return getTM<NVPTXTargetMachine>(); 103 } 104 105 void addIRPasses() override; 106 bool addInstSelector() override; 107 bool addPreRegAlloc() override; 
108 bool addPostRegAlloc() override; 109 void addMachineSSAOptimization() override; 110 111 FunctionPass *createTargetRegisterAllocator(bool) override; 112 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 113 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 114}; 115} // end anonymous namespace 116 117TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 118 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 119 return PassConfig; 120} 121 122void NVPTXPassConfig::addIRPasses() { 123 // The following passes are known to not play well with virtual regs hanging 124 // around after register allocation (which in our case, is *all* registers). 125 // We explicitly disable them here. We do, however, need some functionality 126 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 127 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 128 disablePass(&PrologEpilogCodeInserterID); 129 disablePass(&MachineCopyPropagationID); 130 disablePass(&BranchFolderPassID); 131 disablePass(&TailDuplicateID); 132 133 addPass(createNVPTXImageOptimizerPass()); 134 TargetPassConfig::addIRPasses(); 135 addPass(createNVPTXAssignValidGlobalNamesPass()); 136 addPass(createGenericToNVVMPass()); 137 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 138 addPass(createSeparateConstOffsetFromGEPPass()); 139 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 140 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates 141 // significantly better code than EarlyCSE for some of our benchmarks. 142 if (getOptLevel() == CodeGenOpt::Aggressive) 143 addPass(createGVNPass()); 144 else 145 addPass(createEarlyCSEPass()); 146 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 147 // some dead code. We could remove dead code in an ad-hoc manner, but that 148 // requires manual work and might be error-prone. 
149 // 150 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 151 // and leave them unused. 152 // 153 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 154 // old index and some of its intermediate results may become unused. 155 addPass(createDeadCodeEliminationPass()); 156} 157 158bool NVPTXPassConfig::addInstSelector() { 159 const NVPTXSubtarget &ST = 160 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); 161 162 addPass(createLowerAggrCopies()); 163 addPass(createAllocaHoisting()); 164 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 165 166 if (!ST.hasImageHandles()) 167 addPass(createNVPTXReplaceImageHandlesPass()); 168 169 return false; 170} 171 172bool NVPTXPassConfig::addPreRegAlloc() { return false; } 173bool NVPTXPassConfig::addPostRegAlloc() { 174 addPass(createNVPTXPrologEpilogPass()); 175 return false; 176} 177 178FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 179 return nullptr; // No reg alloc 180} 181 182void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 183 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 184 addPass(&PHIEliminationID); 185 addPass(&TwoAddressInstructionPassID); 186} 187 188void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 189 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 190 191 addPass(&ProcessImplicitDefsID); 192 addPass(&LiveVariablesID); 193 addPass(&MachineLoopInfoID); 194 addPass(&PHIEliminationID); 195 196 addPass(&TwoAddressInstructionPassID); 197 addPass(&RegisterCoalescerID); 198 199 // PreRA instruction scheduling. 200 if (addPass(&MachineSchedulerID)) 201 printAndVerify("After Machine Scheduling"); 202 203 204 addPass(&StackSlotColoringID); 205 206 // FIXME: Needs physical registers 207 //addPass(&PostRAMachineLICMID); 208 209 printAndVerify("After StackSlotColoring"); 210} 211 212void NVPTXPassConfig::addMachineSSAOptimization() { 213 // Pre-ra tail duplication. 
214 if (addPass(&EarlyTailDuplicateID)) 215 printAndVerify("After Pre-RegAlloc TailDuplicate"); 216 217 // Optimize PHIs before DCE: removing dead PHI cycles may make more 218 // instructions dead. 219 addPass(&OptimizePHIsID); 220 221 // This pass merges large allocas. StackSlotColoring is a different pass 222 // which merges spill slots. 223 addPass(&StackColoringID); 224 225 // If the target requests it, assign local variables to stack slots relative 226 // to one another and simplify frame index references where possible. 227 addPass(&LocalStackSlotAllocationID); 228 229 // With optimization, dead code should already be eliminated. However 230 // there is one known exception: lowered code for arguments that are only 231 // used by tail calls, where the tail calls reuse the incoming stack 232 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 233 addPass(&DeadMachineInstructionElimID); 234 printAndVerify("After codegen DCE pass"); 235 236 // Allow targets to insert passes that improve instruction level parallelism, 237 // like if-conversion. Such passes will typically need dominator trees and 238 // loop info, just like LICM and CSE below. 239 if (addILPOpts()) 240 printAndVerify("After ILP optimizations"); 241 242 addPass(&MachineLICMID); 243 addPass(&MachineCSEID); 244 245 addPass(&MachineSinkingID); 246 printAndVerify("After Machine LICM, CSE and Sinking passes"); 247 248 addPass(&PeepholeOptimizerID); 249 printAndVerify("After codegen peephole optimization pass"); 250} 251