149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// The LLVM Compiler Infrastructure 449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// This file is distributed under the University of Illinois Open Source 649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// License. See LICENSE.TXT for details. 749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski//===----------------------------------------------------------------------===// 949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 1049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// Top-level implementation for the NVPTX target. 1149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski// 1249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski//===----------------------------------------------------------------------===// 1349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 1449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "NVPTXTargetMachine.h" 1549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "MCTargetDesc/NVPTXMCAsmInfo.h" 16d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "NVPTX.h" 1749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "NVPTXAllocaHoisting.h" 18d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "NVPTXLowerAggrCopies.h" 1937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "NVPTXTargetObjectFile.h" 20ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines#include "NVPTXTargetTransformInfo.h" 2149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Analysis/Passes.h" 2249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/CodeGen/AsmPrinter.h" 2349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/CodeGen/MachineFunctionAnalysis.h" 2449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/CodeGen/MachineModuleInfo.h" 2549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/CodeGen/Passes.h" 260b8c9a80f20772c3793201ab5b251d3520b9cea3Chandler Carruth#include "llvm/IR/DataLayout.h" 2736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/IRPrintingPasses.h" 28ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines#include "llvm/IR/LegacyPassManager.h" 2936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/IR/Verifier.h" 3049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/MC/MCAsmInfo.h" 3149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/MC/MCInstrInfo.h" 3249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/MC/MCStreamer.h" 3349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/MC/MCSubtargetInfo.h" 34d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/CommandLine.h" 35d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/Debug.h" 36d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/FormattedStream.h" 37d04a8d4b33ff316ca4cf961e06c9e312eff8e64fChandler Carruth#include "llvm/Support/TargetRegistry.h" 3849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Support/raw_ostream.h" 3949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetInstrInfo.h" 4049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetLowering.h" 4149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetLoweringObjectFile.h" 4249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetMachine.h" 4349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetOptions.h" 4449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetRegisterInfo.h" 4549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Target/TargetSubtargetInfo.h" 4649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski#include "llvm/Transforms/Scalar.h" 4749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 4849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskiusing namespace llvm; 4949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 5021fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinskinamespace llvm { 5121fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinskivoid initializeNVVMReflectPass(PassRegistry&); 527536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinskivoid initializeGenericToNVVMPass(PassRegistry&); 534c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarvoid initializeNVPTXAllocaHoistingPass(PassRegistry &); 5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 56cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarvoid initializeNVPTXLowerAggrCopiesPass(PassRegistry &); 576948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainarvoid initializeNVPTXLowerKernelArgsPass(PassRegistry &); 58cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarvoid initializeNVPTXLowerAllocaPass(PassRegistry &); 5921fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski} 6021fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski 6149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskiextern "C" void LLVMInitializeNVPTXTarget() { 6249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski // Register the target. 6349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 6449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 6549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 6621fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski // FIXME: This pass is really intended to be invoked during IR optimization, 6721fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski // but it's very NVPTX-specific. 68cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar PassRegistry &PR = *PassRegistry::getPassRegistry(); 69cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVVMReflectPass(PR); 70cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeGenericToNVVMPass(PR); 71cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVPTXAllocaHoistingPass(PR); 72cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVPTXAssignValidGlobalNamesPass(PR); 73cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVPTXFavorNonGenericAddrSpacesPass(PR); 74cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVPTXLowerKernelArgsPass(PR); 75cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVPTXLowerAllocaPass(PR); 76cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar initializeNVPTXLowerAggrCopiesPass(PR); 7736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 79ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic std::string computeDataLayout(bool is64Bit) { 80ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines std::string Ret = "e"; 81ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 82ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!is64Bit) 83ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Ret += "-p:32:32"; 84ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 85ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 86ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 87ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return Ret; 88ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 89ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 906948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga NainarNVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT, 91c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines StringRef CPU, StringRef FS, 92c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const TargetOptions &Options, 93c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Reloc::Model RM, CodeModel::Model CM, 94c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines CodeGenOpt::Level OL, bool is64bit) 954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, 964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar CM, OL), 974c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), 984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Subtarget(TT, CPU, FS, *this) { 996948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (TT.getOS() == Triple::NVCL) 100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines drvInterface = NVPTX::NVCL; 101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines else 102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines drvInterface = NVPTX::CUDA; 1034a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola initAsmInfo(); 1044a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola} 10549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 10637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesNVPTXTargetMachine::~NVPTXTargetMachine() {} 10737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 10849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskivoid NVPTXTargetMachine32::anchor() {} 10949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 1106948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga NainarNVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT, 1116948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar StringRef CPU, StringRef FS, 1126948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar const TargetOptions &Options, 1136948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Reloc::Model RM, CodeModel::Model CM, 1146948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar CodeGenOpt::Level OL) 1153639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 11649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 11749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskivoid NVPTXTargetMachine64::anchor() {} 11849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 1196948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga NainarNVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT, 1206948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar StringRef CPU, StringRef FS, 1216948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar const TargetOptions &Options, 1226948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Reloc::Model RM, CodeModel::Model CM, 1236948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar CodeGenOpt::Level OL) 1243639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 12549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 1265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramernamespace { 12749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskiclass NVPTXPassConfig : public TargetPassConfig { 12849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskipublic: 12949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 1303639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski : TargetPassConfig(TM, PM) {} 13149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 13249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski NVPTXTargetMachine &getNVPTXTargetMachine() const { 13349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski return getTM<NVPTXTargetMachine>(); 13449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski } 13549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void addIRPasses() override; 137dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines bool addInstSelector() override; 138ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines void addPostRegAlloc() override; 139c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines void addMachineSSAOptimization() override; 1405443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski 141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines FunctionPass *createTargetRegisterAllocator(bool) override; 142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void addFastRegAlloc(FunctionPass *RegAllocPass) override; 143dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 144cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar 145cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarprivate: 146cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. 147cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar void addEarlyCSEOrGVNPass(); 14849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski}; 1495c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer} // end anonymous namespace 15049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 15149683f3c961379fbc088871a5d6304950f1f1cbcJustin HolewinskiTargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 15249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 15349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski return PassConfig; 15449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski} 15549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 156ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesTargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { 157cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar return TargetIRAnalysis([this](const Function &F) { 158cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar return TargetTransformInfo(NVPTXTTIImpl(this, F)); 159cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar }); 160cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar} 161cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar 162cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarvoid NVPTXPassConfig::addEarlyCSEOrGVNPass() { 163cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar if (getOptLevel() == CodeGenOpt::Aggressive) 164cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createGVNPass()); 165cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar else 166cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createEarlyCSEPass()); 16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 16837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1697536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinskivoid NVPTXPassConfig::addIRPasses() { 1705443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski // The following passes are known to not play well with virtual regs hanging 1715443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski // around after register allocation (which in our case, is *all* registers). 1725443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski // We explicitly disable them here. We do, however, need some functionality 1735443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 1745443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 1755443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski disablePass(&PrologEpilogCodeInserterID); 1765443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski disablePass(&MachineCopyPropagationID); 17740f689851f6737d59b6f6a771ab1e07ce84c9bc3Justin Holewinski disablePass(&TailDuplicateID); 1785443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski 179cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createNVVMReflectPass()); 180dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addPass(createNVPTXImageOptimizerPass()); 18136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines addPass(createNVPTXAssignValidGlobalNamesPass()); 1827536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinski addPass(createGenericToNVVMPass()); 183cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar 184cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // === Propagate special address spaces === 1856948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); 1866948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // NVPTXLowerKernelArgs emits alloca for byval parameters which can often 187cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // be eliminated by SROA. 1886948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar addPass(createSROAPass()); 189cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createNVPTXLowerAllocaPass()); 190cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 1916948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave 1926948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // them unused. We could remove dead code in an ad-hoc manner, but that 1936948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // requires manual work and might be error-prone. 1946948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar addPass(createDeadCodeEliminationPass()); 195cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar 196cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // === Straight-line scalar optimizations === 197dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addPass(createSeparateConstOffsetFromGEPPass()); 198cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createSpeculativeExecutionPass()); 1996948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // ReassociateGEPs exposes more opportunites for SLSR. See 2006948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // the example in reassociate-geps-and-slsr.ll. 2016948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar addPass(createStraightLineStrengthReducePass()); 2026948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or 2036948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE 2046948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // for some of our benchmarks. 205cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addEarlyCSEOrGVNPass(); 2066948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // Run NaryReassociate after EarlyCSE/GVN to be more effective. 2076948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar addPass(createNaryReassociatePass()); 2086948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // NaryReassociate on GEPs creates redundant common expressions, so run 2096948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // EarlyCSE after it. 2106948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar addPass(createEarlyCSEPass()); 211cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar 212cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // === LSR and other generic IR passes === 213cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar TargetPassConfig::addIRPasses(); 214cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // EarlyCSE is not always strong enough to clean up what LSR produces. For 215cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // example, GVN can combine 216cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // 217cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // %0 = add %a, %b 218cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // %1 = add %b, %a 219cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // 220cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // and 221cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // 222cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // %0 = shl nsw %a, 2 223cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // %1 = shl %a, 2 224cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // 225cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // but EarlyCSE can do neither of them. 226cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addEarlyCSEOrGVNPass(); 2277536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinski} 2287536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinski 22949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskibool NVPTXPassConfig::addInstSelector() { 2304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); 231dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 232564fbf6aff8fb95646a1290078a37c2d4dbe629fBob Wilson addPass(createLowerAggrCopies()); 233564fbf6aff8fb95646a1290078a37c2d4dbe629fBob Wilson addPass(createAllocaHoisting()); 234564fbf6aff8fb95646a1290078a37c2d4dbe629fBob Wilson addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 235dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!ST.hasImageHandles()) 237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addPass(createNVPTXReplaceImageHandlesPass()); 238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 23949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski return false; 24049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski} 24149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski 242ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid NVPTXPassConfig::addPostRegAlloc() { 243ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines addPass(createNVPTXPrologEpilogPass(), false); 244cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // NVPTXPrologEpilogPass calculates frame object offset and replace frame 245cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // index with VRFrame register. NVPTXPeephole need to be run after that and 246cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar // will replace VRFrame with VRFrameLocal when possible. 247cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar addPass(createNVPTXPeephole()); 2485443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski} 2495443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski 250488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin KramerFunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; // No reg alloc 252488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer} 253488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer 2545443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinskivoid NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 255488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer assert(!RegAllocPass && "NVPTX uses no regalloc!"); 25681d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&PHIEliminationID); 25781d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&TwoAddressInstructionPassID); 2585443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski} 2595443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski 2605443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinskivoid NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 261488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer assert(!RegAllocPass && "NVPTX uses no regalloc!"); 26281d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 26381d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&ProcessImplicitDefsID); 26481d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&LiveVariablesID); 26581d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&MachineLoopInfoID); 26681d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&PHIEliminationID); 26781d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 26881d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&TwoAddressInstructionPassID); 26981d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&RegisterCoalescerID); 27081d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 27181d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski // PreRA instruction scheduling. 27281d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski if (addPass(&MachineSchedulerID)) 27381d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski printAndVerify("After Machine Scheduling"); 27481d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 27581d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 27681d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski addPass(&StackSlotColoringID); 27781d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 27881d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski // FIXME: Needs physical registers 27981d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski //addPass(&PostRAMachineLICMID); 28081d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski 28181d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski printAndVerify("After StackSlotColoring"); 2825443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski} 283c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 284c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesvoid NVPTXPassConfig::addMachineSSAOptimization() { 285c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Pre-ra tail duplication. 286c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (addPass(&EarlyTailDuplicateID)) 287c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines printAndVerify("After Pre-RegAlloc TailDuplicate"); 288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Optimize PHIs before DCE: removing dead PHI cycles may make more 290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // instructions dead. 291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&OptimizePHIsID); 292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 293c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // This pass merges large allocas. StackSlotColoring is a different pass 294c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // which merges spill slots. 295c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&StackColoringID); 296c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 297c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // If the target requests it, assign local variables to stack slots relative 298c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // to one another and simplify frame index references where possible. 299c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&LocalStackSlotAllocationID); 300c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 301c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // With optimization, dead code should already be eliminated. However 302c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // there is one known exception: lowered code for arguments that are only 303c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // used by tail calls, where the tail calls reuse the incoming stack 304c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 305c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&DeadMachineInstructionElimID); 306c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines printAndVerify("After codegen DCE pass"); 307c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 308c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Allow targets to insert passes that improve instruction level parallelism, 309c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // like if-conversion. Such passes will typically need dominator trees and 310c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // loop info, just like LICM and CSE below. 311c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (addILPOpts()) 312c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines printAndVerify("After ILP optimizations"); 313c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 314c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&MachineLICMID); 315c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&MachineCSEID); 316c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 317c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&MachineSinkingID); 318c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines printAndVerify("After Machine LICM, CSE and Sinking passes"); 319c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 320c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines addPass(&PeepholeOptimizerID); 321c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines printAndVerify("After codegen peephole optimization pass"); 322c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 323