//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the NVPTX target.
//
//===----------------------------------------------------------------------===//
1349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
#include "NVPTXTargetMachine.h"
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;
4949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
5021fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinskinamespace llvm {
5121fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinskivoid initializeNVVMReflectPass(PassRegistry&);
527536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinskivoid initializeGenericToNVVMPass(PassRegistry&);
534c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainarvoid initializeNVPTXAllocaHoistingPass(PassRegistry &);
5436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
56cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarvoid initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
576948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainarvoid initializeNVPTXLowerKernelArgsPass(PassRegistry &);
58cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarvoid initializeNVPTXLowerAllocaPass(PassRegistry &);
5921fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski}
6021fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski
6149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskiextern "C" void LLVMInitializeNVPTXTarget() {
6249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  // Register the target.
6349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
6449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
6549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
6621fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski  // FIXME: This pass is really intended to be invoked during IR optimization,
6721fdcb02716f5eae097abfd2f44e40563e90180aJustin Holewinski  // but it's very NVPTX-specific.
68cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  PassRegistry &PR = *PassRegistry::getPassRegistry();
69cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVVMReflectPass(PR);
70cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeGenericToNVVMPass(PR);
71cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVPTXAllocaHoistingPass(PR);
72cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVPTXAssignValidGlobalNamesPass(PR);
73cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
74cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVPTXLowerKernelArgsPass(PR);
75cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVPTXLowerAllocaPass(PR);
76cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  initializeNVPTXLowerAggrCopiesPass(PR);
7736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
7836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
// Build the data layout string handed to the LLVMTargetMachine base class.
//
// The string always starts with "e" and ends with the shared integer/vector
// alignment and native-width specification. Only the 32-bit variant inserts
// an explicit pointer specification ("-p:32:32"); the 64-bit variant adds
// none.
static std::string computeDataLayout(bool is64Bit) {
  std::string Ret = "e";

  // 32-bit targets spell out 32-bit pointers explicitly.
  if (!is64Bit)
    Ret += "-p:32:32";

  Ret += "-i64:64-v16:16-v32:32-n16:32:64";

  return Ret;
}
89ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
906948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga NainarNVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
91c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                                       StringRef CPU, StringRef FS,
92c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                                       const TargetOptions &Options,
93c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                                       Reloc::Model RM, CodeModel::Model CM,
94c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines                                       CodeGenOpt::Level OL, bool is64bit)
954c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar    : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
964c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar                        CM, OL),
974c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar      Subtarget(TT, CPU, FS, *this) {
996948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  if (TT.getOS() == Triple::NVCL)
100ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    drvInterface = NVPTX::NVCL;
101ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  else
102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    drvInterface = NVPTX::CUDA;
1034a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola  initAsmInfo();
1044a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola}
10549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
10637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesNVPTXTargetMachine::~NVPTXTargetMachine() {}
10737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
10849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskivoid NVPTXTargetMachine32::anchor() {}
10949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
1106948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga NainarNVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
1116948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           StringRef CPU, StringRef FS,
1126948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           const TargetOptions &Options,
1136948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           Reloc::Model RM, CodeModel::Model CM,
1146948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           CodeGenOpt::Level OL)
1153639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
11649683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
11749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskivoid NVPTXTargetMachine64::anchor() {}
11849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
1196948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga NainarNVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
1206948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           StringRef CPU, StringRef FS,
1216948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           const TargetOptions &Options,
1226948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           Reloc::Model RM, CodeModel::Model CM,
1236948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar                                           CodeGenOpt::Level OL)
1243639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
12549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
1265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramernamespace {
12749683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskiclass NVPTXPassConfig : public TargetPassConfig {
12849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskipublic:
12949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
1303639ce2575660a0e6938d2e84e8bd9a738fd7051Justin Holewinski      : TargetPassConfig(TM, PM) {}
13149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
13249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  NVPTXTargetMachine &getNVPTXTargetMachine() const {
13349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski    return getTM<NVPTXTargetMachine>();
13449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  }
13549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void addIRPasses() override;
137dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool addInstSelector() override;
138ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  void addPostRegAlloc() override;
139c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  void addMachineSSAOptimization() override;
1405443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski
141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  FunctionPass *createTargetRegisterAllocator(bool) override;
142dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
143dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
144cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar
145cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarprivate:
146cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
147cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  void addEarlyCSEOrGVNPass();
14849683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski};
1495c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer} // end anonymous namespace
15049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
15149683f3c961379fbc088871a5d6304950f1f1cbcJustin HolewinskiTargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
15249683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
15349683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  return PassConfig;
15449683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski}
15549683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
156ebe69fe11e48d322045d5949c83283927a0d790bStephen HinesTargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
157cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  return TargetIRAnalysis([this](const Function &F) {
158cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar    return TargetTransformInfo(NVPTXTTIImpl(this, F));
159cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  });
160cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar}
161cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar
162cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainarvoid NVPTXPassConfig::addEarlyCSEOrGVNPass() {
163cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  if (getOptLevel() == CodeGenOpt::Aggressive)
164cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar    addPass(createGVNPass());
165cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  else
166cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar    addPass(createEarlyCSEPass());
16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
16837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1697536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinskivoid NVPTXPassConfig::addIRPasses() {
1705443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  // The following passes are known to not play well with virtual regs hanging
1715443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  // around after register allocation (which in our case, is *all* registers).
1725443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  // We explicitly disable them here.  We do, however, need some functionality
1735443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
1745443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
1755443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  disablePass(&PrologEpilogCodeInserterID);
1765443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski  disablePass(&MachineCopyPropagationID);
17740f689851f6737d59b6f6a771ab1e07ce84c9bc3Justin Holewinski  disablePass(&TailDuplicateID);
1785443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski
179cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addPass(createNVVMReflectPass());
180dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addPass(createNVPTXImageOptimizerPass());
18136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  addPass(createNVPTXAssignValidGlobalNamesPass());
1827536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinski  addPass(createGenericToNVVMPass());
183cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar
184cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // === Propagate special address spaces ===
1856948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
1866948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
187cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // be eliminated by SROA.
1886948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  addPass(createSROAPass());
189cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addPass(createNVPTXLowerAllocaPass());
190cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addPass(createNVPTXFavorNonGenericAddrSpacesPass());
1916948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
1926948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // them unused. We could remove dead code in an ad-hoc manner, but that
1936948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // requires manual work and might be error-prone.
1946948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  addPass(createDeadCodeEliminationPass());
195cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar
196cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // === Straight-line scalar optimizations ===
197dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addPass(createSeparateConstOffsetFromGEPPass());
198cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addPass(createSpeculativeExecutionPass());
1996948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // ReassociateGEPs exposes more opportunites for SLSR. See
2006948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // the example in reassociate-geps-and-slsr.ll.
2016948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  addPass(createStraightLineStrengthReducePass());
2026948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
2036948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
2046948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // for some of our benchmarks.
205cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addEarlyCSEOrGVNPass();
2066948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
2076948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  addPass(createNaryReassociatePass());
2086948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // NaryReassociate on GEPs creates redundant common expressions, so run
2096948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  // EarlyCSE after it.
2106948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar  addPass(createEarlyCSEPass());
211cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar
212cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // === LSR and other generic IR passes ===
213cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  TargetPassConfig::addIRPasses();
214cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // EarlyCSE is not always strong enough to clean up what LSR produces. For
215cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // example, GVN can combine
216cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //
217cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //   %0 = add %a, %b
218cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //   %1 = add %b, %a
219cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //
220cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // and
221cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //
222cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //   %0 = shl nsw %a, 2
223cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //   %1 = shl %a, 2
224cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  //
225cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // but EarlyCSE can do neither of them.
226cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addEarlyCSEOrGVNPass();
2277536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinski}
2287536ecf2916a6a986f0d328069e3a210f34d5ea7Justin Holewinski
22949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinskibool NVPTXPassConfig::addInstSelector() {
2304c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
231dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
232564fbf6aff8fb95646a1290078a37c2d4dbe629fBob Wilson  addPass(createLowerAggrCopies());
233564fbf6aff8fb95646a1290078a37c2d4dbe629fBob Wilson  addPass(createAllocaHoisting());
234564fbf6aff8fb95646a1290078a37c2d4dbe629fBob Wilson  addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
235dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!ST.hasImageHandles())
237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    addPass(createNVPTXReplaceImageHandlesPass());
238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
23949683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski  return false;
24049683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski}
24149683f3c961379fbc088871a5d6304950f1f1cbcJustin Holewinski
242ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesvoid NVPTXPassConfig::addPostRegAlloc() {
243ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  addPass(createNVPTXPrologEpilogPass(), false);
244cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // NVPTXPrologEpilogPass calculates frame object offset and replace frame
245cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // index with VRFrame register. NVPTXPeephole need to be run after that and
246cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  // will replace VRFrame with VRFrameLocal when possible.
247cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  addPass(createNVPTXPeephole());
2485443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski}
2495443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski
250488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin KramerFunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr; // No reg alloc
252488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer}
253488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer
2545443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinskivoid NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
255488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer  assert(!RegAllocPass && "NVPTX uses no regalloc!");
25681d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&PHIEliminationID);
25781d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&TwoAddressInstructionPassID);
2585443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski}
2595443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski
2605443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinskivoid NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
261488401e9c995b6bfcc54fa7c54a5ec09e75d01a1Benjamin Kramer  assert(!RegAllocPass && "NVPTX uses no regalloc!");
26281d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
26381d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&ProcessImplicitDefsID);
26481d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&LiveVariablesID);
26581d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&MachineLoopInfoID);
26681d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&PHIEliminationID);
26781d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
26881d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&TwoAddressInstructionPassID);
26981d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&RegisterCoalescerID);
27081d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
27181d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  // PreRA instruction scheduling.
27281d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  if (addPass(&MachineSchedulerID))
27381d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski    printAndVerify("After Machine Scheduling");
27481d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
27581d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
27681d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  addPass(&StackSlotColoringID);
27781d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
27881d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  // FIXME: Needs physical registers
27981d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  //addPass(&PostRAMachineLICMID);
28081d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski
28181d9902bb11b05c19c2a82209c362bd4e772bfeaJustin Holewinski  printAndVerify("After StackSlotColoring");
2825443e7d79044f3198f2da044f1b389b40d9bea6fJustin Holewinski}
283c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
284c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesvoid NVPTXPassConfig::addMachineSSAOptimization() {
285c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // Pre-ra tail duplication.
286c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (addPass(&EarlyTailDuplicateID))
287c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    printAndVerify("After Pre-RegAlloc TailDuplicate");
288c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
289c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // Optimize PHIs before DCE: removing dead PHI cycles may make more
290c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // instructions dead.
291c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&OptimizePHIsID);
292c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
293c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // This pass merges large allocas. StackSlotColoring is a different pass
294c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // which merges spill slots.
295c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&StackColoringID);
296c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
297c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // If the target requests it, assign local variables to stack slots relative
298c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // to one another and simplify frame index references where possible.
299c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&LocalStackSlotAllocationID);
300c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
301c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // With optimization, dead code should already be eliminated. However
302c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // there is one known exception: lowered code for arguments that are only
303c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // used by tail calls, where the tail calls reuse the incoming stack
304c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
305c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&DeadMachineInstructionElimID);
306c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  printAndVerify("After codegen DCE pass");
307c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
308c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // Allow targets to insert passes that improve instruction level parallelism,
309c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // like if-conversion. Such passes will typically need dominator trees and
310c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  // loop info, just like LICM and CSE below.
311c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  if (addILPOpts())
312c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines    printAndVerify("After ILP optimizations");
313c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
314c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&MachineLICMID);
315c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&MachineCSEID);
316c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
317c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&MachineSinkingID);
318c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  printAndVerify("After Machine LICM, CSE and Sinking passes");
319c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines
320c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  addPass(&PeepholeOptimizerID);
321c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  printAndVerify("After codegen peephole optimization pass");
322c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines}
323