1//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief The AMDGPU target machine contains all of the hardware specific
12/// information  needed to emit code for R600 and SI GPUs.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPUTargetMachine.h"
17#include "AMDGPU.h"
18#include "AMDGPUTargetTransformInfo.h"
19#include "R600ISelLowering.h"
20#include "R600InstrInfo.h"
21#include "R600MachineScheduler.h"
22#include "SIISelLowering.h"
23#include "SIInstrInfo.h"
24#include "llvm/Analysis/Passes.h"
25#include "llvm/CodeGen/MachineFunctionAnalysis.h"
26#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
27#include "llvm/CodeGen/MachineModuleInfo.h"
28#include "llvm/CodeGen/Passes.h"
29#include "llvm/IR/Verifier.h"
30#include "llvm/MC/MCAsmInfo.h"
31#include "llvm/IR/LegacyPassManager.h"
32#include "llvm/Support/TargetRegistry.h"
33#include "llvm/Support/raw_os_ostream.h"
34#include "llvm/Transforms/IPO.h"
35#include "llvm/Transforms/Scalar.h"
36#include <llvm/CodeGen/Passes.h>
37
38using namespace llvm;
39
40extern "C" void LLVMInitializeR600Target() {
41  // Register the target
42  RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
43  RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
44}
45
46static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
47  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
48}
49
50static MachineSchedRegistry
51SchedCustomRegistry("r600", "Run R600's custom scheduler",
52                    createR600MachineScheduler);
53
54static std::string computeDataLayout(StringRef TT) {
55  Triple Triple(TT);
56  std::string Ret = "e-p:32:32";
57
58  if (Triple.getArch() == Triple::amdgcn) {
59    // 32-bit private, local, and region pointers. 64-bit global and constant.
60    Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
61  }
62
63  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
64         "-v512:512-v1024:1024-v2048:2048-n32:64";
65
66  return Ret;
67}
68
69AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
70                                         StringRef CPU, StringRef FS,
71                                         TargetOptions Options, Reloc::Model RM,
72                                         CodeModel::Model CM,
73                                         CodeGenOpt::Level OptLevel)
74    : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
75                        OptLevel),
76      TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
77      IntrinsicInfo() {
78  setRequiresStructuredCFG(true);
79  initAsmInfo();
80}
81
82AMDGPUTargetMachine::~AMDGPUTargetMachine() {
83  delete TLOF;
84}
85
86//===----------------------------------------------------------------------===//
87// R600 Target Machine (R600 -> Cayman)
88//===----------------------------------------------------------------------===//
89
90R600TargetMachine::R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
91                    StringRef CPU, TargetOptions Options, Reloc::Model RM,
92                    CodeModel::Model CM, CodeGenOpt::Level OL) :
93    AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
94
95
96//===----------------------------------------------------------------------===//
97// GCN Target Machine (SI+)
98//===----------------------------------------------------------------------===//
99
100GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
101                    StringRef CPU, TargetOptions Options, Reloc::Model RM,
102                    CodeModel::Model CM, CodeGenOpt::Level OL) :
103    AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
104
105//===----------------------------------------------------------------------===//
106// AMDGPU Pass Setup
107//===----------------------------------------------------------------------===//
108
109namespace {
110class AMDGPUPassConfig : public TargetPassConfig {
111public:
112  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
113    : TargetPassConfig(TM, PM) {}
114
115  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
116    return getTM<AMDGPUTargetMachine>();
117  }
118
119  ScheduleDAGInstrs *
120  createMachineScheduler(MachineSchedContext *C) const override {
121    const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
122    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
123      return createR600MachineScheduler(C);
124    return nullptr;
125  }
126
127  void addIRPasses() override;
128  void addCodeGenPrepare() override;
129  virtual bool addPreISel() override;
130  virtual bool addInstSelector() override;
131};
132
133class R600PassConfig : public AMDGPUPassConfig {
134public:
135  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
136    : AMDGPUPassConfig(TM, PM) { }
137
138  bool addPreISel() override;
139  void addPreRegAlloc() override;
140  void addPreSched2() override;
141  void addPreEmitPass() override;
142};
143
144class GCNPassConfig : public AMDGPUPassConfig {
145public:
146  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
147    : AMDGPUPassConfig(TM, PM) { }
148  bool addPreISel() override;
149  bool addInstSelector() override;
150  void addPreRegAlloc() override;
151  void addPostRegAlloc() override;
152  void addPreSched2() override;
153  void addPreEmitPass() override;
154};
155
156} // End of anonymous namespace
157
158TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
159  return TargetIRAnalysis(
160      [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
161}
162
163void AMDGPUPassConfig::addIRPasses() {
164  // Function calls are not supported, so make sure we inline everything.
165  addPass(createAMDGPUAlwaysInlinePass());
166  addPass(createAlwaysInlinerPass());
167  // We need to add the barrier noop pass, otherwise adding the function
168  // inlining pass will cause all of the PassConfigs passes to be run
169  // one function at a time, which means if we have a nodule with two
170  // functions, then we will generate code for the first function
171  // without ever running any passes on the second.
172  addPass(createBarrierNoopPass());
173  TargetPassConfig::addIRPasses();
174}
175
176void AMDGPUPassConfig::addCodeGenPrepare() {
177  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
178  if (ST.isPromoteAllocaEnabled()) {
179    addPass(createAMDGPUPromoteAlloca(ST));
180    addPass(createSROAPass());
181  }
182  TargetPassConfig::addCodeGenPrepare();
183}
184
185bool
186AMDGPUPassConfig::addPreISel() {
187  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
188  addPass(createFlattenCFGPass());
189  if (ST.IsIRStructurizerEnabled())
190    addPass(createStructurizeCFGPass());
191  return false;
192}
193
194bool AMDGPUPassConfig::addInstSelector() {
195  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
196  return false;
197}
198
199//===----------------------------------------------------------------------===//
200// R600 Pass Setup
201//===----------------------------------------------------------------------===//
202
203bool R600PassConfig::addPreISel() {
204  AMDGPUPassConfig::addPreISel();
205  addPass(createR600TextureIntrinsicsReplacer());
206  return false;
207}
208
209void R600PassConfig::addPreRegAlloc() {
210  addPass(createR600VectorRegMerger(*TM));
211}
212
213void R600PassConfig::addPreSched2() {
214  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
215  addPass(createR600EmitClauseMarkers(), false);
216  if (ST.isIfCvtEnabled())
217    addPass(&IfConverterID, false);
218  addPass(createR600ClauseMergePass(*TM), false);
219}
220
221void R600PassConfig::addPreEmitPass() {
222  addPass(createAMDGPUCFGStructurizerPass(), false);
223  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
224  addPass(&FinalizeMachineBundlesID, false);
225  addPass(createR600Packetizer(*TM), false);
226  addPass(createR600ControlFlowFinalizer(*TM), false);
227}
228
229TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
230  return new R600PassConfig(this, PM);
231}
232
233//===----------------------------------------------------------------------===//
234// GCN Pass Setup
235//===----------------------------------------------------------------------===//
236
237bool GCNPassConfig::addPreISel() {
238  AMDGPUPassConfig::addPreISel();
239  addPass(createSinkingPass());
240  addPass(createSITypeRewriter());
241  addPass(createSIAnnotateControlFlowPass());
242  return false;
243}
244
245bool GCNPassConfig::addInstSelector() {
246  AMDGPUPassConfig::addInstSelector();
247  addPass(createSILowerI1CopiesPass());
248  addPass(createSIFixSGPRCopiesPass(*TM));
249  addPass(createSIFoldOperandsPass());
250  return false;
251}
252
253void GCNPassConfig::addPreRegAlloc() {
254  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
255  if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
256    // Don't do this with no optimizations since it throws away debug info by
257    // merging nonadjacent loads.
258
259    // This should be run after scheduling, but before register allocation. It
260    // also need extra copies to the address operand to be eliminated.
261    initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
262    insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
263  }
264  addPass(createSIShrinkInstructionsPass(), false);
265  addPass(createSIFixSGPRLiveRangesPass(), false);
266}
267
268void GCNPassConfig::addPostRegAlloc() {
269  addPass(createSIPrepareScratchRegs(), false);
270  addPass(createSIShrinkInstructionsPass(), false);
271}
272
273void GCNPassConfig::addPreSched2() {
274  addPass(createSIInsertWaits(*TM), false);
275}
276
277void GCNPassConfig::addPreEmitPass() {
278  addPass(createSILowerControlFlowPass(*TM), false);
279}
280
281TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
282  return new GCNPassConfig(this, PM);
283}
284