SIInsertWaits.cpp revision a2b4eb6d15a13de257319ac6231b5ab622cd02b1
//===-- SIInsertWaits.cpp - Insert wait instructions ----------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief Insert wait instructions for memory reads and writes.
12///
13/// Memory reads and writes are issued asynchronously, so we need to insert
14/// S_WAITCNT instructions when we want to access any of their results or
15/// overwrite any register that's used asynchronously.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AMDGPU.h"
20#include "SIInstrInfo.h"
21#include "SIMachineFunctionInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26
27using namespace llvm;
28
29namespace {
30
/// \brief Values of the three hardware counters tracked by this pass.
///
/// The same storage can be addressed by name (Named.VM, Named.EXP,
/// Named.LGKM) or by position (Array[0..2]); the pass uses the array form
/// whenever it treats all three counters uniformly.
union Counters {
  struct {
    unsigned VM;
    unsigned EXP;
    unsigned LGKM;
  } Named;
  unsigned Array[3];
};

/// \brief Range of register slots; iterated as the half-open range
/// [first, second), so (0, 0) denotes "no registers".
typedef std::pair<unsigned, unsigned> RegInterval;

/// \brief Table holding one counter snapshot per register slot (512 slots).
typedef Counters RegCounters[512];
44
45class SIInsertWaits : public MachineFunctionPass {
46
47private:
48  static char ID;
49  const SIInstrInfo *TII;
50  const SIRegisterInfo *TRI;
51  const MachineRegisterInfo *MRI;
52
53  /// \brief Constant hardware limits
54  static const Counters WaitCounts;
55
56  /// \brief Constant zero value
57  static const Counters ZeroCounts;
58
59  /// \brief Counter values we have already waited on.
60  Counters WaitedOn;
61
62  /// \brief Counter values for last instruction issued.
63  Counters LastIssued;
64
65  /// \brief Registers used by async instructions.
66  RegCounters UsedRegs;
67
68  /// \brief Registers defined by async instructions.
69  RegCounters DefinedRegs;
70
71  /// \brief Different export instruction types seen since last wait.
72  unsigned ExpInstrTypesSeen;
73
74  /// \brief Get increment/decrement amount for this instruction.
75  Counters getHwCounts(MachineInstr &MI);
76
77  /// \brief Is operand relevant for async execution?
78  bool isOpRelevant(MachineOperand &Op);
79
80  /// \brief Get register interval an operand affects.
81  RegInterval getRegInterval(MachineOperand &Op);
82
83  /// \brief Handle instructions async components
84  void pushInstruction(MachineInstr &MI);
85
86  /// \brief Insert the actual wait instruction
87  bool insertWait(MachineBasicBlock &MBB,
88                  MachineBasicBlock::iterator I,
89                  const Counters &Counts);
90
91  /// \brief Do we need def2def checks?
92  bool unorderedDefines(MachineInstr &MI);
93
94  /// \brief Resolve all operand dependencies to counter requirements
95  Counters handleOperands(MachineInstr &MI);
96
97public:
98  SIInsertWaits(TargetMachine &tm) :
99    MachineFunctionPass(ID),
100    TII(0),
101    TRI(0),
102    ExpInstrTypesSeen(0) { }
103
104  virtual bool runOnMachineFunction(MachineFunction &MF);
105
106  const char *getPassName() const {
107    return "SI insert wait  instructions";
108  }
109
110};
111
112} // End anonymous namespace
113
114char SIInsertWaits::ID = 0;
115
116const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } };
117const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
118
119FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
120  return new SIInsertWaits(tm);
121}
122
123Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
124
125  uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
126  Counters Result;
127
128  Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
129
130  // Only consider stores or EXP for EXP_CNT
131  Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
132      (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
133
134  // LGKM may uses larger values
135  if (TSFlags & SIInstrFlags::LGKM_CNT) {
136
137    if (TII->isSMRD(MI.getOpcode())) {
138
139      MachineOperand &Op = MI.getOperand(0);
140      assert(Op.isReg() && "First LGKM operand must be a register!");
141
142      unsigned Reg = Op.getReg();
143      unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
144      Result.Named.LGKM = Size > 4 ? 2 : 1;
145
146    } else {
147      // DS
148      Result.Named.LGKM = 1;
149    }
150
151  } else {
152    Result.Named.LGKM = 0;
153  }
154
155  return Result;
156}
157
158bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
159
160  // Constants are always irrelevant
161  if (!Op.isReg())
162    return false;
163
164  // Defines are always relevant
165  if (Op.isDef())
166    return true;
167
168  // For exports all registers are relevant
169  MachineInstr &MI = *Op.getParent();
170  if (MI.getOpcode() == AMDGPU::EXP)
171    return true;
172
173  // For stores the stored value is also relevant
174  if (!MI.getDesc().mayStore())
175    return false;
176
177  for (MachineInstr::mop_iterator I = MI.operands_begin(),
178       E = MI.operands_end(); I != E; ++I) {
179
180    if (I->isReg() && I->isUse())
181      return Op.isIdenticalTo(*I);
182  }
183
184  return false;
185}
186
187RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
188
189  if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
190    return std::make_pair(0, 0);
191
192  unsigned Reg = Op.getReg();
193  unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
194
195  assert(Size >= 4);
196
197  RegInterval Result;
198  Result.first = TRI->getEncodingValue(Reg);
199  Result.second = Result.first + Size / 4;
200
201  return Result;
202}
203
204void SIInsertWaits::pushInstruction(MachineInstr &MI) {
205
206  // Get the hardware counter increments and sum them up
207  Counters Increment = getHwCounts(MI);
208  unsigned Sum = 0;
209
210  for (unsigned i = 0; i < 3; ++i) {
211    LastIssued.Array[i] += Increment.Array[i];
212    Sum += Increment.Array[i];
213  }
214
215  // If we don't increase anything then that's it
216  if (Sum == 0)
217    return;
218
219  // Remember which export instructions we have seen
220  if (Increment.Named.EXP) {
221    ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
222  }
223
224  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
225
226    MachineOperand &Op = MI.getOperand(i);
227    if (!isOpRelevant(Op))
228      continue;
229
230    RegInterval Interval = getRegInterval(Op);
231    for (unsigned j = Interval.first; j < Interval.second; ++j) {
232
233      // Remember which registers we define
234      if (Op.isDef())
235        DefinedRegs[j] = LastIssued;
236
237      // and which one we are using
238      if (Op.isUse())
239        UsedRegs[j] = LastIssued;
240    }
241  }
242}
243
244bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
245                               MachineBasicBlock::iterator I,
246                               const Counters &Required) {
247
248  // End of program? No need to wait on anything
249  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
250    return false;
251
252  // Figure out if the async instructions execute in order
253  bool Ordered[3];
254
255  // VM_CNT is always ordered
256  Ordered[0] = true;
257
258  // EXP_CNT is unordered if we have both EXP & VM-writes
259  Ordered[1] = ExpInstrTypesSeen == 3;
260
261  // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
262  Ordered[2] = false;
263
264  // The values we are going to put into the S_WAITCNT instruction
265  Counters Counts = WaitCounts;
266
267  // Do we really need to wait?
268  bool NeedWait = false;
269
270  for (unsigned i = 0; i < 3; ++i) {
271
272    if (Required.Array[i] <= WaitedOn.Array[i])
273      continue;
274
275    NeedWait = true;
276
277    if (Ordered[i]) {
278      unsigned Value = LastIssued.Array[i] - Required.Array[i];
279
280      // adjust the value to the real hardware posibilities
281      Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
282
283    } else
284      Counts.Array[i] = 0;
285
286    // Remember on what we have waited on
287    WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
288  }
289
290  if (!NeedWait)
291    return false;
292
293  // Reset EXP_CNT instruction types
294  if (Counts.Named.EXP == 0)
295    ExpInstrTypesSeen = 0;
296
297  // Build the wait instruction
298  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
299          .addImm((Counts.Named.VM & 0xF) |
300                  ((Counts.Named.EXP & 0x7) << 4) |
301                  ((Counts.Named.LGKM & 0x7) << 8));
302
303  return true;
304}
305
306/// \brief helper function for handleOperands
307static void increaseCounters(Counters &Dst, const Counters &Src) {
308
309  for (unsigned i = 0; i < 3; ++i)
310    Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
311}
312
313Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
314
315  Counters Result = ZeroCounts;
316
317  // For each register affected by this
318  // instruction increase the result sequence
319  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
320
321    MachineOperand &Op = MI.getOperand(i);
322    RegInterval Interval = getRegInterval(Op);
323    for (unsigned j = Interval.first; j < Interval.second; ++j) {
324
325      if (Op.isDef()) {
326        increaseCounters(Result, UsedRegs[j]);
327        increaseCounters(Result, DefinedRegs[j]);
328      }
329
330      if (Op.isUse())
331        increaseCounters(Result, DefinedRegs[j]);
332    }
333  }
334
335  return Result;
336}
337
338bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
339  bool Changes = false;
340
341  TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
342  TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
343
344  MRI = &MF.getRegInfo();
345
346  WaitedOn = ZeroCounts;
347  LastIssued = ZeroCounts;
348
349  memset(&UsedRegs, 0, sizeof(UsedRegs));
350  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
351
352  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
353       BI != BE; ++BI) {
354
355    MachineBasicBlock &MBB = *BI;
356    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
357         I != E; ++I) {
358
359      Changes |= insertWait(MBB, I, handleOperands(*I));
360      pushInstruction(*I);
361    }
362
363    // Wait for everything at the end of the MBB
364    Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
365  }
366
367  return Changes;
368}
369