R600MachineScheduler.cpp revision ad7ecc65b1b1d6466ff035168c86f208a91aa1b4
1//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief R600 Machine Scheduler interface
12// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "misched"
17
18#include "R600MachineScheduler.h"
19#include "llvm/CodeGen/LiveIntervalAnalysis.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/Pass.h"
22#include "llvm/PassManager.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace llvm;
26
27void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
28
29  DAG = dag;
30  TII = static_cast<const R600InstrInfo*>(DAG->TII);
31  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
32  MRI = &DAG->MRI;
33  CurInstKind = IDOther;
34  CurEmitted = 0;
35  OccupedSlotsMask = 15;
36  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
37  InstKindLimit[IDOther] = 32;
38
39  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
40  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
41}
42
43void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
44                                  std::vector<SUnit *> &QDst)
45{
46  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
47  QSrc.clear();
48}
49
50SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
51  SUnit *SU = 0;
52  NextInstKind = IDOther;
53
54  IsTopNode = false;
55
56  // check if we might want to switch current clause type
57  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
58      (Available[CurInstKind].empty());
59  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
60      (!Available[IDFetch].empty() || !Available[IDOther].empty());
61
62  // We want to scheduled AR defs as soon as possible to make sure they aren't
63  // put in a different ALU clause from their uses.
64  if (!SU && !UnscheduledARDefs.empty()) {
65      SU = UnscheduledARDefs[0];
66      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
67      NextInstKind = IDAlu;
68  }
69
70  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
71      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
72    // try to pick ALU
73    SU = pickAlu();
74    if (SU) {
75      if (CurEmitted >= InstKindLimit[IDAlu])
76        CurEmitted = 0;
77      NextInstKind = IDAlu;
78    }
79  }
80
81  if (!SU) {
82    // try to pick FETCH
83    SU = pickOther(IDFetch);
84    if (SU)
85      NextInstKind = IDFetch;
86  }
87
88  // try to pick other
89  if (!SU) {
90    SU = pickOther(IDOther);
91    if (SU)
92      NextInstKind = IDOther;
93  }
94
95  // We want to schedule the AR uses as late as possible to make sure that
96  // the AR defs have been released.
97  if (!SU && !UnscheduledARUses.empty()) {
98      SU = UnscheduledARUses[0];
99      UnscheduledARUses.erase(UnscheduledARUses.begin());
100      NextInstKind = IDAlu;
101  }
102
103
104  DEBUG(
105      if (SU) {
106        dbgs() << " ** Pick node **\n";
107        SU->dump(DAG);
108      } else {
109        dbgs() << "NO NODE \n";
110        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
111          const SUnit &S = DAG->SUnits[i];
112          if (!S.isScheduled)
113            S.dump(DAG);
114        }
115      }
116  );
117
118  return SU;
119}
120
121void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
122
123  if (NextInstKind != CurInstKind) {
124    DEBUG(dbgs() << "Instruction Type Switch\n");
125    if (NextInstKind != IDAlu)
126      OccupedSlotsMask = 15;
127    CurEmitted = 0;
128    CurInstKind = NextInstKind;
129  }
130
131  if (CurInstKind == IDAlu) {
132    switch (getAluKind(SU)) {
133    case AluT_XYZW:
134      CurEmitted += 4;
135      break;
136    case AluDiscarded:
137      break;
138    default: {
139      ++CurEmitted;
140      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
141          E = SU->getInstr()->operands_end(); It != E; ++It) {
142        MachineOperand &MO = *It;
143        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
144          ++CurEmitted;
145      }
146    }
147    }
148  } else {
149    ++CurEmitted;
150  }
151
152
153  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
154
155  if (CurInstKind != IDFetch) {
156    MoveUnits(Pending[IDFetch], Available[IDFetch]);
157  }
158}
159
160void R600SchedStrategy::releaseTopNode(SUnit *SU) {
161  DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
162
163}
164
165void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
166  DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
167
168  int IK = getInstKind(SU);
169
170  // Check for AR register defines
171  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
172                                        E = SU->getInstr()->operands_end();
173                                        I != E; ++I) {
174    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
175      if (I->isDef()) {
176        UnscheduledARDefs.push_back(SU);
177      } else {
178        UnscheduledARUses.push_back(SU);
179      }
180      return;
181    }
182  }
183
184  // There is no export clause, we can schedule one as soon as its ready
185  if (IK == IDOther)
186    Available[IDOther].push_back(SU);
187  else
188    Pending[IK].push_back(SU);
189
190}
191
192bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
193                                          const TargetRegisterClass *RC) const {
194  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
195    return RC->contains(Reg);
196  } else {
197    return MRI->getRegClass(Reg) == RC;
198  }
199}
200
201R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
202  MachineInstr *MI = SU->getInstr();
203
204    switch (MI->getOpcode()) {
205    case AMDGPU::PRED_X:
206      return AluPredX;
207    case AMDGPU::INTERP_PAIR_XY:
208    case AMDGPU::INTERP_PAIR_ZW:
209    case AMDGPU::INTERP_VEC_LOAD:
210    case AMDGPU::DOT_4:
211      return AluT_XYZW;
212    case AMDGPU::COPY:
213      if (MI->getOperand(1).isUndef()) {
214        // MI will become a KILL, don't considers it in scheduling
215        return AluDiscarded;
216      }
217    default:
218      break;
219    }
220
221    // Does the instruction take a whole IG ?
222    if(TII->isVector(*MI) ||
223        TII->isCubeOp(MI->getOpcode()) ||
224        TII->isReductionOp(MI->getOpcode()))
225      return AluT_XYZW;
226
227    // Is the result already assigned to a channel ?
228    unsigned DestSubReg = MI->getOperand(0).getSubReg();
229    switch (DestSubReg) {
230    case AMDGPU::sub0:
231      return AluT_X;
232    case AMDGPU::sub1:
233      return AluT_Y;
234    case AMDGPU::sub2:
235      return AluT_Z;
236    case AMDGPU::sub3:
237      return AluT_W;
238    default:
239      break;
240    }
241
242    // Is the result already member of a X/Y/Z/W class ?
243    unsigned DestReg = MI->getOperand(0).getReg();
244    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
245        regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
246      return AluT_X;
247    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
248      return AluT_Y;
249    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
250      return AluT_Z;
251    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
252      return AluT_W;
253    if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
254      return AluT_XYZW;
255
256    return AluAny;
257
258}
259
260int R600SchedStrategy::getInstKind(SUnit* SU) {
261  int Opcode = SU->getInstr()->getOpcode();
262
263  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
264    return IDFetch;
265
266  if (TII->isALUInstr(Opcode)) {
267    return IDAlu;
268  }
269
270  switch (Opcode) {
271  case AMDGPU::PRED_X:
272  case AMDGPU::COPY:
273  case AMDGPU::CONST_COPY:
274  case AMDGPU::INTERP_PAIR_XY:
275  case AMDGPU::INTERP_PAIR_ZW:
276  case AMDGPU::INTERP_VEC_LOAD:
277  case AMDGPU::DOT_4:
278    return IDAlu;
279  default:
280    return IDOther;
281  }
282}
283
284SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
285  if (Q.empty())
286    return NULL;
287  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
288      It != E; ++It) {
289    SUnit *SU = *It;
290    InstructionsGroupCandidate.push_back(SU->getInstr());
291    if (TII->canBundle(InstructionsGroupCandidate)) {
292      InstructionsGroupCandidate.pop_back();
293      Q.erase((It + 1).base());
294      return SU;
295    } else {
296      InstructionsGroupCandidate.pop_back();
297    }
298  }
299  return NULL;
300}
301
302void R600SchedStrategy::LoadAlu() {
303  std::vector<SUnit *> &QSrc = Pending[IDAlu];
304  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
305    AluKind AK = getAluKind(QSrc[i]);
306    AvailableAlus[AK].push_back(QSrc[i]);
307  }
308  QSrc.clear();
309}
310
311void R600SchedStrategy::PrepareNextSlot() {
312  DEBUG(dbgs() << "New Slot\n");
313  assert (OccupedSlotsMask && "Slot wasn't filled");
314  OccupedSlotsMask = 0;
315  InstructionsGroupCandidate.clear();
316  LoadAlu();
317}
318
319void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
320  unsigned DestReg = MI->getOperand(0).getReg();
321  // PressureRegister crashes if an operand is def and used in the same inst
322  // and we try to constraint its regclass
323  for (MachineInstr::mop_iterator It = MI->operands_begin(),
324      E = MI->operands_end(); It != E; ++It) {
325    MachineOperand &MO = *It;
326    if (MO.isReg() && !MO.isDef() &&
327        MO.getReg() == MI->getOperand(0).getReg())
328      return;
329  }
330  // Constrains the regclass of DestReg to assign it to Slot
331  switch (Slot) {
332  case 0:
333    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
334    break;
335  case 1:
336    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
337    break;
338  case 2:
339    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
340    break;
341  case 3:
342    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
343    break;
344  }
345}
346
347SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
348  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
349  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
350  if (SlotedSU)
351    return SlotedSU;
352  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
353  if (UnslotedSU)
354    AssignSlot(UnslotedSU->getInstr(), Slot);
355  return UnslotedSU;
356}
357
358bool R600SchedStrategy::isAvailablesAluEmpty() const {
359  return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
360      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
361      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
362      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
363      AvailableAlus[AluPredX].empty();
364}
365
366SUnit* R600SchedStrategy::pickAlu() {
367  while (!isAvailablesAluEmpty()) {
368    if (!OccupedSlotsMask) {
369      // Bottom up scheduling : predX must comes first
370      if (!AvailableAlus[AluPredX].empty()) {
371        OccupedSlotsMask = 15;
372        return PopInst(AvailableAlus[AluPredX]);
373      }
374      // Flush physical reg copies (RA will discard them)
375      if (!AvailableAlus[AluDiscarded].empty()) {
376        OccupedSlotsMask = 15;
377        return PopInst(AvailableAlus[AluDiscarded]);
378      }
379      // If there is a T_XYZW alu available, use it
380      if (!AvailableAlus[AluT_XYZW].empty()) {
381        OccupedSlotsMask = 15;
382        return PopInst(AvailableAlus[AluT_XYZW]);
383      }
384    }
385    for (int Chan = 3; Chan > -1; --Chan) {
386      bool isOccupied = OccupedSlotsMask & (1 << Chan);
387      if (!isOccupied) {
388        SUnit *SU = AttemptFillSlot(Chan);
389        if (SU) {
390          OccupedSlotsMask |= (1 << Chan);
391          InstructionsGroupCandidate.push_back(SU->getInstr());
392          return SU;
393        }
394      }
395    }
396    PrepareNextSlot();
397  }
398  return NULL;
399}
400
401SUnit* R600SchedStrategy::pickOther(int QID) {
402  SUnit *SU = 0;
403  std::vector<SUnit *> &AQ = Available[QID];
404
405  if (AQ.empty()) {
406    MoveUnits(Pending[QID], AQ);
407  }
408  if (!AQ.empty()) {
409    SU = AQ.back();
410    AQ.resize(AQ.size() - 1);
411  }
412  return SU;
413}
414
415