R600MachineScheduler.cpp revision 3ab0ba3cd8a499ebcc7eda3d7585c5ab4e7f0711
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <set>

using namespace llvm;

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  Available[IDAlu]->clear();
  Available[IDFetch]->clear();
  Available[IDOther]->clear();
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  InstKindLimit[IDAlu] = 120; // 128 minus 8 for a safety margin.

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
    InstKindLimit[IDFetch] = 7; // 8 minus 1 for a safety margin.
  } else {
    InstKindLimit[IDFetch] = 15; // 16 minus 1 for a safety margin.
  }
}

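// Move every SUnit from QSrc to QDst, clearing QSrc's queue id from each
// node on the way.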
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
  if (QSrc->empty())
    return;
  for (ReadyQueue::iterator I = QSrc->begin(),
      E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    QDst->push(*I);
  }
  QSrc->clear();
}

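// Pick the next SUnit to schedule. Stay in the current clause type as long
// as its instruction limit has not been reached and nodes of that kind
// remain available; otherwise switch, trying ALU first, then fetch, then
// other.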
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = NULL;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check whether we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a fetch instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  if (!SU) {
    // Try to pick any other instruction.
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

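// Update scheduler state after SU has been scheduled: reset counters on a
// clause type switch, account for the slots the instruction consumes in the
// current clause (4 for a full XYZW vector op, 1 plus one per ALU_LITERAL_X
// operand otherwise), and refill the available queues.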
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  DEBUG(dbgs() << "scheduled: ");
  DEBUG(SU->dump(DAG));

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
  MoveUnits(Pending[IDOther], Available[IDOther]);
}

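// A node just became available at the top of the DAG: classify it and park
// it in the matching pending queue.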
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  int IK = getInstKind(SU);

  DEBUG(dbgs() << IK << " <= ");
  DEBUG(SU->dump(DAG));

  Pending[IK]->push(SU);
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  // This strategy schedules top-down only (pickNode always sets IsTopNode),
  // so there is nothing to do when a node is released at the bottom.
}

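// Return true if Reg belongs to RC, handling both physical registers
// (checked against the class contents) and virtual registers (checked
// against their assigned register class).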
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

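// Classify an ALU node: does it occupy a full instruction group (AluT_XYZW),
// is it pinned to a single channel by its destination subregister or
// register class, can it go in any slot (AluAny), or will it be discarded
// before register allocation (AluDiscarded)?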
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
      // %vregX = COPY Tn_X is likely to be discarded in favor of an
      // assignment of Tn_X to %vregX, so don't consider it in scheduling.
      return AluDiscarded;
    } else if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL, don't consider it in scheduling.
      return AluDiscarded;
    }
    // Other copies fall through to the generic classification below.
  default:
    break;
  }

  // Does the instruction take a whole instruction group?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W register class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

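// Bucket an instruction into one of the clause kinds: ALU, fetch (texture
// and vertex ops), or other.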
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT4_eg_pseudo:
  case AMDGPU::DOT4_r600_pseudo:
    return IDAlu;
  case AMDGPU::TEX_VTX_CONSTBUF:
  case AMDGPU::TEX_VTX_TEXBUF:
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    return IDFetch;
  default:
    DEBUG(
        dbgs() << "other inst: ";
        SU->dump(DAG);
    );
    return IDOther;
  }
}

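// Pop the first SUnit from Q that can legally be bundled with the
// instructions already picked for the current instruction group, or NULL
// if none fits.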
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
  if (Q.empty())
    return NULL;
  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
      E = Q.end(); It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->canBundle(InstructionsGroupCandidate)) {
      InstructionsGroupCandidate.pop_back();
      Q.erase(It);
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return NULL;
}

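// Move all pending ALU nodes into the per-kind available sets, classifying
// each one on the way.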
void R600SchedStrategy::LoadAlu() {
  ReadyQueue *QSrc = Pending[IDAlu];
  for (ReadyQueue::iterator I = QSrc->begin(),
      E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    AluKind AK = getAluKind(*I);
    AvailableAlus[AK].insert(*I);
  }
  QSrc->clear();
}

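// Start a new instruction group: reset the slot mask and the bundle
// candidate list, then reload the available ALU sets.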
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

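// Constrain the register class of MI's destination so that the result is
// written to the channel matching Slot (0 = X, 1 = Y, 2 = Z, 3 = W).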
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // Register pressure tracking crashes if an operand is both defined and
  // used in the same instruction and we try to constrain its register
  // class, so bail out in that case.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
      E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() && MO.getReg() == DestReg)
      return;
  }
  // Constrain the register class of DestReg so that it is assigned to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

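// Try to fill the given slot, preferring an instruction already pinned to
// that channel over an AluAny instruction; when both are available, pick
// the lesser one per CompareSUnit and put the other back.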
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlottedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  SUnit *UnslottedSU = PopInst(AvailableAlus[AluAny]);
  if (!UnslottedSU) {
    return SlottedSU;
  } else if (!SlottedSU) {
    AssignSlot(UnslottedSU->getInstr(), Slot);
    return UnslottedSU;
  } else {
    // Determine which one to pick (the lesser one per CompareSUnit).
    if (CompareSUnit()(SlottedSU, UnslottedSU)) {
      AvailableAlus[AluAny].insert(UnslottedSU);
      return SlottedSU;
    } else {
      AvailableAlus[IndexToID[Slot]].insert(SlottedSU);
      AssignSlot(UnslottedSU->getInstr(), Slot);
      return UnslottedSU;
    }
  }
}

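// Return true when no ALU instruction remains in either the pending queue
// or any of the per-kind available sets.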
bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
}

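// Pick the next ALU instruction. At the start of an instruction group,
// first flush discardable copies, then prefer a full XYZW op; otherwise
// fill the X/Y/Z/W channels one by one.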
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Flush physical register copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If a T_XYZW ALU instruction is available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

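// Pick the next instruction from the given queue, refilling it from the
// pending queue first if it is empty.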
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = NULL;
  ReadyQueue *AQ = Available[QID];

  if (AQ->empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ->empty()) {
    SU = *AQ->begin();
    AQ->remove(AQ->begin());
  }
  return SU;
}