R600MachineScheduler.cpp revision 512119770e9c32eb0b9e6196ce51917fb2e30d9f
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm> // std::remove_if, used by Filter() below

using namespace llvm;

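// Called at the start of each scheduling region: cache the target hooks and
// reset the clause-tracking state. OccupedSlotsMask starts at 15 (all four
// X/Y/Z/W slot bits set), which forces the first pickAlu() call to run
// PrepareNextSlot() and open a fresh instruction group.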
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
}

void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst)
{
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}

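// Pick the next node to schedule. Scheduling here is bottom-up (IsTopNode is
// always set to false). Pick order: AR_X defs first (so they stay in the same
// ALU clause as their uses), then ALU instructions while the current clause
// has room, then fetch, then other instructions, and AR_X uses last.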
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  NextInstKind = IDOther;

  IsTopNode = false;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  // We want to schedule AR defs as soon as possible to make sure they aren't
  // put in a different ALU clause from their uses.
  if (!SU && !UnscheduledARDefs.empty()) {
    SU = UnscheduledARDefs[0];
    UnscheduledARDefs.erase(UnscheduledARDefs.begin());
    NextInstKind = IDAlu;
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick any other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  // We want to schedule the AR uses as late as possible to make sure that
  // the AR defs have been released.
  if (!SU && !UnscheduledARUses.empty()) {
    SU = UnscheduledARUses[0];
    UnscheduledARUses.erase(UnscheduledARUses.begin());
    NextInstKind = IDAlu;
  }

  DEBUG(
      if (SU) {
        dbgs() << " ** Pick node **\n";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE \n";
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

// Predicate for Filter(): true for nodes that have already been scheduled.
static bool IsScheduled(const SUnit *SU) {
  return SU->isScheduled;
}

// Remove already-scheduled nodes from List.
static
void Filter(std::vector<SUnit *> &List) {
  List.erase(std::remove_if(List.begin(), List.end(), IsScheduled), List.end());
}

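// Update the clause accounting once SU has been scheduled. A full-group
// (XYZW) ALU instruction occupies four slots, and each ALU_LITERAL_X operand
// consumes one extra slot in the instruction group, so both count toward the
// clause limit checked in pickNode().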
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    for (unsigned i = 0; i < AluLast; i++) {
      Filter(Available[i]);
      Filter(Pending[i]);
    }
  }

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
}

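// COPYs that read a physical register are held back in a separate list by
// releaseBottomNode(); pickNode() falls back to them only when no other ALU
// instruction can be picked (presumably to keep them close to the point
// where the physical register is live).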
static bool
isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != AMDGPU::COPY)
    return false;

  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
}

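// Route a newly released node to the right queue: physical-register copies
// and AR_X defs/uses go to dedicated lists, IDOther nodes become available
// immediately, and everything else waits in Pending for its clause kind.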
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // Check for AR register defines.
  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
                                        E = SU->getInstr()->operands_end();
                                        I != E; ++I) {
    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
      if (I->isDef()) {
        UnscheduledARDefs.push_back(SU);
      } else {
        UnscheduledARUses.push_back(SU);
      }
      return;
    }
  }

  // There is no export clause, so we can schedule an IDOther instruction as
  // soon as it's ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

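// Classify an ALU instruction by the VLIW slot(s) it can occupy. An R600
// instruction group has four lanes (X, Y, Z, W); some instructions need the
// whole group (AluT_XYZW), some are tied to one lane by their destination
// subregister or register class, and the rest can go anywhere (AluAny).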
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
    return AluPredX;
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL, don't consider it in scheduling.
      return AluDiscarded;
    }
    // Fall through.
  default:
    break;
  }

  // Does the instruction take a whole instruction group (IG)?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

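// Coarse classification used to build clauses: texture/vertex cache users
// form fetch clauses, ALU-like opcodes form ALU clauses, and everything else
// is IDOther.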
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::PRED_X:
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}

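// Pop the first instruction from Q (scanning from the most recently released
// end) that can legally join the instruction group built so far, according
// to TII->canBundle(). Returns NULL when none fits.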
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
  if (Q.empty())
    return NULL;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
      It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->canBundle(InstructionsGroupCandidate)) {
      InstructionsGroupCandidate.pop_back();
      // (It + 1).base() is the forward iterator pointing at *It, which is
      // what std::vector::erase expects.
      Q.erase((It + 1).base());
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return NULL;
}

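// Move every pending ALU node into the ready queue matching its AluKind.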
void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}

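// Close the current instruction group and start an empty one: clear the slot
// mask and make freshly pending ALU instructions candidates again.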
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

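// Pin MI's destination register to the register class of the given slot so
// that the register allocator places it in the matching channel.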
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is defined and used in the same
  // instruction and we try to constrain its regclass.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
      E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == DestReg)
      return;
  }
  // Constrain the regclass of DestReg to assign it to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

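// Fill one slot: prefer an instruction already tied to this slot, otherwise
// take an AluAny instruction and constrain its destination to the slot's
// register class via AssignSlot().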
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
      AvailableAlus[AluPredX].empty();
}

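// Pick the next ALU instruction, filling the current instruction group slot
// by slot from W down to X. Bit i of OccupedSlotsMask means channel i is
// taken (15 = all four). Instructions that need the whole group (PRED_X,
// discarded copies, XYZW ops) are only started on an empty group and mark
// every slot occupied.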
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluPredX]);
      }
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

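// Pick the most recently released node of the given kind, refilling the
// available queue from Pending when it runs dry.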
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.pop_back();
  }
  return SU;
}