R600MachineScheduler.cpp revision 3ff0abfaabc2c7f604d490be587b9c27e7c91ac0
1//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief R600 Machine Scheduler interface
12// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "misched"
17
18#include "R600MachineScheduler.h"
19#include "llvm/CodeGen/LiveIntervalAnalysis.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/Pass.h"
22#include "llvm/PassManager.h"
23#include "llvm/Support/raw_ostream.h"
24
25using namespace llvm;
26
27void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
28
29  DAG = dag;
30  TII = static_cast<const R600InstrInfo*>(DAG->TII);
31  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
32  MRI = &DAG->MRI;
33  CurInstKind = IDOther;
34  CurEmitted = 0;
35  OccupedSlotsMask = 15;
36  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
37  InstKindLimit[IDOther] = 32;
38
39  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
40  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
41
42}
43
44void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
45                                  std::vector<SUnit *> &QDst)
46{
47  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
48  QSrc.clear();
49}
50
51SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
52  SUnit *SU = 0;
53  NextInstKind = IDOther;
54
55  IsTopNode = false;
56
57  // check if we might want to switch current clause type
58  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
59      (Available[CurInstKind].empty());
60  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
61      (!Available[IDFetch].empty() || !Available[IDOther].empty());
62
63  // We want to scheduled AR defs as soon as possible to make sure they aren't
64  // put in a different ALU clause from their uses.
65  if (!SU && !UnscheduledARDefs.empty()) {
66      SU = UnscheduledARDefs[0];
67      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
68      NextInstKind = IDAlu;
69  }
70
71  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
72      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
73    // try to pick ALU
74    SU = pickAlu();
75    if (!SU && !PhysicalRegCopy.empty()) {
76      SU = PhysicalRegCopy.front();
77      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
78    }
79    if (SU) {
80      if (CurEmitted >= InstKindLimit[IDAlu])
81        CurEmitted = 0;
82      NextInstKind = IDAlu;
83    }
84  }
85
86  if (!SU) {
87    // try to pick FETCH
88    SU = pickOther(IDFetch);
89    if (SU)
90      NextInstKind = IDFetch;
91  }
92
93  // try to pick other
94  if (!SU) {
95    SU = pickOther(IDOther);
96    if (SU)
97      NextInstKind = IDOther;
98  }
99
100  // We want to schedule the AR uses as late as possible to make sure that
101  // the AR defs have been released.
102  if (!SU && !UnscheduledARUses.empty()) {
103      SU = UnscheduledARUses[0];
104      UnscheduledARUses.erase(UnscheduledARUses.begin());
105      NextInstKind = IDAlu;
106  }
107
108
109  DEBUG(
110      if (SU) {
111        dbgs() << " ** Pick node **\n";
112        SU->dump(DAG);
113      } else {
114        dbgs() << "NO NODE \n";
115        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
116          const SUnit &S = DAG->SUnits[i];
117          if (!S.isScheduled)
118            S.dump(DAG);
119        }
120      }
121  );
122
123  return SU;
124}
125
126void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
127  if (NextInstKind != CurInstKind) {
128    DEBUG(dbgs() << "Instruction Type Switch\n");
129    if (NextInstKind != IDAlu)
130      OccupedSlotsMask = 15;
131    CurEmitted = 0;
132    CurInstKind = NextInstKind;
133  }
134
135  if (CurInstKind == IDAlu) {
136    switch (getAluKind(SU)) {
137    case AluT_XYZW:
138      CurEmitted += 4;
139      break;
140    case AluDiscarded:
141      break;
142    default: {
143      ++CurEmitted;
144      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
145          E = SU->getInstr()->operands_end(); It != E; ++It) {
146        MachineOperand &MO = *It;
147        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
148          ++CurEmitted;
149      }
150    }
151    }
152  } else {
153    ++CurEmitted;
154  }
155
156
157  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
158
159  if (CurInstKind != IDFetch) {
160    MoveUnits(Pending[IDFetch], Available[IDFetch]);
161  }
162}
163
164static bool
165isPhysicalRegCopy(MachineInstr *MI) {
166  if (MI->getOpcode() != AMDGPU::COPY)
167    return false;
168
169  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
170}
171
172void R600SchedStrategy::releaseTopNode(SUnit *SU) {
173  DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
174}
175
176void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
177  DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
178  if (isPhysicalRegCopy(SU->getInstr())) {
179    PhysicalRegCopy.push_back(SU);
180    return;
181  }
182
183  int IK = getInstKind(SU);
184
185  // Check for AR register defines
186  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
187                                        E = SU->getInstr()->operands_end();
188                                        I != E; ++I) {
189    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
190      if (I->isDef()) {
191        UnscheduledARDefs.push_back(SU);
192      } else {
193        UnscheduledARUses.push_back(SU);
194      }
195      return;
196    }
197  }
198
199  // There is no export clause, we can schedule one as soon as its ready
200  if (IK == IDOther)
201    Available[IDOther].push_back(SU);
202  else
203    Pending[IK].push_back(SU);
204
205}
206
207bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
208                                          const TargetRegisterClass *RC) const {
209  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
210    return RC->contains(Reg);
211  } else {
212    return MRI->getRegClass(Reg) == RC;
213  }
214}
215
216R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
217  MachineInstr *MI = SU->getInstr();
218
219    switch (MI->getOpcode()) {
220    case AMDGPU::PRED_X:
221      return AluPredX;
222    case AMDGPU::INTERP_PAIR_XY:
223    case AMDGPU::INTERP_PAIR_ZW:
224    case AMDGPU::INTERP_VEC_LOAD:
225    case AMDGPU::DOT_4:
226      return AluT_XYZW;
227    case AMDGPU::COPY:
228      if (MI->getOperand(1).isUndef()) {
229        // MI will become a KILL, don't considers it in scheduling
230        return AluDiscarded;
231      }
232    default:
233      break;
234    }
235
236    // Does the instruction take a whole IG ?
237    if(TII->isVector(*MI) ||
238        TII->isCubeOp(MI->getOpcode()) ||
239        TII->isReductionOp(MI->getOpcode()))
240      return AluT_XYZW;
241
242    // Is the result already assigned to a channel ?
243    unsigned DestSubReg = MI->getOperand(0).getSubReg();
244    switch (DestSubReg) {
245    case AMDGPU::sub0:
246      return AluT_X;
247    case AMDGPU::sub1:
248      return AluT_Y;
249    case AMDGPU::sub2:
250      return AluT_Z;
251    case AMDGPU::sub3:
252      return AluT_W;
253    default:
254      break;
255    }
256
257    // Is the result already member of a X/Y/Z/W class ?
258    unsigned DestReg = MI->getOperand(0).getReg();
259    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
260        regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
261      return AluT_X;
262    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
263      return AluT_Y;
264    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
265      return AluT_Z;
266    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
267      return AluT_W;
268    if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
269      return AluT_XYZW;
270
271    return AluAny;
272
273}
274
275int R600SchedStrategy::getInstKind(SUnit* SU) {
276  int Opcode = SU->getInstr()->getOpcode();
277
278  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
279    return IDFetch;
280
281  if (TII->isALUInstr(Opcode)) {
282    return IDAlu;
283  }
284
285  switch (Opcode) {
286  case AMDGPU::PRED_X:
287  case AMDGPU::COPY:
288  case AMDGPU::CONST_COPY:
289  case AMDGPU::INTERP_PAIR_XY:
290  case AMDGPU::INTERP_PAIR_ZW:
291  case AMDGPU::INTERP_VEC_LOAD:
292  case AMDGPU::DOT_4:
293    return IDAlu;
294  default:
295    return IDOther;
296  }
297}
298
299SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
300  if (Q.empty())
301    return NULL;
302  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
303      It != E; ++It) {
304    SUnit *SU = *It;
305    InstructionsGroupCandidate.push_back(SU->getInstr());
306    if (TII->canBundle(InstructionsGroupCandidate)) {
307      InstructionsGroupCandidate.pop_back();
308      Q.erase((It + 1).base());
309      return SU;
310    } else {
311      InstructionsGroupCandidate.pop_back();
312    }
313  }
314  return NULL;
315}
316
317void R600SchedStrategy::LoadAlu() {
318  std::vector<SUnit *> &QSrc = Pending[IDAlu];
319  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
320    AluKind AK = getAluKind(QSrc[i]);
321    AvailableAlus[AK].push_back(QSrc[i]);
322  }
323  QSrc.clear();
324}
325
326void R600SchedStrategy::PrepareNextSlot() {
327  DEBUG(dbgs() << "New Slot\n");
328  assert (OccupedSlotsMask && "Slot wasn't filled");
329  OccupedSlotsMask = 0;
330  InstructionsGroupCandidate.clear();
331  LoadAlu();
332}
333
334void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
335  unsigned DestReg = MI->getOperand(0).getReg();
336  // PressureRegister crashes if an operand is def and used in the same inst
337  // and we try to constraint its regclass
338  for (MachineInstr::mop_iterator It = MI->operands_begin(),
339      E = MI->operands_end(); It != E; ++It) {
340    MachineOperand &MO = *It;
341    if (MO.isReg() && !MO.isDef() &&
342        MO.getReg() == MI->getOperand(0).getReg())
343      return;
344  }
345  // Constrains the regclass of DestReg to assign it to Slot
346  switch (Slot) {
347  case 0:
348    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
349    break;
350  case 1:
351    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
352    break;
353  case 2:
354    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
355    break;
356  case 3:
357    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
358    break;
359  }
360}
361
362SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
363  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
364  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
365  if (SlotedSU)
366    return SlotedSU;
367  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
368  if (UnslotedSU)
369    AssignSlot(UnslotedSU->getInstr(), Slot);
370  return UnslotedSU;
371}
372
373bool R600SchedStrategy::isAvailablesAluEmpty() const {
374  return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
375      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
376      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
377      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
378      AvailableAlus[AluPredX].empty();
379}
380
381SUnit* R600SchedStrategy::pickAlu() {
382  while (!isAvailablesAluEmpty()) {
383    if (!OccupedSlotsMask) {
384      // Bottom up scheduling : predX must comes first
385      if (!AvailableAlus[AluPredX].empty()) {
386        OccupedSlotsMask = 15;
387        return PopInst(AvailableAlus[AluPredX]);
388      }
389      // Flush physical reg copies (RA will discard them)
390      if (!AvailableAlus[AluDiscarded].empty()) {
391        OccupedSlotsMask = 15;
392        return PopInst(AvailableAlus[AluDiscarded]);
393      }
394      // If there is a T_XYZW alu available, use it
395      if (!AvailableAlus[AluT_XYZW].empty()) {
396        OccupedSlotsMask = 15;
397        return PopInst(AvailableAlus[AluT_XYZW]);
398      }
399    }
400    for (int Chan = 3; Chan > -1; --Chan) {
401      bool isOccupied = OccupedSlotsMask & (1 << Chan);
402      if (!isOccupied) {
403        SUnit *SU = AttemptFillSlot(Chan);
404        if (SU) {
405          OccupedSlotsMask |= (1 << Chan);
406          InstructionsGroupCandidate.push_back(SU->getInstr());
407          return SU;
408        }
409      }
410    }
411    PrepareNextSlot();
412  }
413  return NULL;
414}
415
416SUnit* R600SchedStrategy::pickOther(int QID) {
417  SUnit *SU = 0;
418  std::vector<SUnit *> &AQ = Available[QID];
419
420  if (AQ.empty()) {
421    MoveUnits(Pending[QID], AQ);
422  }
423  if (!AQ.empty()) {
424    SU = AQ.back();
425    AQ.resize(AQ.size() - 1);
426  }
427  return SU;
428}
429
430