R600MachineScheduler.cpp revision 3f179b59e53a8a6d5dfb509857d972461027b809
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <set>

using namespace llvm;

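// Reset all per-clause scheduling state and derive per-kind clause size
// limits from the subtarget: the ALU limit is kept below the hardware
// maximum as a safety margin, and fetch clauses hold fewer instructions on
// HD5XXX and earlier devices than on later generations.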
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  Available[IDAlu]->clear();
  Available[IDFetch]->clear();
  Available[IDOther]->clear();
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
  InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
    InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
  } else {
    InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
  }
}

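// Transfer every SUnit from QSrc to QDst, clearing QSrc's queue id bit on
// each node so it no longer appears to be a member of the source queue.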
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
  if (QSrc->empty())
    return;
  for (ReadyQueue::iterator I = QSrc->begin(),
      E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    QDst->push(*I);
  }
  QSrc->clear();
}

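// Pick the next instruction to schedule. ALU instructions are preferred as
// long as the current clause may continue; otherwise fall back to FETCH,
// then to everything else.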
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick any other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

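// Bookkeeping after a node is scheduled: detect clause type switches, and
// count how many instruction slots the picked node consumes (a whole-group
// vector ALU op takes four slots, and each literal operand takes one more).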
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  DEBUG(dbgs() << "scheduled: ");
  DEBUG(SU->dump(DAG));

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
          E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
  MoveUnits(Pending[IDOther], Available[IDOther]);
}

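// Newly released nodes are classified once and parked in the matching
// Pending queue until the scheduler is ready to consider them.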
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  int IK = getInstKind(SU);

  DEBUG(dbgs() << IK << " <= ");
  DEBUG(SU->dump(DAG));

  Pending[IK]->push(SU);
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

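// Classify an ALU instruction by the vector channel it must occupy: either a
// whole-group operation (XYZW), a copy that will be discarded, a specific
// X/Y/Z/W lane implied by its destination subregister or register class, or
// AluAny when the result may still go to any lane.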
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
      // %vregX = COPY Tn_X is likely to be discarded in favor of an
      // assignment of Tn_X to %vregX; don't consider it in scheduling.
      return AluDiscarded;
    } else if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL; don't consider it in scheduling.
      return AluDiscarded;
    }
  default:
    break;
  }

  // Does the instruction take a whole IG?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

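// Sort an instruction into one of the three clause kinds: ALU, FETCH
// (texture and vertex fetches), or other.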
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT4_eg_pseudo:
  case AMDGPU::DOT4_r600_pseudo:
    return IDAlu;
  case AMDGPU::TEX_VTX_CONSTBUF:
  case AMDGPU::TEX_VTX_TEXBUF:
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    return IDFetch;
  default:
    DEBUG(
        dbgs() << "other inst: ";
        SU->dump(DAG);
    );
    return IDOther;
  }
}

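// Describes which constant pairs an ALU instruction reads: the low two bits
// of a constant address select the channel, the remaining bits identify the
// pair, and X/Y reads are tracked separately from Z/W reads. Two
// instructions may share an instruction group only if their pairs are
// compatible, i.e. equal or unused on at least one side.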
class ConstPairs {
private:
  unsigned XYPair;
  unsigned ZWPair;
public:
  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
    for (unsigned i = 0; i < 3; i++) {
      unsigned ReadConstChan = ReadConst[i] & 3;
      unsigned ReadConstIndex = ReadConst[i] & (~3);
      if (ReadConstChan < 2) {
        if (!XYPair) {
          XYPair = ReadConstIndex;
        }
      } else {
        if (!ZWPair) {
          ZWPair = ReadConstIndex;
        }
      }
    }
  }

  bool isCompatibleWith(const ConstPairs& CP) const {
    return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
        (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
  }
};

static
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
  unsigned ReadConsts[3] = {0, 0, 0};
  R600Operands::Ops OpTable[3][2] = {
    {R600Operands::SRC0, R600Operands::SRC0_SEL},
    {R600Operands::SRC1, R600Operands::SRC1_SEL},
    {R600Operands::SRC2, R600Operands::SRC2_SEL},
  };

  if (!TII->isALUInstr(MI.getOpcode()))
    return ConstPairs(ReadConsts);

  for (unsigned i = 0; i < 3; i++) {
    int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
    if (SrcIdx < 0)
      break;
    if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
      ReadConsts[i] = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
  }
  return ConstPairs(ReadConsts);
}

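// An instruction can join the group currently being formed only if its
// constant pairs are compatible with those of every candidate already
// placed in the group.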
bool
R600SchedStrategy::isBundleable(const MachineInstr& MI) {
  const ConstPairs &MIPair = getPairs(TII, MI);
  for (unsigned i = 0; i < 4; i++) {
    if (!InstructionsGroupCandidate[i])
      continue;
    const ConstPairs &IGPair = getPairs(TII,
        *InstructionsGroupCandidate[i]->getInstr());
    if (!IGPair.isCompatibleWith(MIPair))
      return false;
  }
  return true;
}

SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
  if (Q.empty())
    return NULL;
  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
      E = Q.end(); It != E; ++It) {
    SUnit *SU = *It;
    if (isBundleable(*SU->getInstr())) {
      Q.erase(It);
      return SU;
    }
  }
  return NULL;
}

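// Drain Pending[IDAlu] into the per-kind AvailableAlus buckets used by the
// slot-filling logic below.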
void R600SchedStrategy::LoadAlu() {
  ReadyQueue *QSrc = Pending[IDAlu];
  for (ReadyQueue::iterator I = QSrc->begin(),
      E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    AluKind AK = getAluKind(*I);
    AvailableAlus[AK].insert(*I);
  }
  QSrc->clear();
}

void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
  LoadAlu();
}

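// Constrain the destination register of MI to the register class matching
// the given slot, so that register allocation assigns it to that channel.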
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is def and used in the same inst
  // and we try to constrain its regclass.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
      E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == DestReg)
      return;
  }
  // Constrain the regclass of DestReg to assign it to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

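// Try to fill the given slot, preferring an instruction already tied to that
// channel and falling back to an AluAny instruction, which is then pinned to
// the slot. If both are available, whichever CompareSUnit orders first wins
// and the loser is put back in its bucket.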
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (!UnslotedSU) {
    return SlotedSU;
  } else if (!SlotedSU) {
    AssignSlot(UnslotedSU->getInstr(), Slot);
    return UnslotedSU;
  } else {
    // Determine which one to pick (the lesser one).
    if (CompareSUnit()(SlotedSU, UnslotedSU)) {
      AvailableAlus[AluAny].insert(UnslotedSU);
      return SlotedSU;
    } else {
      AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
      AssignSlot(UnslotedSU->getInstr(), Slot);
      return UnslotedSU;
    }
  }
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
}

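// Build VLIW instruction groups one slot at a time: whole-group (XYZW) and
// discarded instructions are emitted only when the group is empty; otherwise
// each free X/Y/Z/W slot is tried in turn, and a new group is started once
// no remaining slot can be filled.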
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate[Chan] = SU;
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

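// For non-ALU kinds, just take the first available node, refilling the
// available queue from the pending one when it runs dry.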
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  ReadyQueue *AQ = Available[QID];

  if (AQ->empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ->empty()) {
    SU = *AQ->begin();
    AQ->remove(AQ->begin());
  }
  return SU;
}