15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//                     The LLVM Compiler Infrastructure
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file is distributed under the University of Illinois Open Source
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// License. See LICENSE.TXT for details.
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===//
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \file
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \brief R600 Machine Scheduler interface
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===//
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEBUG_TYPE "misched"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "R600MachineScheduler.h"
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/LiveIntervalAnalysis.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h"
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "llvm/Pass.h"
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/PassManager.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/raw_ostream.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)using namespace llvm;
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
29ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  DAG = dag;
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  TII = static_cast<const R600InstrInfo*>(DAG->TII);
317d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
327d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  MRI = &DAG->MRI;
337d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  CurInstKind = IDOther;
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CurEmitted = 0;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  OccupedSlotsMask = 31;
364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  InstKindLimit[IDOther] = 32;
383551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
40d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
41d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)  AluInstCount = 0;
4268043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)  FetchInstCount = 0;
4368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)}
443551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
453551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
463551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                                  std::vector<SUnit *> &QDst)
473551c9c881056c480085172ff9840cab31610854Torne (Richard Coles){
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  QSrc.clear();
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
/// Return how many wavefronts fit when each one needs \p GPRCount registers,
/// i.e. 248 / GPRCount rounded down.
///
/// \p GPRCount must be non-zero (asserted).
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  // NOTE(review): 248 is presumably the number of GPRs shared between
  // wavefronts on this hardware -- confirm against the ISA documentation.
  const unsigned RegisterBudget = 248;
  assert(GPRCount && "GPRCount cannot be 0");
  return RegisterBudget / GPRCount;
}
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SUnit *SU = 0;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NextInstKind = IDOther;
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IsTopNode = false;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // check if we might want to switch current clause type
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
6658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)      (Available[CurInstKind].empty());
6758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles)  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (!Available[IDFetch].empty() || !Available[IDOther].empty());
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (CurInstKind == IDAlu && !Available[IDFetch].empty()) {
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We use the heuristic provided by AMD Accelerated Parallel Processing
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // OpenCL Programming Guide :
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // The approx. number of WF that allows TEX inst to hide ALU inst is :
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU))
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    float ALUFetchRationEstimate =
7668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)        (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) /
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (FetchInstCount + Available[IDFetch].size());
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned NeededWF = 62.5f / ALUFetchRationEstimate;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" );
8068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)    // We assume the local GPR requirements to be "dominated" by the requirement
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // after TEX are indeed likely to consume or generate values from/for the
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // TEX clause.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // one GPR) or TmXYZW = TnXYZW (need 2 GPR).
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // (TODO : use RegisterPressure)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If we are going too use too many GPR, we flush Fetch instruction to lower
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // register pressure on 128 bits regs.
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned NearRegisterRequirement = 2 * Available[IDFetch].size();
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement))
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AllowSwitchFromAlu = true;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We want to scheduled AR defs as soon as possible to make sure they aren't
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // put in a different ALU clause from their uses.
9868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)  if (!SU && !UnscheduledARDefs.empty()) {
99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      SU = UnscheduledARDefs[0];
100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      UnscheduledARDefs.erase(UnscheduledARDefs.begin());
101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      NextInstKind = IDAlu;
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // try to pick ALU
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SU = pickAlu();
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!SU && !PhysicalRegCopy.empty()) {
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SU = PhysicalRegCopy.front();
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (SU) {
11368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)      if (CurEmitted >= InstKindLimit[IDAlu])
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        CurEmitted = 0;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NextInstKind = IDAlu;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!SU) {
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // try to pick FETCH
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SU = pickOther(IDFetch);
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (SU)
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NextInstKind = IDFetch;
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
12568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // try to pick other
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!SU) {
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SU = pickOther(IDOther);
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (SU)
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NextInstKind = IDOther;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We want to schedule the AR uses as late as possible to make sure that
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the AR defs have been released.
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!SU && !UnscheduledARUses.empty()) {
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SU = UnscheduledARUses[0];
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      UnscheduledARUses.erase(UnscheduledARUses.begin());
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      NextInstKind = IDAlu;
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DEBUG(
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (SU) {
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        dbgs() << " ** Pick node **\n";
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SU->dump(DAG);
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        dbgs() << "NO NODE \n";
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          const SUnit &S = DAG->SUnits[i];
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (!S.isScheduled)
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            S.dump(DAG);
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
15368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)      }
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  );
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SU;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
16068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)  if (NextInstKind != CurInstKind) {
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DEBUG(dbgs() << "Instruction Type Switch\n");
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (NextInstKind != IDAlu)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      OccupedSlotsMask |= 31;
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CurEmitted = 0;
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CurInstKind = NextInstKind;
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (CurInstKind == IDAlu) {
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AluInstCount ++;
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (getAluKind(SU)) {
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AluT_XYZW:
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      CurEmitted += 4;
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AluDiscarded:
1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
1762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    default: {
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++CurEmitted;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          E = SU->getInstr()->operands_end(); It != E; ++It) {
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        MachineOperand &MO = *It;
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++CurEmitted;
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++CurEmitted;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (CurInstKind != IDFetch) {
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MoveUnits(Pending[IDFetch], Available[IDFetch]);
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    FetchInstCount++;
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)isPhysicalRegCopy(MachineInstr *MI) {
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (MI->getOpcode() != AMDGPU::COPY)
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::releaseTopNode(SUnit *SU) {
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (isPhysicalRegCopy(SU->getInstr())) {
21468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)    PhysicalRegCopy.push_back(SU);
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int IK = getInstKind(SU);
21968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check for AR register defines
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        E = SU->getInstr()->operands_end();
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                        I != E; ++I) {
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (I->isDef()) {
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        UnscheduledARDefs.push_back(SU);
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        UnscheduledARUses.push_back(SU);
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // There is no export clause, we can schedule one as soon as its ready
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (IK == IDOther)
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Available[IDOther].push_back(SU);
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Pending[IK].push_back(SU);
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                          const TargetRegisterClass *RC) const {
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return RC->contains(Reg);
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return MRI->getRegClass(Reg) == RC;
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MachineInstr *MI = SU->getInstr();
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (TII->isTransOnly(MI))
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return AluTrans;
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (MI->getOpcode()) {
25868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)    case AMDGPU::PRED_X:
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluPredX;
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::INTERP_PAIR_XY:
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::INTERP_PAIR_ZW:
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::INTERP_VEC_LOAD:
263f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    case AMDGPU::DOT_4:
264f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      return AluT_XYZW;
265f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    case AMDGPU::COPY:
266f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      if (MI->getOperand(1).isUndef()) {
267f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        // MI will become a KILL, don't considers it in scheduling
268f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        return AluDiscarded;
269f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      }
270f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    default:
271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      break;
272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Does the instruction take a whole IG ?
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // XXX: Is it possible to add a helper function in R600InstrInfo that can
27668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)    // be used here and in R600PacketizerList::isSoloInstruction() ?
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if(TII->isVector(*MI) ||
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        TII->isCubeOp(MI->getOpcode()) ||
279f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        TII->isReductionOp(MI->getOpcode()) ||
280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
281f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      return AluT_XYZW;
282f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
28368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (TII->isLDSInstr(MI->getOpcode())) {
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_X;
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Is the result already assigned to a channel ?
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned DestSubReg = MI->getOperand(0).getSubReg();
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (DestSubReg) {
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::sub0:
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_X;
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::sub1:
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_Y;
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::sub2:
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_Z;
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case AMDGPU::sub3:
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_W;
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Is the result already member of a X/Y/Z/W class ?
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned DestReg = MI->getOperand(0).getReg();
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_X;
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_Y;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_Z;
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_W;
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return AluT_XYZW;
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return AluAny;
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3204e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int R600SchedStrategy::getInstKind(SUnit* SU) {
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int Opcode = SU->getInstr()->getOpcode();
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return IDFetch;
326d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles)
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (TII->isALUInstr(Opcode)) {
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return IDAlu;
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (Opcode) {
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::PRED_X:
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::COPY:
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::CONST_COPY:
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::INTERP_PAIR_XY:
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::INTERP_PAIR_ZW:
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::INTERP_VEC_LOAD:
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case AMDGPU::DOT_4:
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return IDAlu;
34068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)  default:
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return IDOther;
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (Q.empty())
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      It != E; ++It) {
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SUnit *SU = *It;
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    InstructionsGroupCandidate.push_back(SU->getInstr());
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
35368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)      InstructionsGroupCandidate.pop_back();
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      Q.erase((It + 1).base());
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return SU;
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      InstructionsGroupCandidate.pop_back();
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return NULL;
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::LoadAlu() {
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<SUnit *> &QSrc = Pending[IDAlu];
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AluKind AK = getAluKind(QSrc[i]);
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AvailableAlus[AK].push_back(QSrc[i]);
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
36968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)  QSrc.clear();
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::PrepareNextSlot() {
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DEBUG(dbgs() << "New Slot\n");
3742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  assert (OccupedSlotsMask && "Slot wasn't filled");
3752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  OccupedSlotsMask = 0;
3762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  InstructionsGroupCandidate.clear();
3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  LoadAlu();
3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
3812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (DstIndex == -1) {
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  unsigned DestReg = MI->getOperand(DstIndex).getReg();
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // PressureRegister crashes if an operand is def and used in the same inst
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and we try to constraint its regclass
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (MachineInstr::mop_iterator It = MI->operands_begin(),
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      E = MI->operands_end(); It != E; ++It) {
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MachineOperand &MO = *It;
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (MO.isReg() && !MO.isDef() &&
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        MO.getReg() == DestReg)
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Constrains the regclass of DestReg to assign it to Slot
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  switch (Slot) {
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case 0:
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    break;
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case 1:
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    break;
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case 2:
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    break;
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  case 3:
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    break;
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (SlotedSU)
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return SlotedSU;
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (UnslotedSU)
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AssignSlot(UnslotedSU->getInstr(), Slot);
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return UnslotedSU;
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)unsigned R600SchedStrategy::AvailablesAluCount() const {
424a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
425a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)      AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AvailableAlus[AluPredX].size();
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit* R600SchedStrategy::pickAlu() {
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!OccupedSlotsMask) {
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Bottom up scheduling : predX must comes first
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!AvailableAlus[AluPredX].empty()) {
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        OccupedSlotsMask |= 31;
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return PopInst(AvailableAlus[AluPredX]);
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Flush physical reg copies (RA will discard them)
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!AvailableAlus[AluDiscarded].empty()) {
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        OccupedSlotsMask |= 31;
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return PopInst(AvailableAlus[AluDiscarded]);
44390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      }
44468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)      // If there is a T_XYZW alu available, use it
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!AvailableAlus[AluT_XYZW].empty()) {
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        OccupedSlotsMask |= 15;
447a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)        return PopInst(AvailableAlus[AluT_XYZW]);
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool TransSlotOccuped = OccupedSlotsMask & 16;
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!TransSlotOccuped) {
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!AvailableAlus[AluTrans].empty()) {
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        OccupedSlotsMask |= 16;
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return PopInst(AvailableAlus[AluTrans]);
45590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)      }
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (int Chan = 3; Chan > -1; --Chan) {
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      bool isOccupied = OccupedSlotsMask & (1 << Chan);
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!isOccupied) {
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SUnit *SU = AttemptFillSlot(Chan);
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (SU) {
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          OccupedSlotsMask |= (1 << Chan);
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          InstructionsGroupCandidate.push_back(SU->getInstr());
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return SU;
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    PrepareNextSlot();
4697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  }
4707d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  return NULL;
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit* R600SchedStrategy::pickOther(int QID) {
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SUnit *SU = 0;
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<SUnit *> &AQ = Available[QID];
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
477a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  if (AQ.empty()) {
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    MoveUnits(Pending[QID], AQ);
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!AQ.empty()) {
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SU = AQ.back();
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    AQ.resize(AQ.size() - 1);
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return SU;
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)