15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===// 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The LLVM Compiler Infrastructure 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This file is distributed under the University of Illinois Open Source 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// License. See LICENSE.TXT for details. 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \file 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/// \brief R600 Machine Scheduler interface 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//===----------------------------------------------------------------------===// 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define DEBUG_TYPE "misched" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "R600MachineScheduler.h" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/LiveIntervalAnalysis.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h" 21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "llvm/Pass.h" 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/PassManager.h" 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "llvm/Support/raw_ostream.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)using namespace llvm; 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 29ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch DAG = dag; 30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) TII = static_cast<const R600InstrInfo*>(DAG->TII); 317d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); 327d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) MRI = &DAG->MRI; 337d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) CurInstKind = IDOther; 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CurEmitted = 0; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask = 31; 364e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) InstKindLimit[IDAlu] = TII->getMaxAlusPerClause(); 373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) InstKindLimit[IDOther] = 32; 383551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>(); 40d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) InstKindLimit[IDFetch] = ST.getTexVTXClauseSize(); 41d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) AluInstCount = 0; 4268043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) FetchInstCount = 0; 4368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles)} 443551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 453551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc, 463551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) std::vector<SUnit *> &QDst) 473551c9c881056c480085172ff9840cab31610854Torne (Richard Coles){ 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) QDst.insert(QDst.end(), QSrc.begin(), QSrc.end()); 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) QSrc.clear(); 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)unsigned getWFCountLimitedByGPR(unsigned GPRCount) { 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) assert (GPRCount && "GPRCount cannot be 0"); 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 248 / GPRCount; 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SUnit *SU = 0; 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NextInstKind = IDOther; 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) IsTopNode = false; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // check if we might want to switch current clause type 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) || 6658537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) (Available[CurInstKind].empty()); 6758537e28ecd584eab876aee8be7156509866d23aTorne (Richard Coles) bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) && 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!Available[IDFetch].empty() || !Available[IDOther].empty()); 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CurInstKind == IDAlu && !Available[IDFetch].empty()) { 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We use the heuristic provided by AMD Accelerated Parallel Processing 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // OpenCL Programming Guide : 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The approx. number of WF that allows TEX inst to hide ALU inst is : 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU)) 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float ALUFetchRationEstimate = 7668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) / 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (FetchInstCount + Available[IDFetch].size()); 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned NeededWF = 62.5f / ALUFetchRationEstimate; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" ); 8068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) // We assume the local GPR requirements to be "dominated" by the requirement 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // after TEX are indeed likely to consume or generate values from/for the 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TEX clause. 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // one GPR) or TmXYZW = TnXYZW (need 2 GPR). 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (TODO : use RegisterPressure) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If we are going too use too many GPR, we flush Fetch instruction to lower 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // register pressure on 128 bits regs. 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned NearRegisterRequirement = 2 * Available[IDFetch].size(); 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement)) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AllowSwitchFromAlu = true; 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We want to scheduled AR defs as soon as possible to make sure they aren't 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // put in a different ALU clause from their uses. 9868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) if (!SU && !UnscheduledARDefs.empty()) { 99c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) SU = UnscheduledARDefs[0]; 100c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) UnscheduledARDefs.erase(UnscheduledARDefs.begin()); 101c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) NextInstKind = IDAlu; 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) || 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (!AllowSwitchFromAlu && CurInstKind == IDAlu))) { 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // try to pick ALU 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU = pickAlu(); 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!SU && !PhysicalRegCopy.empty()) { 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU = PhysicalRegCopy.front(); 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PhysicalRegCopy.erase(PhysicalRegCopy.begin()); 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (SU) { 11368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) if (CurEmitted >= InstKindLimit[IDAlu]) 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CurEmitted = 0; 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NextInstKind = IDAlu; 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!SU) { 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // try to pick FETCH 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU = pickOther(IDFetch); 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (SU) 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NextInstKind = IDFetch; 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 12568043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // try to pick other 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!SU) { 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU = pickOther(IDOther); 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (SU) 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NextInstKind = IDOther; 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We want to schedule the AR uses as late as possible to make sure that 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the AR defs have been released. 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!SU && !UnscheduledARUses.empty()) { 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU = UnscheduledARUses[0]; 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UnscheduledARUses.erase(UnscheduledARUses.begin()); 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NextInstKind = IDAlu; 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG( 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (SU) { 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dbgs() << " ** Pick node **\n"; 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU->dump(DAG); 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) dbgs() << "NO NODE \n"; 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (unsigned i = 0; i < DAG->SUnits.size(); i++) { 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SUnit &S = DAG->SUnits[i]; 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!S.isScheduled) 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) S.dump(DAG); 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 15368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) } 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ); 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return SU; 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { 16068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) if (NextInstKind != CurInstKind) { 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << "Instruction Type Switch\n"); 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (NextInstKind != IDAlu) 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask |= 31; 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CurEmitted = 0; 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CurInstKind = NextInstKind; 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CurInstKind == IDAlu) { 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AluInstCount ++; 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (getAluKind(SU)) { 1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AluT_XYZW: 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) CurEmitted += 4; 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AluDiscarded: 1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) break; 1762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) default: { 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++CurEmitted; 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) E = SU->getInstr()->operands_end(); It != E; ++It) { 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineOperand &MO = *It; 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++CurEmitted; 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++CurEmitted; 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (CurInstKind != IDFetch) { 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MoveUnits(Pending[IDFetch], Available[IDFetch]); 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FetchInstCount++; 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static bool 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)isPhysicalRegCopy(MachineInstr *MI) { 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (MI->getOpcode() != AMDGPU::COPY) 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::releaseTopNode(SUnit *SU) { 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG);); 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::releaseBottomNode(SUnit *SU) { 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG);); 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (isPhysicalRegCopy(SU->getInstr())) { 21468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) PhysicalRegCopy.push_back(SU); 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int IK = getInstKind(SU); 21968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Check for AR register defines 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(), 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) E = SU->getInstr()->operands_end(); 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) I != E; ++I) { 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (I->isReg() && I->getReg() == AMDGPU::AR_X) { 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (I->isDef()) { 2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UnscheduledARDefs.push_back(SU); 2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UnscheduledARUses.push_back(SU); 2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // There is no export clause, we can schedule one as soon as its ready 2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IK == IDOther) 2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Available[IDOther].push_back(SU); 2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Pending[IK].push_back(SU); 2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool R600SchedStrategy::regBelongsToClass(unsigned Reg, 2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TargetRegisterClass *RC) const { 2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!TargetRegisterInfo::isVirtualRegister(Reg)) { 2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return RC->contains(Reg); 2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return MRI->getRegClass(Reg) == RC; 2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { 2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineInstr *MI = SU->getInstr(); 2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TII->isTransOnly(MI)) 2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluTrans; 2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (MI->getOpcode()) { 25868043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) case AMDGPU::PRED_X: 2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluPredX; 2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::INTERP_PAIR_XY: 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::INTERP_PAIR_ZW: 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::INTERP_VEC_LOAD: 263f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) case AMDGPU::DOT_4: 264f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return AluT_XYZW; 265f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) case AMDGPU::COPY: 266f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) if (MI->getOperand(1).isUndef()) { 267f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // MI will become a KILL, don't considers it in scheduling 268f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return AluDiscarded; 269f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 270f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) default: 271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) break; 272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Does the instruction take a whole IG ? 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // XXX: Is it possible to add a helper function in R600InstrInfo that can 27668043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) // be used here and in R600PacketizerList::isSoloInstruction() ? 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if(TII->isVector(*MI) || 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TII->isCubeOp(MI->getOpcode()) || 279f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) TII->isReductionOp(MI->getOpcode()) || 280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) MI->getOpcode() == AMDGPU::GROUP_BARRIER) { 281f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) return AluT_XYZW; 282f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 28368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TII->isLDSInstr(MI->getOpcode())) { 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_X; 2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Is the result already assigned to a channel ? 2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned DestSubReg = MI->getOperand(0).getSubReg(); 2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (DestSubReg) { 2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::sub0: 2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_X; 2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::sub1: 2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_Y; 2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::sub2: 2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_Z; 2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::sub3: 2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_W; 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Is the result already member of a X/Y/Z/W class ? 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned DestReg = MI->getOperand(0).getReg(); 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_X; 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_Y; 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_Z; 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_W; 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluT_XYZW; 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return AluAny; 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3204e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int R600SchedStrategy::getInstKind(SUnit* SU) { 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int Opcode = SU->getInstr()->getOpcode(); 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode)) 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return IDFetch; 326d0247b1b59f9c528cb6df88b4f2b9afaf80d181eTorne (Richard Coles) 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TII->isALUInstr(Opcode)) { 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return IDAlu; 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (Opcode) { 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::PRED_X: 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::COPY: 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::CONST_COPY: 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::INTERP_PAIR_XY: 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::INTERP_PAIR_ZW: 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::INTERP_VEC_LOAD: 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case AMDGPU::DOT_4: 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return IDAlu; 34068043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) default: 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return IDOther; 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) { 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (Q.empty()) 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return NULL; 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend(); 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) It != E; ++It) { 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SUnit *SU = *It; 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InstructionsGroupCandidate.push_back(SU->getInstr()); 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) { 35368043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) InstructionsGroupCandidate.pop_back(); 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Q.erase((It + 1).base()); 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return SU; 3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InstructionsGroupCandidate.pop_back(); 3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return NULL; 3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::LoadAlu() { 3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<SUnit *> &QSrc = Pending[IDAlu]; 3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (unsigned i = 0, e = QSrc.size(); i < e; ++i) { 3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AluKind AK = getAluKind(QSrc[i]); 3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AvailableAlus[AK].push_back(QSrc[i]); 3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 36968043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) QSrc.clear(); 3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void R600SchedStrategy::PrepareNextSlot() { 3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DEBUG(dbgs() << "New Slot\n"); 3742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert (OccupedSlotsMask && "Slot wasn't filled"); 3752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) OccupedSlotsMask = 0; 3762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) InstructionsGroupCandidate.clear(); 3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) LoadAlu(); 3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { 3812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); 3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (DstIndex == -1) { 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unsigned DestReg = MI->getOperand(DstIndex).getReg(); 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // PressureRegister crashes if an operand is def and used in the same inst 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // and we try to constraint its regclass 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (MachineInstr::mop_iterator It = MI->operands_begin(), 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) E = MI->operands_end(); It != E; ++It) { 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MachineOperand &MO = *It; 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (MO.isReg() && !MO.isDef() && 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MO.getReg() == DestReg) 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Constrains the regclass of DestReg to assign it to Slot 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (Slot) { 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 0: 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); 3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 1: 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 2: 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case 3: 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); 4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { 4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; 4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); 4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (SlotedSU) 4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return SlotedSU; 4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (UnslotedSU) 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AssignSlot(UnslotedSU->getInstr(), Slot); 4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return UnslotedSU; 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)unsigned R600SchedStrategy::AvailablesAluCount() const { 424a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() + 425a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() + 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() + 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() + 4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AvailableAlus[AluPredX].size(); 4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit* R600SchedStrategy::pickAlu() { 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (AvailablesAluCount() || !Pending[IDAlu].empty()) { 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!OccupedSlotsMask) { 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Bottom up scheduling : predX must comes first 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!AvailableAlus[AluPredX].empty()) { 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask |= 31; 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return PopInst(AvailableAlus[AluPredX]); 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Flush physical reg copies (RA will discard them) 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!AvailableAlus[AluDiscarded].empty()) { 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask |= 31; 4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return PopInst(AvailableAlus[AluDiscarded]); 44390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 44468043e1e95eeb07d5cae7aca370b26518b0867d6Torne (Richard Coles) // If there is a T_XYZW alu available, use it 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!AvailableAlus[AluT_XYZW].empty()) { 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask |= 15; 447a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) return PopInst(AvailableAlus[AluT_XYZW]); 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool TransSlotOccuped = OccupedSlotsMask & 16; 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!TransSlotOccuped) { 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!AvailableAlus[AluTrans].empty()) { 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask |= 16; 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return PopInst(AvailableAlus[AluTrans]); 45590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int Chan = 3; Chan > -1; --Chan) { 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool isOccupied = OccupedSlotsMask & (1 << Chan); 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!isOccupied) { 4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SUnit *SU = AttemptFillSlot(Chan); 4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (SU) { 4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OccupedSlotsMask |= (1 << Chan); 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InstructionsGroupCandidate.push_back(SU->getInstr()); 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return SU; 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) PrepareNextSlot(); 4697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) } 4707d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) return NULL; 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)SUnit* R600SchedStrategy::pickOther(int QID) { 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SUnit *SU = 0; 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<SUnit *> &AQ = Available[QID]; 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 477a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) if (AQ.empty()) { 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) MoveUnits(Pending[QID], AQ); 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!AQ.empty()) { 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SU = AQ.back(); 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) AQ.resize(AQ.size() - 1); 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return SU; 4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)