R600MachineScheduler.cpp revision 3f179b59e53a8a6d5dfb509857d972461027b809
1//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief R600 Machine Scheduler interface 12// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "misched" 17 18#include "R600MachineScheduler.h" 19#include "llvm/CodeGen/MachineRegisterInfo.h" 20#include "llvm/CodeGen/LiveIntervalAnalysis.h" 21#include "llvm/Pass.h" 22#include "llvm/PassManager.h" 23#include "llvm/Support/raw_ostream.h" 24#include <set> 25 26using namespace llvm; 27 28void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { 29 30 DAG = dag; 31 TII = static_cast<const R600InstrInfo*>(DAG->TII); 32 TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); 33 MRI = &DAG->MRI; 34 Available[IDAlu]->clear(); 35 Available[IDFetch]->clear(); 36 Available[IDOther]->clear(); 37 CurInstKind = IDOther; 38 CurEmitted = 0; 39 OccupedSlotsMask = 15; 40 memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); 41 InstKindLimit[IDAlu] = 120; // 120 minus 8 for security 42 43 44 const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>(); 45 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) { 46 InstKindLimit[IDFetch] = 7; // 8 minus 1 for security 47 } else { 48 InstKindLimit[IDFetch] = 15; // 16 minus 1 for security 49 } 50} 51 52void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) 53{ 54 if (QSrc->empty()) 55 return; 56 for (ReadyQueue::iterator I = QSrc->begin(), 57 E = QSrc->end(); I != E; ++I) { 58 (*I)->NodeQueueId &= ~QSrc->getID(); 59 QDst->push(*I); 60 } 61 QSrc->clear(); 62} 63 64SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { 65 SUnit *SU = 0; 66 IsTopNode = true; 67 NextInstKind = IDOther; 68 69 // check if we might want to switch current clause type 70 bool AllowSwitchToAlu = (CurInstKind == IDOther) || 71 (CurEmitted > InstKindLimit[CurInstKind]) || 72 (Available[CurInstKind]->empty()); 73 bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) && 74 (!Available[IDFetch]->empty() || !Available[IDOther]->empty()); 75 76 if ((AllowSwitchToAlu && CurInstKind != IDAlu) || 77 (!AllowSwitchFromAlu && CurInstKind == IDAlu)) { 78 // try to pick ALU 79 SU = pickAlu(); 80 if (SU) { 81 if (CurEmitted > InstKindLimit[IDAlu]) 82 CurEmitted = 0; 83 NextInstKind = IDAlu; 84 } 85 } 86 87 if (!SU) { 88 // try to pick FETCH 89 SU = pickOther(IDFetch); 90 if (SU) 91 NextInstKind = IDFetch; 92 } 93 94 // try to pick other 95 if (!SU) { 96 SU = pickOther(IDOther); 97 if (SU) 98 NextInstKind = IDOther; 99 } 100 101 DEBUG( 102 if (SU) { 103 dbgs() << "picked node: "; 104 SU->dump(DAG); 105 } else { 106 dbgs() << "NO NODE "; 107 for (int i = 0; i < IDLast; ++i) { 108 Available[i]->dump(); 109 Pending[i]->dump(); 110 } 111 for (unsigned i = 0; i < DAG->SUnits.size(); i++) { 112 const SUnit &S = DAG->SUnits[i]; 113 if (!S.isScheduled) 114 S.dump(DAG); 115 } 116 } 117 ); 118 119 return SU; 120} 121 122void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { 123 124 DEBUG(dbgs() << "scheduled: "); 125 DEBUG(SU->dump(DAG)); 126 127 if (NextInstKind != CurInstKind) { 128 DEBUG(dbgs() << "Instruction Type Switch\n"); 129 if (NextInstKind != IDAlu) 130 OccupedSlotsMask = 15; 131 CurEmitted = 0; 132 CurInstKind = NextInstKind; 133 } 134 135 if (CurInstKind == IDAlu) { 136 switch (getAluKind(SU)) { 137 case AluT_XYZW: 138 CurEmitted += 4; 139 break; 140 case AluDiscarded: 141 break; 142 default: { 143 ++CurEmitted; 144 for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), 145 E = SU->getInstr()->operands_end(); It != E; ++It) { 146 MachineOperand &MO = *It; 147 if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) 148 ++CurEmitted; 149 } 150 } 151 } 152 } else { 153 ++CurEmitted; 154 } 155 156 157 DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); 158 159 if (CurInstKind != IDFetch) { 160 MoveUnits(Pending[IDFetch], Available[IDFetch]); 161 } 162 MoveUnits(Pending[IDOther], Available[IDOther]); 163} 164 165void R600SchedStrategy::releaseTopNode(SUnit *SU) { 166 int IK = getInstKind(SU); 167 168 DEBUG(dbgs() << IK << " <= "); 169 DEBUG(SU->dump(DAG)); 170 171 Pending[IK]->push(SU); 172} 173 174void R600SchedStrategy::releaseBottomNode(SUnit *SU) { 175} 176 177bool R600SchedStrategy::regBelongsToClass(unsigned Reg, 178 const TargetRegisterClass *RC) const { 179 if (!TargetRegisterInfo::isVirtualRegister(Reg)) { 180 return RC->contains(Reg); 181 } else { 182 return MRI->getRegClass(Reg) == RC; 183 } 184} 185 186R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { 187 MachineInstr *MI = SU->getInstr(); 188 189 switch (MI->getOpcode()) { 190 case AMDGPU::INTERP_PAIR_XY: 191 case AMDGPU::INTERP_PAIR_ZW: 192 case AMDGPU::INTERP_VEC_LOAD: 193 return AluT_XYZW; 194 case AMDGPU::COPY: 195 if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) { 196 // %vregX = COPY Tn_X is likely to be discarded in favor of an 197 // assignement of Tn_X to %vregX, don't considers it in scheduling 198 return AluDiscarded; 199 } 200 else if (MI->getOperand(1).isUndef()) { 201 // MI will become a KILL, don't considers it in scheduling 202 return AluDiscarded; 203 } 204 default: 205 break; 206 } 207 208 // Does the instruction take a whole IG ? 209 if(TII->isVector(*MI) || 210 TII->isCubeOp(MI->getOpcode()) || 211 TII->isReductionOp(MI->getOpcode())) 212 return AluT_XYZW; 213 214 // Is the result already assigned to a channel ? 215 unsigned DestSubReg = MI->getOperand(0).getSubReg(); 216 switch (DestSubReg) { 217 case AMDGPU::sub0: 218 return AluT_X; 219 case AMDGPU::sub1: 220 return AluT_Y; 221 case AMDGPU::sub2: 222 return AluT_Z; 223 case AMDGPU::sub3: 224 return AluT_W; 225 default: 226 break; 227 } 228 229 // Is the result already member of a X/Y/Z/W class ? 230 unsigned DestReg = MI->getOperand(0).getReg(); 231 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || 232 regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) 233 return AluT_X; 234 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) 235 return AluT_Y; 236 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) 237 return AluT_Z; 238 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) 239 return AluT_W; 240 if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) 241 return AluT_XYZW; 242 243 return AluAny; 244 245} 246 247int R600SchedStrategy::getInstKind(SUnit* SU) { 248 int Opcode = SU->getInstr()->getOpcode(); 249 250 if (TII->isALUInstr(Opcode)) { 251 return IDAlu; 252 } 253 254 switch (Opcode) { 255 case AMDGPU::COPY: 256 case AMDGPU::CONST_COPY: 257 case AMDGPU::INTERP_PAIR_XY: 258 case AMDGPU::INTERP_PAIR_ZW: 259 case AMDGPU::INTERP_VEC_LOAD: 260 case AMDGPU::DOT4_eg_pseudo: 261 case AMDGPU::DOT4_r600_pseudo: 262 return IDAlu; 263 case AMDGPU::TEX_VTX_CONSTBUF: 264 case AMDGPU::TEX_VTX_TEXBUF: 265 case AMDGPU::TEX_LD: 266 case AMDGPU::TEX_GET_TEXTURE_RESINFO: 267 case AMDGPU::TEX_GET_GRADIENTS_H: 268 case AMDGPU::TEX_GET_GRADIENTS_V: 269 case AMDGPU::TEX_SET_GRADIENTS_H: 270 case AMDGPU::TEX_SET_GRADIENTS_V: 271 case AMDGPU::TEX_SAMPLE: 272 case AMDGPU::TEX_SAMPLE_C: 273 case AMDGPU::TEX_SAMPLE_L: 274 case AMDGPU::TEX_SAMPLE_C_L: 275 case AMDGPU::TEX_SAMPLE_LB: 276 case AMDGPU::TEX_SAMPLE_C_LB: 277 case AMDGPU::TEX_SAMPLE_G: 278 case AMDGPU::TEX_SAMPLE_C_G: 279 case AMDGPU::TXD: 280 case AMDGPU::TXD_SHADOW: 281 return IDFetch; 282 default: 283 DEBUG( 284 dbgs() << "other inst: "; 285 SU->dump(DAG); 286 ); 287 return IDOther; 288 } 289} 290 291class ConstPairs { 292private: 293 unsigned XYPair; 294 unsigned ZWPair; 295public: 296 ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { 297 for (unsigned i = 0; i < 3; i++) { 298 unsigned ReadConstChan = ReadConst[i] & 3; 299 unsigned ReadConstIndex = ReadConst[i] & (~3); 300 if (ReadConstChan < 2) { 301 if (!XYPair) { 302 XYPair = ReadConstIndex; 303 } 304 } else { 305 if (!ZWPair) { 306 ZWPair = ReadConstIndex; 307 } 308 } 309 } 310 } 311 312 bool isCompatibleWith(const ConstPairs& CP) const { 313 return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && 314 (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); 315 } 316}; 317 318static 319const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) { 320 unsigned ReadConsts[3] = {0, 0, 0}; 321 R600Operands::Ops OpTable[3][2] = { 322 {R600Operands::SRC0, R600Operands::SRC0_SEL}, 323 {R600Operands::SRC1, R600Operands::SRC1_SEL}, 324 {R600Operands::SRC2, R600Operands::SRC2_SEL}, 325 }; 326 327 if (!TII->isALUInstr(MI.getOpcode())) 328 return ConstPairs(ReadConsts); 329 330 for (unsigned i = 0; i < 3; i++) { 331 int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); 332 if (SrcIdx < 0) 333 break; 334 if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) 335 ReadConsts[i] =MI.getOperand( 336 TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); 337 } 338 return ConstPairs(ReadConsts); 339} 340 341bool 342R600SchedStrategy::isBundleable(const MachineInstr& MI) { 343 const ConstPairs &MIPair = getPairs(TII, MI); 344 for (unsigned i = 0; i < 4; i++) { 345 if (!InstructionsGroupCandidate[i]) 346 continue; 347 const ConstPairs &IGPair = getPairs(TII, 348 *InstructionsGroupCandidate[i]->getInstr()); 349 if (!IGPair.isCompatibleWith(MIPair)) 350 return false; 351 } 352 return true; 353} 354 355SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { 356 if (Q.empty()) 357 return NULL; 358 for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); 359 It != E; ++It) { 360 SUnit *SU = *It; 361 if (isBundleable(*SU->getInstr())) { 362 Q.erase(It); 363 return SU; 364 } 365 } 366 return NULL; 367} 368 369void R600SchedStrategy::LoadAlu() { 370 ReadyQueue *QSrc = Pending[IDAlu]; 371 for (ReadyQueue::iterator I = QSrc->begin(), 372 E = QSrc->end(); I != E; ++I) { 373 (*I)->NodeQueueId &= ~QSrc->getID(); 374 AluKind AK = getAluKind(*I); 375 AvailableAlus[AK].insert(*I); 376 } 377 QSrc->clear(); 378} 379 380void R600SchedStrategy::PrepareNextSlot() { 381 DEBUG(dbgs() << "New Slot\n"); 382 assert (OccupedSlotsMask && "Slot wasn't filled"); 383 OccupedSlotsMask = 0; 384 memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); 385 LoadAlu(); 386} 387 388void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { 389 unsigned DestReg = MI->getOperand(0).getReg(); 390 // PressureRegister crashes if an operand is def and used in the same inst 391 // and we try to constraint its regclass 392 for (MachineInstr::mop_iterator It = MI->operands_begin(), 393 E = MI->operands_end(); It != E; ++It) { 394 MachineOperand &MO = *It; 395 if (MO.isReg() && !MO.isDef() && 396 MO.getReg() == MI->getOperand(0).getReg()) 397 return; 398 } 399 // Constrains the regclass of DestReg to assign it to Slot 400 switch (Slot) { 401 case 0: 402 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); 403 break; 404 case 1: 405 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); 406 break; 407 case 2: 408 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); 409 break; 410 case 3: 411 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); 412 break; 413 } 414} 415 416SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { 417 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; 418 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); 419 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); 420 if (!UnslotedSU) { 421 return SlotedSU; 422 } else if (!SlotedSU) { 423 AssignSlot(UnslotedSU->getInstr(), Slot); 424 return UnslotedSU; 425 } else { 426 //Determine which one to pick (the lesser one) 427 if (CompareSUnit()(SlotedSU, UnslotedSU)) { 428 AvailableAlus[AluAny].insert(UnslotedSU); 429 return SlotedSU; 430 } else { 431 AvailableAlus[IndexToID[Slot]].insert(SlotedSU); 432 AssignSlot(UnslotedSU->getInstr(), Slot); 433 return UnslotedSU; 434 } 435 } 436} 437 438bool R600SchedStrategy::isAvailablesAluEmpty() const { 439 return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() && 440 AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() && 441 AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() && 442 AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty(); 443} 444 445SUnit* R600SchedStrategy::pickAlu() { 446 while (!isAvailablesAluEmpty()) { 447 if (!OccupedSlotsMask) { 448 // Flush physical reg copies (RA will discard them) 449 if (!AvailableAlus[AluDiscarded].empty()) { 450 OccupedSlotsMask = 15; 451 return PopInst(AvailableAlus[AluDiscarded]); 452 } 453 // If there is a T_XYZW alu available, use it 454 if (!AvailableAlus[AluT_XYZW].empty()) { 455 OccupedSlotsMask = 15; 456 return PopInst(AvailableAlus[AluT_XYZW]); 457 } 458 } 459 for (unsigned Chan = 0; Chan < 4; ++Chan) { 460 bool isOccupied = OccupedSlotsMask & (1 << Chan); 461 if (!isOccupied) { 462 SUnit *SU = AttemptFillSlot(Chan); 463 if (SU) { 464 OccupedSlotsMask |= (1 << Chan); 465 InstructionsGroupCandidate[Chan] = SU; 466 return SU; 467 } 468 } 469 } 470 PrepareNextSlot(); 471 } 472 return NULL; 473} 474 475SUnit* R600SchedStrategy::pickOther(int QID) { 476 SUnit *SU = 0; 477 ReadyQueue *AQ = Available[QID]; 478 479 if (AQ->empty()) { 480 MoveUnits(Pending[QID], AQ); 481 } 482 if (!AQ->empty()) { 483 SU = *AQ->begin(); 484 AQ->remove(AQ->begin()); 485 } 486 return SU; 487} 488 489