R600MachineScheduler.cpp revision 3ff0abfaabc2c7f604d490be587b9c27e7c91ac0
//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {

  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
  InstKindLimit[IDOther] = 32;

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
}

void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
                                  std::vector<SUnit *> &QDst)
{
  QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
  QSrc.clear();
}

SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  NextInstKind = IDOther;

  IsTopNode = false;

  // Check whether we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind].empty());
  bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch].empty() || !Available[IDOther].empty());

  // We want to schedule AR defs as soon as possible to make sure they aren't
  // put in a different ALU clause from their uses.
  if (!SU && !UnscheduledARDefs.empty()) {
    SU = UnscheduledARDefs[0];
    UnscheduledARDefs.erase(UnscheduledARDefs.begin());
    NextInstKind = IDAlu;
  }

  if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (!SU && !PhysicalRegCopy.empty()) {
      SU = PhysicalRegCopy.front();
      PhysicalRegCopy.erase(PhysicalRegCopy.begin());
    }
    if (SU) {
      if (CurEmitted >= InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick an "other" instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  // We want to schedule the AR uses as late as possible to make sure that
  // the AR defs have been released.
  if (!SU && !UnscheduledARUses.empty()) {
    SU = UnscheduledARUses[0];
    UnscheduledARUses.erase(UnscheduledARUses.begin());
    NextInstKind = IDAlu;
  }

  DEBUG(
      if (SU) {
        dbgs() << " ** Pick node **\n";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE \n";
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      // Each inline literal operand consumes an extra slot in the clause.
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
           E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
}

static bool
isPhysicalRegCopy(MachineInstr *MI) {
  if (MI->getOpcode() != AMDGPU::COPY)
    return false;

  return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  DEBUG(dbgs() << "Top Releasing "; SU->dump(DAG););
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
  DEBUG(dbgs() << "Bottom Releasing "; SU->dump(DAG););
  if (isPhysicalRegCopy(SU->getInstr())) {
    PhysicalRegCopy.push_back(SU);
    return;
  }

  int IK = getInstKind(SU);

  // Check for AR register defines.
  for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
                                        E = SU->getInstr()->operands_end();
                                        I != E; ++I) {
    if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
      if (I->isDef()) {
        UnscheduledARDefs.push_back(SU);
      } else {
        UnscheduledARUses.push_back(SU);
      }
      return;
    }
  }

  // There is no export clause, so we can schedule one as soon as it's ready.
  if (IK == IDOther)
    Available[IDOther].push_back(SU);
  else
    Pending[IK].push_back(SU);
}

bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
    return AluPredX;
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL; don't consider it in scheduling.
      return AluDiscarded;
    }
    // Fall through when the copied source is not undef.
  default:
    break;
  }

  // Does the instruction take a whole instruction group (IG)?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
    return IDFetch;

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::PRED_X:
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT_4:
    return IDAlu;
  default:
    return IDOther;
  }
}

SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
  if (Q.empty())
    return NULL;
  for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
    SUnit *SU = *It;
    InstructionsGroupCandidate.push_back(SU->getInstr());
    if (TII->canBundle(InstructionsGroupCandidate)) {
      InstructionsGroupCandidate.pop_back();
      Q.erase((It + 1).base());
      return SU;
    } else {
      InstructionsGroupCandidate.pop_back();
    }
  }
  return NULL;
}

void R600SchedStrategy::LoadAlu() {
  std::vector<SUnit *> &QSrc = Pending[IDAlu];
  for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
    AluKind AK = getAluKind(QSrc[i]);
    AvailableAlus[AK].push_back(QSrc[i]);
  }
  QSrc.clear();
}

void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}

void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is both def'd and used in the same
  // instruction and we try to constrain its regclass.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
       E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() &&
        MO.getReg() == MI->getOperand(0).getReg())
      return;
  }
  // Constrain the regclass of DestReg so that it is assigned to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  if (SlotedSU)
    return SlotedSU;
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (UnslotedSU)
    AssignSlot(UnslotedSU->getInstr(), Slot);
  return UnslotedSU;
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
      AvailableAlus[AluPredX].empty();
}
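// OccupedSlotsMask encodes which channels of the instruction group currently
// being formed are taken: bit 0 is the X slot, bit 1 Y, bit 2 Z and bit 3 W.
// A value of 15 (all four bits set) marks the group as full, so the next
// pickAlu() iteration opens a fresh group through PrepareNextSlot().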
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Bottom-up scheduling: PRED_X must come first.
      if (!AvailableAlus[AluPredX].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluPredX]);
      }
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (int Chan = 3; Chan > -1; --Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate.push_back(SU->getInstr());
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  std::vector<SUnit *> &AQ = Available[QID];

  if (AQ.empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ.empty()) {
    SU = AQ.back();
    AQ.resize(AQ.size() - 1);
  }
  return SU;
}
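//===----------------------------------------------------------------------===//
//
// For reference, a minimal sketch of how a strategy like this is hooked into
// the machine scheduler. It assumes the 2013-era MachineScheduler API, in
// which ScheduleDAGMI took ownership of a raw MachineSchedStrategy pointer;
// the actual registration for this strategy lives in AMDGPUTargetMachine.cpp.
//
//===----------------------------------------------------------------------===//

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/MachineScheduler.h"

using namespace llvm;

// Factory: wrap the strategy in the generic ScheduleDAGMI driver.
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new R600SchedStrategy());
}

// Registering the factory makes the scheduler selectable on the command line
// via -misched=r600; a target can also return it from its TargetPassConfig
// override of createMachineScheduler().
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);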