PostRASchedulerList.cpp revision 6c3643c41b7c00fe503a36b468ac0488d946454e
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include <map>
#include <climits>
using namespace llvm;

STATISTIC(NumStalls, "Number of pipeline stalls");

static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break post-RA scheduling anti-dependencies"),
                      cl::init(true), cl::Hidden);

namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;
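
  // A rough sketch of how the scheduler below models latency with two
  // queues: a unit whose final predecessor issues at cycle D over an edge
  // of latency L waits in PendingQueue until CurCycle reaches D + L, then
  // moves to AvailableQueue and competes on priority. In pseudocode:
  //
  //   ScheduleNodeTopDown(SU, CurCycle)       // issue SU
  //     for each successor edge E of SU:
  //       ReleaseSucc(SU, E)                  // depth >= CurCycle + latency
  //         if all preds of E's unit issued: PendingQueue.push_back(...)
  //   ...
  //   if (PendingQueue[i]->getDepth() <= CurCycle)
  //     AvailableQueue.push(PendingQueue[i])  // result ready; may issue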

  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation). Once the operands become available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

  public:
    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT)
      : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {}

    void Schedule();

  private:
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DOUT << "PostRAScheduler\n";

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {

    SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT);

    Scheduler.Run();

    Scheduler.EmitSchedule();
  }

  return true;
}

/// Schedule - Schedule the DAG using list scheduling.
void SchedulePostRATDList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build scheduling units.
  BuildSchedUnits();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      BuildSchedUnits();
    }
  }

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// getInstrOperandRegClass - Return the register class of operand Op of an
/// instruction with the specified TargetInstrDesc.
static const TargetRegisterClass*
getInstrOperandRegClass(const TargetRegisterInfo *TRI,
                        const TargetInstrInfo *TII, const TargetInstrDesc &II,
                        unsigned Op) {
  if (Op >= II.getNumOperands())
    return NULL;
  if (II.OpInfo[Op].isLookupPtrRegClass())
    return TII->getPointerRegClass();
  return TRI->getRegClass(II.OpInfo[Op].RegClass);
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}
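
// For example, if SU has two predecessor edges, a data edge from a unit at
// depth 4 with latency 2 and an anti edge from a unit at depth 5 with
// latency 1, both total 6; the tie-break above picks the anti edge, since
// anti-dependence edges are the only kind this pass can hope to remove.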

/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (BB->empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DOUT << "Critical path has total latency "
       << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";

  // We'll be ignoring anti-dependencies on non-allocatable registers, because
  // they may not be safe to break.
  const BitVector AllocatableSet = TRI->getAllocatableSet(*MF);

  // Track progress along the critical path through the SUnit graph as we walk
  // the instructions.
  SUnit *CriticalPathSU = Max;
  MachineInstr *CriticalPathMI = CriticalPathSU->getInstr();

  // For live regs that are only used in one register class in a live range,
  // the register class. If the register is not live, the corresponding value
  // is null. If the register is live but used in multiple register classes,
  // the corresponding value is -1 casted to a pointer.
  const TargetRegisterClass *
    Classes[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  // The index of the most recent kill (proceeding bottom-up), or ~0u if
  // the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(KillIndices, array_endof(KillIndices), ~0u);
  // The index of the most recent complete def (proceeding bottom up), or ~0u
  // if the register is live.
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

  // Determine the live-out physregs for this block.
  if (BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = ~0u;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = ~0u;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = ~0u;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = ~0u;
        }
      }
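
  // The two index arrays are kept complementary from here on, as the asserts
  // in the renaming loop below verify: a live register has DefIndices ~0u
  // and its kill recorded in KillIndices (with BB->size() standing in for
  // "live-out"), while a dead register has KillIndices ~0u and DefIndices
  // recording the nearest def below the current position.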

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = ~0u;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = ~0u;
    }
  }

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = BB->size() - 1;
  for (MachineBasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend();
       I != E; ++I, --Count) {
    MachineInstr *MI = &*I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;
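
    // As a reminder of what is being broken here, an anti-dependence is a
    // write-after-read hazard:
    //
    //   ... = R1    // last use of R1's current value
    //   R1  = ...   // redefinition of R1; anti-dependent on the use above
    //
    // No value flows between the two instructions, so renaming the
    // redefinition (and the uses it reaches) to a free register removes
    // the ordering constraint entirely.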

    // Check if this instruction has a dependence on the critical path that
    // is an anti-dependence that we may be able to break. If it is, set
    // AntiDepReg to the non-zero register associated with the anti-dependence.
    //
    // We limit our attention to the critical path as a heuristic to avoid
    // breaking anti-dependence edges that aren't going to significantly
    // impact the overall schedule. There are a limited number of registers
    // and we want to save them for the important edges.
    //
    // TODO: Instructions with multiple defs could have multiple
    // anti-dependencies. The current code here only knows how to break one
    // edge per instruction. Note that we'd have to be able to break all of
    // the anti-dependencies in an instruction in order to be effective.
    unsigned AntiDepReg = 0;
    if (MI == CriticalPathMI) {
      if (SDep *Edge = CriticalPathStep(CriticalPathSU)) {
        SUnit *NextSU = Edge->getSUnit();

        // Only consider anti-dependence edges.
        if (Edge->getKind() == SDep::Anti) {
          AntiDepReg = Edge->getReg();
          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
          // Don't break anti-dependencies on non-allocatable registers.
          if (AllocatableSet.test(AntiDepReg)) {
            // If the SUnit has other dependencies on the SUnit that it
            // anti-depends on, don't bother breaking the anti-dependency
            // since those edges would prevent such units from being
            // scheduled past each other regardless.
            //
            // Also, if there are dependencies on other SUnits with the
            // same register as the anti-dependency, don't attempt to
            // break it.
            for (SUnit::pred_iterator P = CriticalPathSU->Preds.begin(),
                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
              if (P->getSUnit() == NextSU ?
                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
                AntiDepReg = 0;
                break;
              }
          }
        }
        CriticalPathSU = NextSU;
        CriticalPathMI = CriticalPathSU->getInstr();
      } else {
        // We've reached the end of the critical path.
        CriticalPathSU = 0;
        CriticalPathMI = 0;
      }
    }

    // Scan the register operands for this instruction and update
    // Classes and RegRefs.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      if (MO.isUse() && AntiDepReg == Reg)
        AntiDepReg = 0;

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      // Now check for aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // If an alias of the reg is used during the live range, give up.
        // Note that this allows us to skip checking if AntiDepReg
        // overlaps with any of the aliases, among other things.
        unsigned AliasReg = *Alias;
        if (Classes[AliasReg]) {
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        }
      }

      // If we're still willing to consider this register, note the reference.
      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
        RegRefs.insert(std::make_pair(Reg, &MO));
    }
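
    // One way to read Classes[Reg] at this point: null means the register is
    // not live here; a specific register class means it is live and a
    // candidate for renaming within that class; -1 cast to a pointer means
    // it is live but unusable as a rename target (used in multiple classes,
    // aliased by another live register, or live across the block boundary).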

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(*MF),
           RE = RC->allocation_order_end(*MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
        assert(((KillIndices[AntiDepReg] == ~0u) !=
                (DefIndices[AntiDepReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == ~0u) !=
                (DefIndices[NewReg] == ~0u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == ~0u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DOUT << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n";

          // Update the references to the old register to refer to the new
          // register.
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
             Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = ~0u;

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }

    // Update liveness.
    // Proceeding upwards, registers that are defined but not used in this
    // instruction are now dead.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegReDefinedByTwoAddr(i)) continue;

      DefIndices[Reg] = Count;
      KillIndices[Reg] = ~0u;
      Classes[Reg] = 0;
      RegRefs.erase(Reg);
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        unsigned SubregReg = *Subreg;
        DefIndices[SubregReg] = Count;
        KillIndices[SubregReg] = ~0u;
        Classes[SubregReg] = 0;
        RegRefs.erase(SubregReg);
      }
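      // A def of Reg fully defines each of its sub-registers (handled
      // above), but only partially covers each super-register, whose other
      // parts may still hold live values; hence the conservative marking
      // below rather than treating them as dead.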
      // Conservatively mark super-registers as unusable.
      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
           *Super; ++Super) {
        unsigned SuperReg = *Super;
        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isUse()) continue;

      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      RegRefs.insert(std::make_pair(Reg, &MO));

      // It wasn't previously live but now it is; this is a kill.
      if (KillIndices[Reg] == ~0u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = ~0u;
      }
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        if (KillIndices[AliasReg] == ~0u) {
          KillIndices[AliasReg] = Count;
          DefIndices[AliasReg] = ~0u;
        }
      }
    }
  }
  assert(Count == ~0u && "Count mismatch!");

  return Changed;
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  if (SuccSU->NumPredsLeft == 0) {
    PendingQueue.push_back(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}
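
// To make the cycle accounting concrete: if a unit issues at cycle 3 over an
// edge with latency 2, ReleaseSucc raises its successor's depth to at least
// 5, and the loop below holds that successor in PendingQueue until CurCycle
// reaches 5. When nothing is available, CurCycle jumps straight to the
// minimum pending depth rather than stepping one cycle at a time.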

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    // If there are no instructions available, don't try to issue anything.
    if (AvailableQueue.empty()) {
      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
      continue;
    }

    SUnit *FoundSUnit = AvailableQueue.pop();

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else {
      // Otherwise, we have a pipeline stall, but no other problem; just
      // advance the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      ++NumStalls;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}