PostRASchedulerList.cpp revision 5d36e5a5fe59c26f71132b31cf34c3a314f07528
//===----- PostRASchedulerList.cpp - list scheduler -----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a top-down list scheduler, using standard algorithms.
// The basic approach uses a priority queue of available nodes to schedule.
// One at a time, nodes are taken from the priority queue (thus in priority
// order), checked for legality to schedule, and emitted if legal.
//
// Nodes may not be legal to schedule either due to structural hazards (e.g.
// pipeline or resource constraints) or because an input to the instruction has
// not completed execution.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "post-RA-sched"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
#include <map>
#include <climits>
using namespace llvm;

STATISTIC(NumStalls, "Number of pipeline stalls");

static cl::opt<bool>
EnableAntiDepBreaking("break-anti-dependencies",
                      cl::desc("Break scheduling anti-dependencies"),
                      cl::init(true));

namespace {
  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
  public:
    static char ID;
    PostRAScheduler() : MachineFunctionPass(&ID) {}

    void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineLoopInfo>();
      AU.addPreserved<MachineLoopInfo>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    const char *getPassName() const {
      return "Post RA top-down list latency scheduler";
    }

    bool runOnMachineFunction(MachineFunction &Fn);
  };
  char PostRAScheduler::ID = 0;

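  /// SchedulePostRATDList - A ScheduleDAGInstrs subclass that drives the
  /// post-RA scheduling of a single basic block: it optionally breaks
  /// anti-dependencies along the critical path first, then runs a top-down
  /// list scheduler over the resulting dependence graph.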
  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
    /// AvailableQueue - The priority queue to use for the available SUnits.
    ///
    LatencyPriorityQueue AvailableQueue;

    /// PendingQueue - This contains all of the instructions whose operands have
    /// been issued, but their results are not ready yet (due to the latency of
    /// the operation). Once the operands become available, the instruction is
    /// added to the AvailableQueue.
    std::vector<SUnit*> PendingQueue;

    /// Topo - A topological ordering for SUnits.
    ScheduleDAGTopologicalSort Topo;

  public:
    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm,
                         const MachineLoopInfo &MLI,
                         const MachineDominatorTree &MDT)
      : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {}

    void Schedule();

  private:
    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
    void ListScheduleTopDown();
    bool BreakAntiDependencies();
  };
}

bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
  DOUT << "PostRAScheduler\n";

  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  // Loop over all of the basic blocks.
  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {

    SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT);

    Scheduler.Run();

    Scheduler.EmitSchedule();
  }

  return true;
}

/// Schedule - Schedule the DAG using list scheduling.
void SchedulePostRATDList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Build scheduling units.
  BuildSchedUnits();

  if (EnableAntiDepBreaking) {
    if (BreakAntiDependencies()) {
      // We made changes. Update the dependency graph.
      // Theoretically we could update the graph in place:
      // When a live range is changed to use a different register, remove
      // the def's anti-dependence *and* output-dependence edges due to
      // that register, and add new anti-dependence and output-dependence
      // edges based on the next live range of the register.
      SUnits.clear();
      BuildSchedUnits();
    }
  }

  AvailableQueue.initNodes(SUnits);

  ListScheduleTopDown();

  AvailableQueue.releaseState();
}

/// getInstrOperandRegClass - Return register class of the operand of an
/// instruction of the specified TargetInstrDesc.
static const TargetRegisterClass*
getInstrOperandRegClass(const TargetRegisterInfo *TRI,
                        const TargetInstrInfo *TII, const TargetInstrDesc &II,
                        unsigned Op) {
  if (Op >= II.getNumOperands())
    return NULL;
  if (II.OpInfo[Op].isLookupPtrRegClass())
    return TII->getPointerRegClass();
  return TRI->getRegClass(II.OpInfo[Op].RegClass);
}

/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static SDep *CriticalPathStep(SUnit *SU) {
  SDep *Next = 0;
  unsigned NextDepth = 0;
  // Find the predecessor edge with the greatest depth.
  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
       P != PE; ++P) {
    SUnit *PredSU = P->getSUnit();
    unsigned PredLatency = P->getLatency();
    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
    // In the case of a latency tie, prefer an anti-dependency edge over
    // other types of edges.
    if (NextDepth < PredTotalLatency ||
        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
      NextDepth = PredTotalLatency;
      Next = &*P;
    }
  }
  return Next;
}

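// Anti-dependence breaking works by renaming the later of two live ranges
// that happen to reuse the same physical register. For example (illustrative):
//
//   r1 = opA ...
//      = use r1
//   r1 = opB ...     <- anti-dependence: opB must not write r1 until the
//      = use r1         use above has read it
//
// Rewriting the second live range to use a free register r2 removes the
// anti-dependence, so opB no longer has to be scheduled behind the first
// use of r1.
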
/// BreakAntiDependencies - Identify anti-dependencies along the critical path
/// of the ScheduleDAG and break them by renaming registers.
///
bool SchedulePostRATDList::BreakAntiDependencies() {
  // The code below assumes that there is at least one instruction,
  // so just duck out immediately if the block is empty.
  if (BB->empty()) return false;

  // Find the node at the bottom of the critical path.
  SUnit *Max = 0;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    SUnit *SU = &SUnits[i];
    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
      Max = SU;
  }

  DOUT << "Critical path has total latency "
       << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";

  // Walk the critical path from the bottom up. Collect all anti-dependence
  // edges on the critical path. Skip anti-dependencies between SUnits that
  // are connected with other edges, since such units won't be able to be
  // scheduled past each other anyway.
  //
  // The heuristic is that edges on the critical path are more important to
  // break than other edges. And since there are a limited number of
  // registers, we don't want to waste them breaking edges that aren't
  // important.
  //
  // TODO: Instructions with multiple defs could have multiple
  // anti-dependencies. The current code here only knows how to break one
  // edge per instruction. Note that we'd have to be able to break all of
  // the anti-dependencies in an instruction in order to be effective.
  BitVector AllocatableSet = TRI->getAllocatableSet(*MF);
  DenseMap<MachineInstr *, unsigned> CriticalAntiDeps;
  SUnit *SU = Max;
  for (SDep *Edge = CriticalPathStep(SU); Edge;
       Edge = CriticalPathStep(SU = Edge->getSUnit())) {
    SUnit *NextSU = Edge->getSUnit();
    // Only consider anti-dependence edges.
    if (Edge->getKind() != SDep::Anti)
      continue;
    unsigned AntiDepReg = Edge->getReg();
    assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
    // Don't break anti-dependencies on non-allocatable registers.
    if (!AllocatableSet.test(AntiDepReg))
      continue;
    // If the SUnit has other dependencies on the SUnit that it
    // anti-depends on, don't bother breaking the anti-dependency.
    // Also, if there are dependencies on other SUnits with the
    // same register as the anti-dependency, don't attempt to
    // break it.
    for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
         P != PE; ++P)
      if (P->getSUnit() == NextSU ?
            (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
            (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
        AntiDepReg = 0;
        break;
      }
    if (AntiDepReg != 0)
      CriticalAntiDeps[SU->getInstr()] = AntiDepReg;
  }

  // For live regs that are only used in one register class in a live range,
  // this records that register class. If the register is not live, the
  // corresponding value is null. If the register is live but used in
  // multiple register classes, the corresponding value is -1 cast to a
  // pointer.
  const TargetRegisterClass *
    Classes[TargetRegisterInfo::FirstVirtualRegister] = {};

  // Map registers to all their references within a live range.
  std::multimap<unsigned, MachineOperand *> RegRefs;

  // The index of the most recent kill (proceeding bottom-up), or -1 if
  // the register is not live.
  unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(KillIndices, array_endof(KillIndices), -1);
  // The index of the most recent def (proceeding bottom-up), or -1 if
  // the register is live.
  unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
  std::fill(DefIndices, array_endof(DefIndices), BB->size());

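  // Invariant maintained by the bookkeeping below (and checked by the
  // asserts in the renaming loop): exactly one of KillIndices[Reg] and
  // DefIndices[Reg] is -1 at any time. A live register has a valid kill
  // index and DefIndices[Reg] == -1; a dead register has a valid def index
  // (initially BB->size(), i.e. "off the bottom end") and
  // KillIndices[Reg] == -1.
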
  // Determine the live-out physregs for this block.
  if (!BB->empty() && BB->back().getDesc().isReturn())
    // In a return block, examine the function live-out regs.
    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
         E = MRI.liveout_end(); I != E; ++I) {
      unsigned Reg = *I;
      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[Reg] = BB->size();
      DefIndices[Reg] = -1;
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[AliasReg] = BB->size();
        DefIndices[AliasReg] = -1;
      }
    }
  else
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
           E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        KillIndices[Reg] = BB->size();
        DefIndices[Reg] = -1;
        // Repeat, for all aliases.
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          unsigned AliasReg = *Alias;
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          KillIndices[AliasReg] = BB->size();
          DefIndices[AliasReg] = -1;
        }
      }

  // Consider callee-saved registers as live-out, since we're running after
  // prologue/epilogue insertion so there's no way to add additional
  // saved registers.
  //
  // TODO: If the callee saves and restores these, then we can potentially
  // use them between the save and the restore. To do that, we could scan
  // the exit blocks to see which of these registers are defined.
  // Alternatively, callee-saved registers that aren't saved and restored
  // could be marked live-in in every block.
  for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
    unsigned Reg = *I;
    Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
    KillIndices[Reg] = BB->size();
    DefIndices[Reg] = -1;
    // Repeat, for all aliases.
    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
      unsigned AliasReg = *Alias;
      Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      KillIndices[AliasReg] = BB->size();
      DefIndices[AliasReg] = -1;
    }
  }

  // Consider this pattern:
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  //   A = ...
  //   ... = A
  // There are three anti-dependencies here, and without special care,
  // we'd break all of them using the same register:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  //   B = ...
  //   ... = B
  // because at each anti-dependence, B is the first register that
  // isn't A which is free. This re-introduces anti-dependencies
  // at all but one of the original anti-dependencies that we were
  // trying to break. To avoid this, keep track of the most recent
  // register that each register was replaced with, and avoid
  // using it to repair an anti-dependence on the same register.
  // This lets us produce this:
  //   A = ...
  //   ... = A
  //   B = ...
  //   ... = B
  //   C = ...
  //   ... = C
  //   B = ...
  //   ... = B
  // This still has an anti-dependence on B, but at least it isn't on the
  // original critical path.
  //
  // TODO: If we tracked more than one register here, we could potentially
  // fix that remaining critical edge too. This is a little more involved,
  // because unlike the most recent register, less recent registers should
  // still be considered, though only if no other registers are available.
  unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};

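  // The renaming loop below makes a single bottom-up pass over the block.
  // For each instruction it (1) scans the operands to record register
  // references and rule out illegal renamings, (2) if the instruction has a
  // critical-path anti-dependence, searches its register class's allocation
  // order for a safe replacement register, and (3) updates the kill/def
  // liveness maps for the instruction's defs and uses.
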
  // Attempt to break anti-dependence edges on the critical path. Walk the
  // instructions from the bottom up, tracking information about liveness
  // as we go to help determine which registers are available.
  bool Changed = false;
  unsigned Count = BB->size() - 1;
  for (MachineBasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend();
       I != E; ++I, --Count) {
    MachineInstr *MI = &*I;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
    // is left behind appearing to clobber the super-register, while the
    // subregister needs to remain live. So we just ignore them.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Check if this instruction has an anti-dependence that we're
    // interested in.
    DenseMap<MachineInstr *, unsigned>::iterator C = CriticalAntiDeps.find(MI);
    unsigned AntiDepReg = C != CriticalAntiDeps.end() ?
      C->second : 0;

    // Scan the register operands for this instruction and update
    // Classes and RegRefs.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // If this instruction has a use of AntiDepReg, breaking it
      // is invalid.
      if (MO.isUse() && AntiDepReg == Reg)
        AntiDepReg = 0;

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      // Now check for aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        // If an alias of the reg is used during the live range, give up.
        // Note that this allows us to skip checking if AntiDepReg
        // overlaps with any of the aliases, among other things.
        unsigned AliasReg = *Alias;
        if (Classes[AliasReg]) {
          Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
          Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
        }
      }

      // If we're still willing to consider this register, note the reference.
      if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
        RegRefs.insert(std::make_pair(Reg, &MO));
    }

    // Determine AntiDepReg's register class, if it is live and is
    // consistently used within a single class.
    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
    assert((AntiDepReg == 0 || RC != NULL) &&
           "Register should be live if it's causing an anti-dependence!");
    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
      AntiDepReg = 0;

    // Look for a suitable register to use to break the anti-dependence.
    //
    // TODO: Instead of picking the first free register, consider which might
    // be the best.
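    // A candidate NewReg is safe when it is currently dead
    // (KillIndices[NewReg] == -1) and stays dead for the entire live range
    // being renamed, i.e. AntiDepReg's kill comes no later than NewReg's
    // next def going down the block:
    // KillIndices[AntiDepReg] <= DefIndices[NewReg]. For example
    // (illustrative indices, counting down from the top of the block): a
    // def at index 2 whose kill is at index 5 can be renamed to a NewReg
    // whose next def is at index 7, but not to one redefined at index 4.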
    if (AntiDepReg != 0) {
      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(*MF),
           RE = RC->allocation_order_end(*MF); R != RE; ++R) {
        unsigned NewReg = *R;
        // Don't replace a register with itself.
        if (NewReg == AntiDepReg) continue;
        // Don't replace a register with one that was recently used to repair
        // an anti-dependence with this AntiDepReg, because that would
        // re-introduce that anti-dependence.
        if (NewReg == LastNewReg[AntiDepReg]) continue;
        // If NewReg is dead and NewReg's most recent def is not before
        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
        assert(((KillIndices[AntiDepReg] == -1u) !=
                (DefIndices[AntiDepReg] == -1u)) &&
               "Kill and Def maps aren't consistent for AntiDepReg!");
        assert(((KillIndices[NewReg] == -1u) != (DefIndices[NewReg] == -1u)) &&
               "Kill and Def maps aren't consistent for NewReg!");
        if (KillIndices[NewReg] == -1u &&
            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
          DOUT << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n";

          // Update the references to the old register to refer to the new
          // register.
          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
                    std::multimap<unsigned, MachineOperand *>::iterator>
             Range = RegRefs.equal_range(AntiDepReg);
          for (std::multimap<unsigned, MachineOperand *>::iterator
               Q = Range.first, QE = Range.second; Q != QE; ++Q)
            Q->second->setReg(NewReg);

          // We just went back in time and modified history; the
          // liveness information for the anti-dependence reg is now
          // inconsistent. Set the state as if it were dead.
          Classes[NewReg] = Classes[AntiDepReg];
          DefIndices[NewReg] = DefIndices[AntiDepReg];
          KillIndices[NewReg] = KillIndices[AntiDepReg];

          Classes[AntiDepReg] = 0;
          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
          KillIndices[AntiDepReg] = -1;

          RegRefs.erase(AntiDepReg);
          Changed = true;
          LastNewReg[AntiDepReg] = NewReg;
          break;
        }
      }
    }

    // Update liveness.
    // Proceeding upwards, registers that are defined but not used in this
    // instruction are now dead.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isDef()) continue;
      // Ignore two-addr defs.
      if (MI->isRegReDefinedByTwoAddr(i)) continue;

      DefIndices[Reg] = Count;
      KillIndices[Reg] = -1;
      Classes[Reg] = 0;
      RegRefs.erase(Reg);
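      // A def also clobbers all subregisters of Reg (they alias it), so
      // they are marked dead below as well. Superregisters are only
      // partially clobbered, so they are merely excluded from renaming by
      // setting their class to the -1 sentinel.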
      // Repeat, for all subregs.
      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
           *Subreg; ++Subreg) {
        unsigned SubregReg = *Subreg;
        DefIndices[SubregReg] = Count;
        KillIndices[SubregReg] = -1;
        Classes[SubregReg] = 0;
        RegRefs.erase(SubregReg);
      }
      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
           *Super; ++Super) {
        unsigned SuperReg = *Super;
        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
      }
    }
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;
      if (!MO.isUse()) continue;

      const TargetRegisterClass *NewRC =
        getInstrOperandRegClass(TRI, TII, MI->getDesc(), i);

      // For now, only allow the register to be changed if its register
      // class is consistent across all uses.
      if (!Classes[Reg] && NewRC)
        Classes[Reg] = NewRC;
      else if (!NewRC || Classes[Reg] != NewRC)
        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);

      RegRefs.insert(std::make_pair(Reg, &MO));

      // If the register wasn't previously live, this use is a kill.
      if (KillIndices[Reg] == -1u) {
        KillIndices[Reg] = Count;
        DefIndices[Reg] = -1u;
      }
      // Repeat, for all aliases.
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        unsigned AliasReg = *Alias;
        if (KillIndices[AliasReg] == -1u) {
          KillIndices[AliasReg] = Count;
          DefIndices[AliasReg] = -1u;
        }
      }
    }
  }
  assert(Count == -1u && "Count mismatch!");

  return Changed;
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the PendingQueue if the count reaches zero. Also update its cycle bound.
void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // Compute how many cycles it will be before this actually becomes
  // available. This is the max of the start time of all predecessors plus
  // their latencies.
  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());

  if (SuccSU->NumPredsLeft == 0) {
    PendingQueue.push_back(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  Sequence.push_back(SU);
  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue.ScheduledNode(SU);
}

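// The scheduler below models cycle time explicitly: when a node is
// scheduled, its successors are released into PendingQueue, and a pending
// node only moves to AvailableQueue once CurCycle reaches its depth, i.e.
// once the results of all of its predecessors would be available on an
// in-order pipeline.
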
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // A node is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue.push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready to issue, advance the cycle; otherwise
  // schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
    // Check to see if any of the pending instructions are ready to issue. If
    // so, add them to the available queue.
    unsigned MinDepth = ~0u;
    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
      if (PendingQueue[i]->getDepth() <= CurCycle) {
        AvailableQueue.push(PendingQueue[i]);
        PendingQueue[i]->isAvailable = true;
        PendingQueue[i] = PendingQueue.back();
        PendingQueue.pop_back();
        --i; --e;
      } else if (PendingQueue[i]->getDepth() < MinDepth)
        MinDepth = PendingQueue[i]->getDepth();
    }

    // If there are no instructions available, don't try to issue anything;
    // advance directly to the cycle when the next pending node becomes ready.
    if (AvailableQueue.empty()) {
      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
      continue;
    }

    SUnit *FoundSUnit = AvailableQueue.pop();

    // If we found a node to schedule, do it now.
    if (FoundSUnit) {
      ScheduleNodeTopDown(FoundSUnit, CurCycle);

      // If this is a pseudo-op node, we don't want to increment the current
      // cycle.
      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
        ++CurCycle;
    } else {
      // Otherwise, we have a pipeline stall, but no other problem, just
      // advance the current cycle and try again.
      DOUT << "*** Advancing cycle, no work to do\n";
      ++NumStalls;
      ++CurCycle;
    }
  }

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/false);
#endif
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

FunctionPass *llvm::createPostRAScheduler() {
  return new PostRAScheduler();
}