MachineScheduler.cpp revision b754687fd7391213f455ffa52d1bcfbe11052bc0
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"

#include <queue>

using namespace llvm;

// These flags are declared (non-static) in the llvm namespace so that targets
// can consult them when choosing a scheduling direction.
namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice, then
// it can be specified by the machine model. For now, it's an experimental
// tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
  cl::desc("Allow expected latency to exceed the critical path by N cycles "
           "before attempting to balance ILP"),
  cl::init(10U));

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

// The context owns its RegisterClassInfo for its whole lifetime; everything
// else is borrowed from the analyses fetched in runOnMachineFunction.
MachineSchedContext::MachineSchedContext():
  MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

// Declare the analyses this pass needs and the ones it keeps up to date:
// SlotIndexes and LiveIntervals are both required and preserved, which is what
// allows MachineScheduler to run before register allocation.
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);


/// Decrement this iterator until reaching the top or a non-debug instr.
/// If every instruction between Beg and I is a debug value, Beg itself is
/// returned even if it is a debug value.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the queue's name followed by the NodeNum of each queued SUnit.
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

/// Splice MI in front of InsertPos within the current block, keeping
/// LiveIntervals and the cached RegionBegin position in sync with the move.
void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

/// Return true if scheduling may continue. In debug builds, stop once
/// -misched-cutoff instructions have been scheduled by collapsing the
/// unscheduled zone (CurrentTop = CurrentBottom).
bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
394void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 395 MachineBasicBlock::iterator begin, 396 MachineBasicBlock::iterator end, 397 unsigned endcount) 398{ 399 ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); 400 401 // For convenience remember the end of the liveness region. 402 LiveRegionEnd = 403 (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 404} 405 406// Setup the register pressure trackers for the top scheduled top and bottom 407// scheduled regions. 408void ScheduleDAGMI::initRegPressure() { 409 TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 410 BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 411 412 // Close the RPTracker to finalize live ins. 413 RPTracker.closeRegion(); 414 415 DEBUG(RPTracker.getPressure().dump(TRI)); 416 417 // Initialize the live ins and live outs. 418 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 419 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 420 421 // Close one end of the tracker so we can call 422 // getMaxUpward/DownwardPressureDelta before advancing across any 423 // instructions. This converts currently live regs into live ins/outs. 424 TopRPTracker.closeTop(); 425 BotRPTracker.closeBottom(); 426 427 // Account for liveness generated by the region boundary. 428 if (LiveRegionEnd != RegionEnd) 429 BotRPTracker.recede(); 430 431 assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); 432 433 // Cache the list of excess pressure sets in this region. This will also track 434 // the max pressure in the scheduled code for these sets. 
435 RegionCriticalPSets.clear(); 436 std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure; 437 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { 438 unsigned Limit = TRI->getRegPressureSetLimit(i); 439 DEBUG(dbgs() << TRI->getRegPressureSetName(i) 440 << "Limit " << Limit 441 << " Actual " << RegionPressure[i] << "\n"); 442 if (RegionPressure[i] > Limit) 443 RegionCriticalPSets.push_back(PressureElement(i, 0)); 444 } 445 DEBUG(dbgs() << "Excess PSets: "; 446 for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) 447 dbgs() << TRI->getRegPressureSetName( 448 RegionCriticalPSets[i].PSetID) << " "; 449 dbgs() << "\n"); 450} 451 452// FIXME: When the pressure tracker deals in pressure differences then we won't 453// iterate over all RegionCriticalPSets[i]. 454void ScheduleDAGMI:: 455updateScheduledPressure(std::vector<unsigned> NewMaxPressure) { 456 for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { 457 unsigned ID = RegionCriticalPSets[i].PSetID; 458 int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; 459 if ((int)NewMaxPressure[ID] > MaxUnits) 460 MaxUnits = NewMaxPressure[ID]; 461 } 462} 463 464/// schedule - Called back from MachineScheduler::runOnMachineFunction 465/// after setting up the current scheduling region. [RegionBegin, RegionEnd) 466/// only includes instructions that have DAG nodes, not scheduling boundaries. 467/// 468/// This is a skeletal driver, with all the functionality pushed into helpers, 469/// so that it can be easilly extended by experimental schedulers. Generally, 470/// implementing MachineSchedStrategy should be sufficient to implement a new 471/// scheduling algorithm. However, if a scheduler further subclasses 472/// ScheduleDAGMI then it will want to override this virtual method in order to 473/// update any specialized state. 
474void ScheduleDAGMI::schedule() { 475 buildDAGWithRegPressure(); 476 477 postprocessDAG(); 478 479 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 480 SUnits[su].dumpAll(this)); 481 482 if (ViewMISchedDAGs) viewGraph(); 483 484 initQueues(); 485 486 bool IsTopNode = false; 487 while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { 488 assert(!SU->isScheduled && "Node already scheduled"); 489 if (!checkSchedLimit()) 490 break; 491 492 scheduleMI(SU, IsTopNode); 493 494 updateQueues(SU, IsTopNode); 495 } 496 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 497 498 placeDebugValues(); 499 500 DEBUG({ 501 unsigned BBNum = top()->getParent()->getNumber(); 502 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; 503 dumpSchedule(); 504 dbgs() << '\n'; 505 }); 506} 507 508/// Build the DAG and setup three register pressure trackers. 509void ScheduleDAGMI::buildDAGWithRegPressure() { 510 // Initialize the register pressure tracker used by buildSchedGraph. 511 RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 512 513 // Account for liveness generate by the region boundary. 514 if (LiveRegionEnd != RegionEnd) 515 RPTracker.recede(); 516 517 // Build the DAG, and compute current register pressure. 518 buildSchedGraph(AA, &RPTracker); 519 if (ViewMISchedDAGs) viewGraph(); 520 521 // Initialize top/bottom trackers after computing region pressure. 522 initRegPressure(); 523} 524 525/// Apply each ScheduleDAGMutation step in order. 526void ScheduleDAGMI::postprocessDAG() { 527 for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { 528 Mutations[i]->apply(this); 529 } 530} 531 532// Release all DAG roots for scheduling. 533void ScheduleDAGMI::releaseRoots() { 534 SmallVector<SUnit*, 16> BotRoots; 535 536 for (std::vector<SUnit>::iterator 537 I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { 538 // A SUnit is ready to top schedule if it has no predecessors. 
539 if (I->Preds.empty()) 540 SchedImpl->releaseTopNode(&(*I)); 541 // A SUnit is ready to bottom schedule if it has no successors. 542 if (I->Succs.empty()) 543 BotRoots.push_back(&(*I)); 544 } 545 // Release bottom roots in reverse order so the higher priority nodes appear 546 // first. This is more natural and slightly more efficient. 547 for (SmallVectorImpl<SUnit*>::const_reverse_iterator 548 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) 549 SchedImpl->releaseBottomNode(*I); 550} 551 552/// Identify DAG roots and setup scheduler queues. 553void ScheduleDAGMI::initQueues() { 554 555 // Initialize the strategy before modifying the DAG. 556 SchedImpl->initialize(this); 557 558 // Release edges from the special Entry node or to the special Exit node. 559 releaseSuccessors(&EntrySU); 560 releasePredecessors(&ExitSU); 561 562 // Release all DAG roots for scheduling. 563 releaseRoots(); 564 565 SchedImpl->registerRoots(); 566 567 CurrentTop = nextIfDebug(RegionBegin, RegionEnd); 568 CurrentBottom = RegionEnd; 569} 570 571/// Move an instruction and update register pressure. 572void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { 573 // Move the instruction to its new location in the instruction stream. 574 MachineInstr *MI = SU->getInstr(); 575 576 if (IsTopNode) { 577 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 578 if (&*CurrentTop == MI) 579 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 580 else { 581 moveInstruction(MI, CurrentTop); 582 TopRPTracker.setPos(MI); 583 } 584 585 // Update top scheduled pressure. 
586 TopRPTracker.advance(); 587 assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 588 updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); 589 } 590 else { 591 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 592 MachineBasicBlock::iterator priorII = 593 priorNonDebug(CurrentBottom, CurrentTop); 594 if (&*priorII == MI) 595 CurrentBottom = priorII; 596 else { 597 if (&*CurrentTop == MI) { 598 CurrentTop = nextIfDebug(++CurrentTop, priorII); 599 TopRPTracker.setPos(CurrentTop); 600 } 601 moveInstruction(MI, CurrentBottom); 602 CurrentBottom = MI; 603 } 604 // Update bottom scheduled pressure. 605 BotRPTracker.recede(); 606 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 607 updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); 608 } 609} 610 611/// Update scheduler queues after scheduling an instruction. 612void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { 613 // Release dependent instructions for scheduling. 614 if (IsTopNode) 615 releaseSuccessors(SU); 616 else 617 releasePredecessors(SU); 618 619 SU->isScheduled = true; 620 621 // Notify the scheduling strategy after updating the DAG. 622 SchedImpl->schedNode(SU, IsTopNode); 623} 624 625/// Reinsert any remaining debug_values, just like the PostRA scheduler. 626void ScheduleDAGMI::placeDebugValues() { 627 // If first instruction was a DBG_VALUE then put it back. 
628 if (FirstDbgValue) { 629 BB->splice(RegionBegin, BB, FirstDbgValue); 630 RegionBegin = FirstDbgValue; 631 } 632 633 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator 634 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 635 std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); 636 MachineInstr *DbgValue = P.first; 637 MachineBasicBlock::iterator OrigPrevMI = P.second; 638 BB->splice(++OrigPrevMI, BB, DbgValue); 639 if (OrigPrevMI == llvm::prior(RegionEnd)) 640 RegionEnd = DbgValue; 641 } 642 DbgValues.clear(); 643 FirstDbgValue = NULL; 644} 645 646#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 647void ScheduleDAGMI::dumpSchedule() const { 648 for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { 649 if (SUnit *SU = getSUnit(&(*MI))) 650 SU->dump(this); 651 else 652 dbgs() << "Missing SUnit\n"; 653 } 654} 655#endif 656 657//===----------------------------------------------------------------------===// 658// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. 659//===----------------------------------------------------------------------===// 660 661namespace { 662/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance 663/// the schedule. 664class ConvergingScheduler : public MachineSchedStrategy { 665public: 666 /// Represent the type of SchedCandidate found within a single queue. 667 /// pickNodeBidirectional depends on these listed by decreasing priority. 668 enum CandReason { 669 NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand, 670 BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, 671 SingleMax, MultiPressure, NextDefUse, NodeOrder}; 672 673#ifndef NDEBUG 674 static const char *getReasonStr(ConvergingScheduler::CandReason Reason); 675#endif 676 677 /// Policy for scheduling the next instruction in the candidate's zone. 
678 struct CandPolicy { 679 bool ReduceLatency; 680 unsigned ReduceResIdx; 681 unsigned DemandResIdx; 682 683 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} 684 }; 685 686 /// Status of an instruction's critical resource consumption. 687 struct SchedResourceDelta { 688 // Count critical resources in the scheduled region required by SU. 689 unsigned CritResources; 690 691 // Count critical resources from another region consumed by SU. 692 unsigned DemandedResources; 693 694 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} 695 696 bool operator==(const SchedResourceDelta &RHS) const { 697 return CritResources == RHS.CritResources 698 && DemandedResources == RHS.DemandedResources; 699 } 700 bool operator!=(const SchedResourceDelta &RHS) const { 701 return !operator==(RHS); 702 } 703 }; 704 705 /// Store the state used by ConvergingScheduler heuristics, required for the 706 /// lifetime of one invocation of pickNode(). 707 struct SchedCandidate { 708 CandPolicy Policy; 709 710 // The best SUnit candidate. 711 SUnit *SU; 712 713 // The reason for this candidate. 714 CandReason Reason; 715 716 // Register pressure values for the best candidate. 717 RegPressureDelta RPDelta; 718 719 // Critical resource consumption of the best candidate. 720 SchedResourceDelta ResDelta; 721 722 SchedCandidate(const CandPolicy &policy) 723 : Policy(policy), SU(NULL), Reason(NoCand) {} 724 725 bool isValid() const { return SU; } 726 727 // Copy the status of another candidate without changing policy. 728 void setBest(SchedCandidate &Best) { 729 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 730 SU = Best.SU; 731 Reason = Best.Reason; 732 RPDelta = Best.RPDelta; 733 ResDelta = Best.ResDelta; 734 } 735 736 void initResourceDelta(const ScheduleDAGMI *DAG, 737 const TargetSchedModel *SchedModel); 738 }; 739 740 /// Summarize the unscheduled region. 741 struct SchedRemainder { 742 // Critical path through the DAG in expected latency. 
743 unsigned CriticalPath; 744 745 // Unscheduled resources 746 SmallVector<unsigned, 16> RemainingCounts; 747 // Critical resource for the unscheduled zone. 748 unsigned CritResIdx; 749 // Number of micro-ops left to schedule. 750 unsigned RemainingMicroOps; 751 // Is the unscheduled zone resource limited. 752 bool IsResourceLimited; 753 754 unsigned MaxRemainingCount; 755 756 void reset() { 757 CriticalPath = 0; 758 RemainingCounts.clear(); 759 CritResIdx = 0; 760 RemainingMicroOps = 0; 761 IsResourceLimited = false; 762 MaxRemainingCount = 0; 763 } 764 765 SchedRemainder() { reset(); } 766 767 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 768 }; 769 770 /// Each Scheduling boundary is associated with ready queues. It tracks the 771 /// current cycle in the direction of movement, and maintains the state 772 /// of "hazards" and other interlocks at the current cycle. 773 struct SchedBoundary { 774 ScheduleDAGMI *DAG; 775 const TargetSchedModel *SchedModel; 776 SchedRemainder *Rem; 777 778 ReadyQueue Available; 779 ReadyQueue Pending; 780 bool CheckPending; 781 782 // For heuristics, keep a list of the nodes that immediately depend on the 783 // most recently scheduled node. 784 SmallPtrSet<const SUnit*, 8> NextSUs; 785 786 ScheduleHazardRecognizer *HazardRec; 787 788 unsigned CurrCycle; 789 unsigned IssueCount; 790 791 /// MinReadyCycle - Cycle of the soonest available instruction. 792 unsigned MinReadyCycle; 793 794 // The expected latency of the critical path in this scheduled zone. 795 unsigned ExpectedLatency; 796 797 // Resources used in the scheduled zone beyond this boundary. 798 SmallVector<unsigned, 16> ResourceCounts; 799 800 // Cache the critical resources ID in this scheduled zone. 801 unsigned CritResIdx; 802 803 // Is the scheduled region resource limited vs. latency limited. 804 bool IsResourceLimited; 805 806 unsigned ExpectedCount; 807 808 // Policy flag: attempt to find ILP until expected latency is covered. 
809 bool ShouldIncreaseILP; 810 811#ifndef NDEBUG 812 // Remember the greatest min operand latency. 813 unsigned MaxMinLatency; 814#endif 815 816 void reset() { 817 Available.clear(); 818 Pending.clear(); 819 CheckPending = false; 820 NextSUs.clear(); 821 HazardRec = 0; 822 CurrCycle = 0; 823 IssueCount = 0; 824 MinReadyCycle = UINT_MAX; 825 ExpectedLatency = 0; 826 ResourceCounts.resize(1); 827 assert(!ResourceCounts[0] && "nonzero count for bad resource"); 828 CritResIdx = 0; 829 IsResourceLimited = false; 830 ExpectedCount = 0; 831 ShouldIncreaseILP = false; 832#ifndef NDEBUG 833 MaxMinLatency = 0; 834#endif 835 // Reserve a zero-count for invalid CritResIdx. 836 ResourceCounts.resize(1); 837 } 838 839 /// Pending queues extend the ready queues with the same ID and the 840 /// PendingFlag set. 841 SchedBoundary(unsigned ID, const Twine &Name): 842 DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), 843 Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { 844 reset(); 845 } 846 847 ~SchedBoundary() { delete HazardRec; } 848 849 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 850 SchedRemainder *rem); 851 852 bool isTop() const { 853 return Available.getID() == ConvergingScheduler::TopQID; 854 } 855 856 unsigned getUnscheduledLatency(SUnit *SU) const { 857 if (isTop()) 858 return SU->getHeight(); 859 return SU->getDepth(); 860 } 861 862 unsigned getCriticalCount() const { 863 return ResourceCounts[CritResIdx]; 864 } 865 866 bool checkHazard(SUnit *SU); 867 868 void checkILPPolicy(); 869 870 void releaseNode(SUnit *SU, unsigned ReadyCycle); 871 872 void bumpCycle(); 873 874 void countResource(unsigned PIdx, unsigned Cycles); 875 876 void bumpNode(SUnit *SU); 877 878 void releasePending(); 879 880 void removeReady(SUnit *SU); 881 882 SUnit *pickOnlyChoice(); 883 }; 884 885private: 886 ScheduleDAGMI *DAG; 887 const TargetSchedModel *SchedModel; 888 const TargetRegisterInfo *TRI; 889 890 // State of the top and bottom scheduled instruction 
boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

/// Initialize the per-region remainder state: total the micro-ops and the
/// factor-scaled resource cycles of every SUnit in the region, so the policy
/// can compare what has been scheduled against what remains.
void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  // Resource bookkeeping is only meaningful with a machine model.
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      // Scale by the per-resource factor so counts of different resource
      // kinds are directly comparable.
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}

/// Bind this boundary to the current region's DAG, machine model, and the
/// shared remainder state, after clearing any previous region's state.
void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize resource counts.

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
988 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 989 const TargetMachine &TM = DAG->MF.getTarget(); 990 Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 991 Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 992 993 assert((!ForceTopDown || !ForceBottomUp) && 994 "-misched-topdown incompatible with -misched-bottomup"); 995} 996 997void ConvergingScheduler::releaseTopNode(SUnit *SU) { 998 if (SU->isScheduled) 999 return; 1000 1001 for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1002 I != E; ++I) { 1003 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; 1004 unsigned MinLatency = I->getMinLatency(); 1005#ifndef NDEBUG 1006 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 1007#endif 1008 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 1009 SU->TopReadyCycle = PredReadyCycle + MinLatency; 1010 } 1011 Top.releaseNode(SU, SU->TopReadyCycle); 1012} 1013 1014void ConvergingScheduler::releaseBottomNode(SUnit *SU) { 1015 if (SU->isScheduled) 1016 return; 1017 1018 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 1019 1020 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1021 I != E; ++I) { 1022 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 1023 unsigned MinLatency = I->getMinLatency(); 1024#ifndef NDEBUG 1025 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 1026#endif 1027 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 1028 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 1029 } 1030 Bot.releaseNode(SU, SU->BotReadyCycle); 1031} 1032 1033void ConvergingScheduler::registerRoots() { 1034 Rem.CriticalPath = DAG->ExitSU.getDepth(); 1035 // Some roots may not feed into ExitSU. Check all of them in case. 
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    // A bottom root's depth is the length of the chain above it; the longest
    // such chain is the region's critical path.
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  // Prefer the target's hazard recognizer whenever it is enabled.
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  // Otherwise model dispatch limits: issuing SU's micro-ops must not push the
  // current group past the machine's issue width.
  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

/// If expected latency is covered, disable ILP policy.
void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
  // Once the zone becomes resource limited, or its cycle count already covers
  // the expected latency, exposing more ILP can no longer help.
  if (ShouldIncreaseILP
      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
    ShouldIncreaseILP = false;
    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
  }
}

/// Release SU into this boundary's Available or Pending queue once its
/// ReadyCycle for this direction is known.
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);

  // If CriticalPath has been computed, then check if the unscheduled nodes
  // exceed the ILP window. Before registerRoots, CriticalPath==0.
  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
                            > Rem->CriticalPath + ILPWindow)) {
    ShouldIncreaseILP = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
  }
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  // Micro-ops in excess of one issue group carry over into the next cycle.
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    // Nothing becomes ready before MinReadyCycle; skip the idle cycles.
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      // Advance (top-down) or recede (bottom-up) the recognizer one cycle at
      // a time so its reservation tables stay in sync.
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  // The zone is resource limited when the critical resource count outpaces
  // both elapsed cycles and expected latency.
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Add the given processor resource to this scheduled zone.
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  // Factor-scaled count, comparable across resource kinds.
  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Reset MaxRemainingCount for sanity.
  Rem->MaxRemainingCount = 0;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << "  *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  // Track expected latency as the max depth (top-down) or height (bottom-up)
  // of any node scheduled so far in this zone.
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes in to the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ?
      SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    // Still in the future relative to the current cycle: keep it pending.
    if (ReadyCycle > CurrCycle)
      continue;

    // Ready by cycle but blocked by a hazard: also keep it pending.
    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    // remove() swaps the last element into slot i; revisit it.
    --i; --e;
  }
  DEBUG(if (!Pending.empty()) Pending.dump());
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  // Advance cycles until something becomes available. A well-formed region
  // must produce a ready node within the recognizer's lookahead plus the
  // worst edge latency; otherwise the hazard is permanent.
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here.
/// Instead, when releasing each candidate, releaseNode
/// compares the region's critical path to the candidate's height or depth and
/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  // Only act when the critical zone is limited by a resource, not by latency.
  if (!CriticalZone.IsResourceLimited)
    return;

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or critical path and
/// set each candidate's ReduceHeight policy accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  // Drop any stale per-zone ILP requests first, then translate the surviving
  // requests into a ReduceLatency policy on the matching candidate.
  Bot.checkILPPolicy();
  Top.checkILPPolicy();
  if (Bot.ShouldIncreaseILP)
    BotCand.Policy.ReduceLatency = true;
  if (Top.ShouldIncreaseILP)
    TopCand.Policy.ReduceLatency = true;

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle
    // to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Rem.MaxRemainingCount = std::max(
    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
    Rem.RemainingCounts[Rem.CritResIdx]);
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

/// Compute this candidate's per-write resource delta (cycles spent on the
/// policy's critical resource vs. its demanded resource).
void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  // Nothing to compute unless the policy singled out a resource.
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    // The incumbent wins this criterion; strengthen its recorded reason if
    // this one ranks higher (lower-valued reasons are stronger).
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  // Tie: defer to the next heuristic.
  return false;
}

/// Return true if this heuristic determines order (greater-than mirror of
/// tryLess).
static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      // Top-down: only compare depths once the incumbent's depth would
      // overrun the zone's expected count; otherwise prefer the longer
      // remaining path (height).
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      // Bottom-up: mirror image with height and depth swapped.
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) {
    TryCand.Reason = NextDefUse;
    return;
  }
  if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) {
    if (Cand.Reason > NextDefUse)
      Cand.Reason = NextDefUse;
    return;
  }
  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from scheduling standpoint.
1539static bool compareRPDelta(const RegPressureDelta &LHS, 1540 const RegPressureDelta &RHS) { 1541 // Compare each component of pressure in decreasing order of importance 1542 // without checking if any are valid. Invalid PressureElements are assumed to 1543 // have UnitIncrease==0, so are neutral. 1544 1545 // Avoid increasing the max critical pressure in the scheduled region. 1546 if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { 1547 DEBUG(dbgs() << "RP excess top - bot: " 1548 << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); 1549 return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; 1550 } 1551 // Avoid increasing the max critical pressure in the scheduled region. 1552 if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { 1553 DEBUG(dbgs() << "RP critical top - bot: " 1554 << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) 1555 << '\n'); 1556 return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; 1557 } 1558 // Avoid increasing the max pressure of the entire region. 
1559 if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { 1560 DEBUG(dbgs() << "RP current top - bot: " 1561 << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) 1562 << '\n'); 1563 return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; 1564 } 1565 return false; 1566} 1567 1568#ifndef NDEBUG 1569const char *ConvergingScheduler::getReasonStr( 1570 ConvergingScheduler::CandReason Reason) { 1571 switch (Reason) { 1572 case NoCand: return "NOCAND "; 1573 case SingleExcess: return "REG-EXCESS"; 1574 case SingleCritical: return "REG-CRIT "; 1575 case SingleMax: return "REG-MAX "; 1576 case MultiPressure: return "REG-MULTI "; 1577 case ResourceReduce: return "RES-REDUCE"; 1578 case ResourceDemand: return "RES-DEMAND"; 1579 case TopDepthReduce: return "TOP-DEPTH "; 1580 case TopPathReduce: return "TOP-PATH "; 1581 case BotHeightReduce:return "BOT-HEIGHT"; 1582 case BotPathReduce: return "BOT-PATH "; 1583 case NextDefUse: return "DEF-USE "; 1584 case NodeOrder: return "ORDER "; 1585 }; 1586 llvm_unreachable("Unknown reason!"); 1587} 1588 1589void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, 1590 const SchedBoundary &Zone) { 1591 const char *Label = getReasonStr(Cand.Reason); 1592 PressureElement P; 1593 unsigned ResIdx = 0; 1594 unsigned Latency = 0; 1595 switch (Cand.Reason) { 1596 default: 1597 break; 1598 case SingleExcess: 1599 P = Cand.RPDelta.Excess; 1600 break; 1601 case SingleCritical: 1602 P = Cand.RPDelta.CriticalMax; 1603 break; 1604 case SingleMax: 1605 P = Cand.RPDelta.CurrentMax; 1606 break; 1607 case ResourceReduce: 1608 ResIdx = Cand.Policy.ReduceResIdx; 1609 break; 1610 case ResourceDemand: 1611 ResIdx = Cand.Policy.DemandResIdx; 1612 break; 1613 case TopDepthReduce: 1614 Latency = Cand.SU->getDepth(); 1615 break; 1616 case TopPathReduce: 1617 Latency = Cand.SU->getHeight(); 1618 break; 1619 case BotHeightReduce: 1620 Latency = Cand.SU->getHeight(); 1621 break; 1622 case BotPathReduce: 1623 Latency = 
Cand.SU->getDepth(); 1624 break; 1625 } 1626 dbgs() << Label << " " << Zone.Available.getName() << " "; 1627 if (P.isValid()) 1628 dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease 1629 << " "; 1630 else 1631 dbgs() << " "; 1632 if (ResIdx) 1633 dbgs() << SchedModel->getProcResource(ResIdx)->Name << " "; 1634 else 1635 dbgs() << " "; 1636 if (Latency) 1637 dbgs() << Latency << " cycles "; 1638 else 1639 dbgs() << " "; 1640 Cand.SU->dump(DAG); 1641} 1642#endif 1643 1644/// Pick the best candidate from the top queue. 1645/// 1646/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 1647/// DAG building. To adjust for the current scheduling location we need to 1648/// maintain the number of vreg uses remaining to be top-scheduled. 1649void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, 1650 const RegPressureTracker &RPTracker, 1651 SchedCandidate &Cand) { 1652 ReadyQueue &Q = Zone.Available; 1653 1654 DEBUG(Q.dump()); 1655 1656 // getMaxPressureDelta temporarily modifies the tracker. 1657 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 1658 1659 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 1660 1661 SchedCandidate TryCand(Cand.Policy); 1662 TryCand.SU = *I; 1663 tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); 1664 if (TryCand.Reason != NoCand) { 1665 // Initialize resource delta if needed in case future heuristics query it. 1666 if (TryCand.ResDelta == SchedResourceDelta()) 1667 TryCand.initResourceDelta(DAG, SchedModel); 1668 Cand.setBest(TryCand); 1669 DEBUG(traceCandidate(Cand, Zone)); 1670 } 1671 TryCand.SU = *I; 1672 } 1673} 1674 1675static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, 1676 bool IsTop) { 1677 DEBUG(dbgs() << "Pick " << (IsTop ? 
               "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Establish the candidate policies (latency/resource pressure) before
  // evaluating either queue.
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  // An empty region means everything has already been scheduled.
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  // Loop in case a picked node was already scheduled from the other end.
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  // Drop the node from whichever ready queues still hold it.
  if
     (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    // Record the cycle at which this node actually issued in this direction,
    // then advance the boundary past it.
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new ConvergingScheduler());
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  ScheduleDAGILP *ILP;
  bool MaximizeILP;

  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
1834 bool operator()(const SUnit *A, const SUnit *B) const { 1835 // Return true if A comes after B in the Q. 1836 if (MaximizeILP) 1837 return ILP->getILP(A) < ILP->getILP(B); 1838 else 1839 return ILP->getILP(A) > ILP->getILP(B); 1840 } 1841}; 1842 1843/// \brief Schedule based on the ILP metric. 1844class ILPScheduler : public MachineSchedStrategy { 1845 ScheduleDAGILP ILP; 1846 ILPOrder Cmp; 1847 1848 std::vector<SUnit*> ReadyQ; 1849public: 1850 ILPScheduler(bool MaximizeILP) 1851 : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {} 1852 1853 virtual void initialize(ScheduleDAGMI *DAG) { 1854 ReadyQ.clear(); 1855 ILP.resize(DAG->SUnits.size()); 1856 } 1857 1858 virtual void registerRoots() { 1859 for (std::vector<SUnit*>::const_iterator 1860 I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) { 1861 ILP.computeILP(*I); 1862 } 1863 } 1864 1865 /// Implement MachineSchedStrategy interface. 1866 /// ----------------------------------------- 1867 1868 virtual SUnit *pickNode(bool &IsTopNode) { 1869 if (ReadyQ.empty()) return NULL; 1870 pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 1871 SUnit *SU = ReadyQ.back(); 1872 ReadyQ.pop_back(); 1873 IsTopNode = false; 1874 DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() 1875 << " ILP: " << ILP.getILP(SU) << '\n'); 1876 return SU; 1877 } 1878 1879 virtual void schedNode(SUnit *, bool) {} 1880 1881 virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } 1882 1883 virtual void releaseBottomNode(SUnit *SU) { 1884 ReadyQ.push_back(SU); 1885 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 1886 } 1887}; 1888} // namespace 1889 1890static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { 1891 return new ScheduleDAGMI(C, new ILPScheduler(true)); 1892} 1893static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { 1894 return new ScheduleDAGMI(C, new ILPScheduler(false)); 1895} 1896static MachineSchedRegistry ILPMaxRegistry( 1897 "ilpmax", "Schedule bottom-up for max ILP", 
createILPMaxScheduler); 1898static MachineSchedRegistry ILPMinRegistry( 1899 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); 1900 1901//===----------------------------------------------------------------------===// 1902// Machine Instruction Shuffler for Correctness Testing 1903//===----------------------------------------------------------------------===// 1904 1905#ifndef NDEBUG 1906namespace { 1907/// Apply a less-than relation on the node order, which corresponds to the 1908/// instruction order prior to scheduling. IsReverse implements greater-than. 1909template<bool IsReverse> 1910struct SUnitOrder { 1911 bool operator()(SUnit *A, SUnit *B) const { 1912 if (IsReverse) 1913 return A->NodeNum > B->NodeNum; 1914 else 1915 return A->NodeNum < B->NodeNum; 1916 } 1917}; 1918 1919/// Reorder instructions as much as possible. 1920class InstructionShuffler : public MachineSchedStrategy { 1921 bool IsAlternating; 1922 bool IsTopDown; 1923 1924 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority 1925 // gives nodes with a higher number higher priority causing the latest 1926 // instructions to be scheduled first. 1927 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > 1928 TopQ; 1929 // When scheduling bottom-up, use greater-than as the queue priority. 1930 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > 1931 BottomQ; 1932public: 1933 InstructionShuffler(bool alternate, bool topdown) 1934 : IsAlternating(alternate), IsTopDown(topdown) {} 1935 1936 virtual void initialize(ScheduleDAGMI *) { 1937 TopQ.clear(); 1938 BottomQ.clear(); 1939 } 1940 1941 /// Implement MachineSchedStrategy interface. 
1942 /// ----------------------------------------- 1943 1944 virtual SUnit *pickNode(bool &IsTopNode) { 1945 SUnit *SU; 1946 if (IsTopDown) { 1947 do { 1948 if (TopQ.empty()) return NULL; 1949 SU = TopQ.top(); 1950 TopQ.pop(); 1951 } while (SU->isScheduled); 1952 IsTopNode = true; 1953 } 1954 else { 1955 do { 1956 if (BottomQ.empty()) return NULL; 1957 SU = BottomQ.top(); 1958 BottomQ.pop(); 1959 } while (SU->isScheduled); 1960 IsTopNode = false; 1961 } 1962 if (IsAlternating) 1963 IsTopDown = !IsTopDown; 1964 return SU; 1965 } 1966 1967 virtual void schedNode(SUnit *SU, bool IsTopNode) {} 1968 1969 virtual void releaseTopNode(SUnit *SU) { 1970 TopQ.push(SU); 1971 } 1972 virtual void releaseBottomNode(SUnit *SU) { 1973 BottomQ.push(SU); 1974 } 1975}; 1976} // namespace 1977 1978static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { 1979 bool Alternate = !ForceTopDown && !ForceBottomUp; 1980 bool TopDown = !ForceBottomUp; 1981 assert((TopDown || !ForceTopDown) && 1982 "-misched-topdown incompatible with -misched-bottomup"); 1983 return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); 1984} 1985static MachineSchedRegistry ShufflerRegistry( 1986 "shuffle", "Shuffle machine instructions alternating directions", 1987 createInstructionShuffler); 1988#endif // !NDEBUG 1989