ScheduleDAGRRList.cpp revision 2da8bc8a5f7705ac131184cd247f48500da0d74e
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> EnableSchedCycles(
  "enable-sched-cycles",
  cl::desc("Enable cycle-level precision during preRA scheduling"),
  cl::init(false), cl::Hidden);
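
// For reference, the schedulers registered above are selected via llc's
// -pre-RA-sched option (a usage sketch, assuming a built llc and some input
// file test.ll):
//
//   llc -pre-RA-sched=list-burr test.ll
//   llc -pre-RA-sched=list-ilp -enable-sched-cycles test.ll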

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
    else
      HazardRec = new ScheduleHazardRecognizer();
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU
  /// will create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }
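
  // Usage note: PickNodeToScheduleBottomUp() below asks
  // WillCreateCycle(TrySU, BtSU) before adding an artificial ordering edge
  // during backtracking; if TrySU already reaches BtSU, the new edge would
  // close a cycle in the DAG, so that backtrack is abandoned.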

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// This returns true if an edge was removed.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue,
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  // Without --enable-sched-cycles nothing is ever parked in PendingQueue.
  if (!EnableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle =
      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}
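
// A small timeline sketch of the pending/available interaction above
// (hypothetical, bottom-up, with --enable-sched-cycles and a ready filter):
// suppose a released predecessor has height 5 while CurCycle is 3.
// ReleasePred marks it available, records MinAvailableCycle = 5, and parks it
// in PendingQueue because isReady() rejects it. Once AdvanceToCycle(5) runs,
// ReleasePending moves it into AvailableQueue, where the picker can see it.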

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  AvailableQueue->setCurCycle(NextCycle);
  if (HazardRec->getMaxLookAhead() == 0) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      if (isBottomUp)
        HazardRec->RecedeCycle();
      else
        HazardRec->AdvanceCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on
  // the available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (!EnableSchedCycles)
    return;

  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (isBottomUp && SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (isBottomUp && SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);

  if (!isBottomUp && SU->isCall) {
    HazardRec->Reset();
  }
}
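
// Hazard-checking sketch (hypothetical two-issue in-order target): if both
// issue slots of the current cycle are already reserved, getHazardType() in
// AdvancePastStalls() reports a hazard, Stalls is bumped, and
// AdvanceToCycle(CurCycle + Stalls) steps past the stall before EmitNode()
// reserves this instruction's resources in the scoreboard.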

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If SchedCycles is disabled, count each inst as one cycle.
  if (!EnableSchedCycles ||
      AvailableQueue->empty() || HazardRec->atIssueLimit())
    AdvanceToCycle(CurCycle + 1);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to an earlier cycle in order to
/// schedule a specific node, unscheduling nodes back through BtSU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  if (EnableSchedCycles)
    ReleasePending();

  ++NumBacktracks;
}
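
// Backtracking sketch (hypothetical): if every remaining candidate would
// clobber some live physreg R, the caller picks BtSU = LiveRegGens[R] (the
// scheduled user holding R live at the lowest height), and BacktrackBottomUp
// pops nodes off Sequence, unscheduling each one, until BtSU itself has been
// unscheduled. The interfering candidate can then be scheduled, and the
// caller adds an artificial ordering edge so the conflict cannot recur.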

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (N->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value but
    // with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
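
// Worked example for the lookup above (hypothetical instruction): with one
// explicit def and ImplicitDefs = { EFLAGS, EAX }, a query for Reg == EAX
// starts NumRes at 1 (the explicit def), skips EFLAGS (NumRes becomes 2),
// and returns N->getValueType(2), the value type carried by the EAX def.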

/// CheckForLiveRegDef - Update the set of interfering live registers (LRegs)
/// if the specified register def of the specified SUnit clobbers any "live"
/// registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg or an overlapping alias is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI))
      LRegs.push_back(*AliasI);
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specific node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors have been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If cross copy register class is null, then it must be possible to copy
    // the value directly. Do not try to duplicate the def.
    SUnit *NewDef = 0;
    if (DestRC)
      NewDef = CopyAndMoveSuccessors(LRDef);
    else
      DestRC = RC;
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}
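
// Copy-insertion sketch (hypothetical): when backtracking is rejected and
// CopyAndMoveSuccessors cannot duplicate the live def LRDef, the code above
// splits the physreg value instead: CopyFromSU copies the register into a
// cross-class register and CopyToSU copies it back, and LRDef's
// already-scheduled users are rewired to CopyToSU so the physreg need not
// stay live across the interfering node.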

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\n*** Examining Available\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Put all leaves into the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
  };

  // bu_ls_rr_sort - Priority function for bottom up register pressure
  // reduction scheduler.
  struct bu_ls_rr_sort : public queue_sort {
    enum {
      IsBottomUp = true,
      HasReadyFilter = false
    };

    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // td_ls_rr_sort - Priority function for top down register pressure reduction
  // scheduler.
  struct td_ls_rr_sort : public queue_sort {
    enum {
      IsBottomUp = false,
      HasReadyFilter = false
    };

    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // src_ls_rr_sort - Priority function for source order scheduler.
  struct src_ls_rr_sort : public queue_sort {
    enum {
      IsBottomUp = true,
      HasReadyFilter = false
    };

    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
  struct hybrid_ls_rr_sort : public queue_sort {
    enum {
      IsBottomUp = true,
      HasReadyFilter = false
    };

    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
      : SPQ(spq) {}
    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
  // scheduler.
  struct ilp_ls_rr_sort : public queue_sort {
    enum {
      IsBottomUp = true,
      HasReadyFilter = true
    };

    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
      : SPQ(spq) {}
    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool isReady(SUnit *SU, unsigned CurCycle) const;

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
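
// A minimal worked example of the recurrence above (hypothetical DAG, data
// edges only):
//
//    a   b      Leaves a, b, c have no data preds, so their numbers
//     \ /       default to 1.
//  c   +        '+' sees preds {a:1, b:1}: the max is 1 with one tied
//   \ /         pred, so Extra == 1 and SethiUllman(+) = 1 + 1 = 2.
//    *          '*' sees preds {c:1, +:2}: the max is 2 with no tie,
//               so SethiUllman(*) = 2.
//
// The tie bump models the classic Sethi-Ullman rule: evaluating two equally
// register-hungry subtrees needs one extra register to hold the first result
// while the second is computed.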

namespace {
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
      std::vector<SUnit *>::iterator Best = Q.begin();
      for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
             E = Q.end(); I != E; ++I)
        if (Picker(*Best, *I))
          Best = I;
      SUnit *V = *Best;
      if (Best != prior(Q.end()))
        std::swap(*Best, Q.back());
      Q.pop_back();
      return V;
    }

    std::vector<SUnit*> Queue;
    SF Picker;
    unsigned CurQueueId;
    bool TracksRegPressure;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    MachineFunction &MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const TargetLowering *TLI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    /// RegPressure - Tracking current reg pressure per register class.
    ///
    std::vector<unsigned> RegPressure;

    /// RegLimit - Tracking the number of allocatable registers per register
    /// class.
    std::vector<unsigned> RegLimit;

  public:
    RegReductionPriorityQueue(MachineFunction &mf,
                              bool tracksrp,
                              const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri,
                              const TargetLowering *tli)
      : SchedulingPriorityQueue(SF::HasReadyFilter), Picker(this),
        CurQueueId(0), TracksRegPressure(tracksrp),
        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
      if (TracksRegPressure) {
        unsigned NumRC = TRI->getNumRegClasses();
        RegLimit.resize(NumRC);
        RegPressure.resize(NumRC);
        std::fill(RegLimit.begin(), RegLimit.end(), 0);
        std::fill(RegPressure.begin(), RegPressure.end(), 0);
        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
               E = TRI->regclass_end(); I != E; ++I)
          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
      }
    }

    bool isBottomUp() const { return SF::IsBottomUp; }

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors and does not lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    bool empty() const { return Queue.empty(); }

    bool isReady(SUnit *U) const {
      return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
    }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++CurQueueId;
      Queue.push_back(U);
    }

    SUnit *pop() {
      if (Queue.empty()) return NULL;

      SUnit *V = popFromQueue(Queue, Picker);
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                   SU);
      if (I != prior(Queue.end()))
        std::swap(*I, Queue.back());
      Queue.pop_back();
      SU->NodeQueueId = 0;
    }

    bool HighRegPressure(const SUnit *SU) const {
      if (!TLI)
        return false;

      for (SUnit::const_pred_iterator I = SU->Preds.begin(),
             E = SU->Preds.end(); I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            unsigned Cost = TLI->getRepRegClassCostFor(VT);
            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
              return true;
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific
          // register class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific
          // register class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] >= RegLimit[RCId])
            return true;  // Reg pressure already high.
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          // Check if this increases register pressure of the specific
          // register class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
        }
      }

      return false;
    }
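
    // A numeric sketch of the checks above (hypothetical values): with
    // RegLimit[RCId] == 8, RegPressure[RCId] == 7, and a predecessor that
    // would newly define one value of Cost == 1 in that class,
    // RegPressure + Cost reaches the limit, so HighRegPressure() returns
    // true and a pressure-aware picker can prefer a pressure-reducing
    // candidate instead.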

    void ScheduledNode(SUnit *SU) {
      if (!TracksRegPressure)
        return;

      const SDNode *N = SU->getNode();
      if (!N->isMachineOpcode()) {
        if (N->getOpcode() != ISD::CopyToReg)
          return;
      } else {
        unsigned Opc = N->getMachineOpcode();
        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
            Opc == TargetOpcode::INSERT_SUBREG ||
            Opc == TargetOpcode::SUBREG_TO_REG ||
            Opc == TargetOpcode::REG_SEQUENCE ||
            Opc == TargetOpcode::IMPLICIT_DEF)
          return;
      }

      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
          continue;
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
      }

      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
      // may transfer data dependencies to CopyToReg.
      if (SU->NumSuccs && N->isMachineOpcode()) {
        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = N->getValueType(i);
          if (!N->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
            // Register pressure tracking is imprecise. This can happen.
            RegPressure[RCId] = 0;
          else
            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
        }
      }

      dumpRegPressure();
    }
1652 RegPressure[RCId] = 0; 1653 else 1654 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 1655 } 1656 } 1657 1658 dumpRegPressure(); 1659 } 1660 1661 void UnscheduledNode(SUnit *SU) { 1662 if (!TracksRegPressure) 1663 return; 1664 1665 const SDNode *N = SU->getNode(); 1666 if (!N->isMachineOpcode()) { 1667 if (N->getOpcode() != ISD::CopyToReg) 1668 return; 1669 } else { 1670 unsigned Opc = N->getMachineOpcode(); 1671 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1672 Opc == TargetOpcode::INSERT_SUBREG || 1673 Opc == TargetOpcode::SUBREG_TO_REG || 1674 Opc == TargetOpcode::REG_SEQUENCE || 1675 Opc == TargetOpcode::IMPLICIT_DEF) 1676 return; 1677 } 1678 1679 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1680 I != E; ++I) { 1681 if (I->isCtrl()) 1682 continue; 1683 SUnit *PredSU = I->getSUnit(); 1684 if (PredSU->NumSuccsLeft != PredSU->NumSuccs) 1685 continue; 1686 const SDNode *PN = PredSU->getNode(); 1687 if (!PN->isMachineOpcode()) { 1688 if (PN->getOpcode() == ISD::CopyFromReg) { 1689 EVT VT = PN->getValueType(0); 1690 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1691 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1692 } 1693 continue; 1694 } 1695 unsigned POpc = PN->getMachineOpcode(); 1696 if (POpc == TargetOpcode::IMPLICIT_DEF) 1697 continue; 1698 if (POpc == TargetOpcode::EXTRACT_SUBREG) { 1699 EVT VT = PN->getOperand(0).getValueType(); 1700 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1701 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1702 continue; 1703 } else if (POpc == TargetOpcode::INSERT_SUBREG || 1704 POpc == TargetOpcode::SUBREG_TO_REG) { 1705 EVT VT = PN->getValueType(0); 1706 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1707 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1708 continue; 1709 } 1710 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 1711 for (unsigned i = 0; i != NumDefs; ++i) { 1712 EVT VT = PN->getValueType(i); 1713 if (!PN->hasAnyUseOfValue(i)) 1714 continue; 1715 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1716 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 1717 // Register pressure tracking is imprecise. This can happen. 1718 RegPressure[RCId] = 0; 1719 else 1720 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 1721 } 1722 } 1723 1724 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 1725 // may transfer data dependencies to CopyToReg. 1726 if (SU->NumSuccs && N->isMachineOpcode()) { 1727 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1728 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1729 EVT VT = N->getValueType(i); 1730 if (VT == MVT::Glue || VT == MVT::Other) 1731 continue; 1732 if (!N->hasAnyUseOfValue(i)) 1733 continue; 1734 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1735 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1736 } 1737 } 1738 1739 dumpRegPressure(); 1740 } 1741 1742 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1743 scheduleDAG = scheduleDag; 1744 } 1745 1746 void dumpRegPressure() const { 1747 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1748 E = TRI->regclass_end(); I != E; ++I) { 1749 const TargetRegisterClass *RC = *I; 1750 unsigned Id = RC->getID(); 1751 unsigned RP = RegPressure[Id]; 1752 if (!RP) continue; 1753 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 1754 << '\n'); 1755 } 1756 } 1757 1758 void dump(ScheduleDAG *DAG) const { 1759 // Emulate pop() without clobbering NodeQueueIds. 
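    // Draining a scratch copy of the queue with a copy of the picker prints
    // the nodes in priority order without disturbing the real queue state.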
1760    std::vector<SUnit*> DumpQueue = Queue;
1761    SF DumpPicker = Picker;
1762    while (!DumpQueue.empty()) {
1763      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1764      if (isBottomUp())
1765        dbgs() << "Height " << SU->getHeight() << ": ";
1766      else
1767        dbgs() << "Depth " << SU->getDepth() << ": ";
1768      SU->dump(DAG);
1769    }
1770  }
1771
1772protected:
1773  bool canClobber(const SUnit *SU, const SUnit *Op);
1774  void AddPseudoTwoAddrDeps();
1775  void PrescheduleNodesWithMultipleUses();
1776  void CalculateSethiUllmanNumbers();
1777};
1778
1779typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1780BURegReductionPriorityQueue;
1781
1782typedef RegReductionPriorityQueue<td_ls_rr_sort>
1783TDRegReductionPriorityQueue;
1784
1785typedef RegReductionPriorityQueue<src_ls_rr_sort>
1786SrcRegReductionPriorityQueue;
1787
1788typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1789HybridBURRPriorityQueue;
1790
1791typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1792ILPBURRPriorityQueue;
1793}
1794
1795/// closestSucc - Returns the scheduled cycle of the successor which is
1796/// closest to the current cycle.
1797static unsigned closestSucc(const SUnit *SU) {
1798  unsigned MaxHeight = 0;
1799  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1800       I != E; ++I) {
1801    if (I->isCtrl()) continue;  // ignore chain succs
1802    unsigned Height = I->getSUnit()->getHeight();
1803    // If there are a bunch of CopyToRegs stacked up, they should be
1804    // considered to be at the same position.
1805    if (I->getSUnit()->getNode() &&
1806        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1807      Height = closestSucc(I->getSUnit())+1;
1808    if (Height > MaxHeight)
1809      MaxHeight = Height;
1810  }
1811  return MaxHeight;
1812}
1813
1814/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1815/// for scratch registers, i.e. the number of data dependencies.
1816static unsigned calcMaxScratches(const SUnit *SU) {
1817  unsigned Scratches = 0;
1818  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1819       I != E; ++I) {
1820    if (I->isCtrl()) continue;  // ignore chain preds
1821    Scratches++;
1822  }
1823  return Scratches;
1824}
1825
1826/// hasOnlyLiveOutUses - Return true if SU has a single value successor that
1827/// is a CopyToReg to a virtual register. This SU def is probably a liveout
1828/// and it has no other use. It should be scheduled closer to the terminator.
1829static bool hasOnlyLiveOutUses(const SUnit *SU) {
1830  bool RetVal = false;
1831  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1832       I != E; ++I) {
1833    if (I->isCtrl()) continue;
1834    const SUnit *SuccSU = I->getSUnit();
1835    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1836      unsigned Reg =
1837        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1838      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1839        RetVal = true;
1840        continue;
1841      }
1842    }
1843    return false;
1844  }
1845  return RetVal;
1846}
1847
1848/// UnitsSharePred - Return true if the two scheduling units share a common
1849/// data predecessor.
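/// Only data predecessors are compared; control (chain) predecessors are
/// ignored on both sides.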
1850static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1851  SmallSet<const SUnit*, 4> Preds;
1852  for (SUnit::const_pred_iterator I = left->Preds.begin(), E = left->Preds.end();
1853       I != E; ++I) {
1854    if (I->isCtrl()) continue;  // ignore chain preds
1855    Preds.insert(I->getSUnit());
1856  }
1857  for (SUnit::const_pred_iterator I = right->Preds.begin(), E = right->Preds.end();
1858       I != E; ++I) {
1859    if (I->isCtrl()) continue;  // ignore chain preds
1860    if (Preds.count(I->getSUnit()))
1861      return true;
1862  }
1863  return false;
1864}
1865
1866template <typename RRSort>
1867static bool BURRSort(const SUnit *left, const SUnit *right,
1868                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1869  unsigned LPriority = SPQ->getNodePriority(left);
1870  unsigned RPriority = SPQ->getNodePriority(right);
1871  if (LPriority != RPriority)
1872    return LPriority > RPriority;
1873
1874  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
1875  // e.g.
1876  // t1 = op t2, c1
1877  // t3 = op t4, c2
1878  //
1879  // and the following instructions are both ready.
1880  // t2 = op c3
1881  // t4 = op c4
1882  //
1883  // Then schedule t2 = op first.
1884  // i.e.
1885  // t4 = op c4
1886  // t2 = op c3
1887  // t1 = op t2, c1
1888  // t3 = op t4, c2
1889  //
1890  // This creates more short live intervals.
1891  unsigned LDist = closestSucc(left);
1892  unsigned RDist = closestSucc(right);
1893  if (LDist != RDist)
1894    return LDist < RDist;
1895
1896  // How many registers become live when the node is scheduled.
1897  unsigned LScratch = calcMaxScratches(left);
1898  unsigned RScratch = calcMaxScratches(right);
1899  if (LScratch != RScratch)
1900    return LScratch > RScratch;
1901
1902  // Note: with a bottom-up ready filter, the height check may be redundant.
1903  if (left->getHeight() != right->getHeight())
1904    return left->getHeight() > right->getHeight();
1905
1906  if (left->getDepth() != right->getDepth())
1907    return left->getDepth() < right->getDepth();
1908
1909  assert(left->NodeQueueId && right->NodeQueueId &&
1910         "NodeQueueId cannot be zero");
1911  return (left->NodeQueueId > right->NodeQueueId);
1912}
1913
1914// Bottom up
1915bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1916  return BURRSort(left, right, SPQ);
1917}
1918
1919// Source order, otherwise bottom up.
1920bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1921  unsigned LOrder = SPQ->getNodeOrdering(left);
1922  unsigned ROrder = SPQ->getNodeOrdering(right);
1923
1924  // Prefer the ordering in which the node with the lower non-zero order
1925  // number comes first; an order number of zero means no source order.
1926  if ((LOrder || ROrder) && LOrder != ROrder)
1927    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1928
1929  return BURRSort(left, right, SPQ);
1930}
1931
1932bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1933  if (left->isCall || right->isCall)
1934    // No way to compute latency of calls.
1935    return BURRSort(left, right, SPQ);
1936
1937  bool LHigh = SPQ->HighRegPressure(left);
1938  bool RHigh = SPQ->HighRegPressure(right);
1939  // Avoid causing spills. If register pressure is high, schedule for
1940  // register pressure reduction.
1941  if (LHigh && !RHigh)
1942    return true;
1943  else if (!LHigh && RHigh)
1944    return false;
1945  else if (!LHigh && !RHigh) {
1946    // If the two nodes share an operand and one of them has a single
1947    // use that is a live out copy, favor the one that is live out. Otherwise
1948    // it will be difficult to eliminate the copy if the instruction is a
1949    // loop induction variable update. e.g.
1950    // BB:
1951    // sub r1, r3, #1
1952    // str r0, [r2, r3]
1953    // mov r3, r1
1954    // cmp
1955    // bne BB
1956    bool SharePred = UnitsSharePred(left, right);
1957    // FIXME: Only adjust if BB is a loop back edge.
1958    // FIXME: What's the cost of a copy?
1959    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1960    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1961    int LHeight = (int)left->getHeight() - LBonus;
1962    int RHeight = (int)right->getHeight() - RBonus;
1963
1964    // Low register pressure situation, schedule for latency if possible.
1965    bool LStall = left->SchedulingPref == Sched::Latency &&
1966                  (int)SPQ->getCurCycle() < LHeight;
1967    bool RStall = right->SchedulingPref == Sched::Latency &&
1968                  (int)SPQ->getCurCycle() < RHeight;
1969    // If scheduling one of the nodes will cause a pipeline stall, delay it.
1970    // If scheduling both of them would cause stalls, sort them according
1971    // to their height.
1972    if (LStall) {
1973      if (!RStall)
1974        return true;
1975      if (LHeight != RHeight)
1976        return LHeight > RHeight;
1977    } else if (RStall)
1978      return false;
1979
1980    // If either node is scheduling for latency, sort them by height
1981    // and latency.
1982    if (left->SchedulingPref == Sched::Latency ||
1983        right->SchedulingPref == Sched::Latency) {
1984      if (LHeight != RHeight)
1985        return LHeight > RHeight;
1986      if (left->Latency != right->Latency)
1987        return left->Latency > right->Latency;
1988    }
1989  }
1990
1991  return BURRSort(left, right, SPQ);
1992}
1993
1994// Schedule as many instructions in each cycle as possible. So don't make an
1995// instruction available unless it is ready in the current cycle.
1996bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
1997  return SU->getHeight() <= CurCycle;
1998}
1999
2000bool ilp_ls_rr_sort::operator()(const SUnit *left,
2001                                const SUnit *right) const {
2002  if (left->isCall || right->isCall)
2003    // No way to compute latency of calls.
2004    return BURRSort(left, right, SPQ);
2005
2006  bool LHigh = SPQ->HighRegPressure(left);
2007  bool RHigh = SPQ->HighRegPressure(right);
2008  // Avoid causing spills. If register pressure is high, schedule for
2009  // register pressure reduction.
2010  if (LHigh && !RHigh)
2011    return true;
2012  else if (!LHigh && RHigh)
2013    return false;
2014  else if (!LHigh && !RHigh) {
2015    // Low register pressure situation, schedule to maximize instruction level
2016    // parallelism.
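    // Bottom-up, a node with more data predecessors releases more nodes into
    // the ready queue when scheduled, so prefer it to expose parallelism.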
2017    if (left->NumPreds > right->NumPreds)
2018      return false;
2019    else if (left->NumPreds < right->NumPreds)
2020      return true;
2021  }
2022
2023  return BURRSort(left, right, SPQ);
2024}
2025
2026template<class SF>
2027bool
2028RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
2029  if (SU->isTwoAddress) {
2030    unsigned Opc = SU->getNode()->getMachineOpcode();
2031    const TargetInstrDesc &TID = TII->get(Opc);
2032    unsigned NumRes = TID.getNumDefs();
2033    unsigned NumOps = TID.getNumOperands() - NumRes;
2034    for (unsigned i = 0; i != NumOps; ++i) {
2035      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2036        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2037        if (DU->getNodeId() != -1 &&
2038            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2039          return true;
2040      }
2041    }
2042  }
2043  return false;
2044}
2045
2046/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2047/// physical register defs.
2048static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2049                                  const TargetInstrInfo *TII,
2050                                  const TargetRegisterInfo *TRI) {
2051  SDNode *N = SuccSU->getNode();
2052  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2053  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2054  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2055  for (const SDNode *SUNode = SU->getNode(); SUNode;
2056       SUNode = SUNode->getGluedNode()) {
2057    if (!SUNode->isMachineOpcode())
2058      continue;
2059    const unsigned *SUImpDefs =
2060      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2061    if (!SUImpDefs)
2062      return false;
2063    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2064      EVT VT = N->getValueType(i);
2065      if (VT == MVT::Glue || VT == MVT::Other)
2066        continue;
2067      if (!N->hasAnyUseOfValue(i))
2068        continue;
2069      unsigned Reg = ImpDefs[i - NumDefs];
2070      for (;*SUImpDefs; ++SUImpDefs) {
2071        unsigned SUReg = *SUImpDefs;
2072        if (TRI->regsOverlap(Reg, SUReg))
2073          return true;
2074      }
2075    }
2076  }
2077  return false;
2078}
2079
2080/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2081/// are not handled well by the general register pressure reduction
2082/// heuristics. When presented with code like this:
2083///
2084///      N
2085///    / |
2086///   /  |
2087///  U  store
2088///      |
2089///     ...
2090///
2091/// the heuristics tend to push the store up, but since the
2092/// operand of the store has another use (U), this would increase
2093/// the length of that other use (the U->N edge).
2094///
2095/// This function transforms code like the above to route U's
2096/// dependence through the store when possible, like this:
2097///
2098///      N
2099///      ||
2100///      ||
2101///     store
2102///       |
2103///       U
2104///       |
2105///      ...
2106///
2107/// This results in the store being scheduled immediately
2108/// after N, which shortens the U->N live range, reducing
2109/// register pressure.
2110///
2111template<class SF>
2112void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
2113  // Visit all the nodes in topological order, working top-down.
2114  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2115    SUnit *SU = &(*SUnits)[i];
2116    // For now, only look at nodes with no data successors, such as stores.
2117    // These are especially important, due to the heuristics in
2118    // getNodePriority for nodes with no data successors.
2119    if (SU->NumSuccs != 0)
2120      continue;
2121    // For now, only look at nodes with exactly one data predecessor.
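    // (NumPreds counts data predecessors only, so a node with a single data
    // pred may still have any number of chain preds.)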
2122    if (SU->NumPreds != 1)
2123      continue;
2124    // Avoid prescheduling copies to virtual registers, which don't behave
2125    // like other nodes from the perspective of scheduling heuristics.
2126    if (SDNode *N = SU->getNode())
2127      if (N->getOpcode() == ISD::CopyToReg &&
2128          TargetRegisterInfo::isVirtualRegister
2129            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2130        continue;
2131
2132    // Locate the single data predecessor.
2133    SUnit *PredSU = 0;
2134    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2135         EE = SU->Preds.end(); II != EE; ++II)
2136      if (!II->isCtrl()) {
2137        PredSU = II->getSUnit();
2138        break;
2139      }
2140    assert(PredSU);
2141
2142    // Don't rewrite edges that carry physregs, because that requires
2143    // additional support infrastructure.
2144    if (PredSU->hasPhysRegDefs)
2145      continue;
2146    // Short-circuit the case where SU is PredSU's only data successor.
2147    if (PredSU->NumSuccs == 1)
2148      continue;
2149    // Avoid prescheduling to copies from virtual registers, which don't
2150    // behave like other nodes from the perspective of scheduling heuristics.
2151    if (SDNode *N = SU->getNode())
2152      if (N->getOpcode() == ISD::CopyFromReg &&
2153          TargetRegisterInfo::isVirtualRegister
2154            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2155        continue;
2156
2157    // Perform checks on the successors of PredSU.
2158    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2159         EE = PredSU->Succs.end(); II != EE; ++II) {
2160      SUnit *PredSuccSU = II->getSUnit();
2161      if (PredSuccSU == SU) continue;
2162      // If PredSU has another successor with no data successors, for
2163      // now don't attempt to choose either over the other.
2164      if (PredSuccSU->NumSuccs == 0)
2165        goto outer_loop_continue;
2166      // Don't break physical register dependencies.
2167      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2168        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2169          goto outer_loop_continue;
2170      // Don't introduce graph cycles.
2171      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2172        goto outer_loop_continue;
2173    }
2174
2175    // Ok, the transformation is safe and the heuristics suggest it is
2176    // profitable. Update the graph.
2177    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2178                 << " next to PredSU #" << PredSU->NodeNum
2179                 << " to guide scheduling in the presence of multiple uses\n");
2180    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2181      SDep Edge = PredSU->Succs[i];
2182      assert(!Edge.isAssignedRegDep());
2183      SUnit *SuccSU = Edge.getSUnit();
2184      if (SuccSU != SU) {
2185        Edge.setSUnit(PredSU);
2186        scheduleDAG->RemovePred(SuccSU, Edge);
2187        scheduleDAG->AddPred(SU, Edge);
2188        Edge.setSUnit(SU);
2189        scheduleDAG->AddPred(SuccSU, Edge);
2190        --i;
2191      }
2192    }
2193  outer_loop_continue:;
2194  }
2195}
2196
2197/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2198/// it as a def&use operand, add a pseudo control edge from it to the other
2199/// node (if it won't create a cycle) so the two-address one will be scheduled
2200/// first (lower in the schedule). If both nodes are two-address, favor the
2201/// one that has a CopyToReg use (more likely to be a loop induction update).
2202/// If both are two-address but only one is commutable, favor the one that
2203/// is not commutable.
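/// For illustration, if t0 is read by a two-address node and a plain node:
///   t1 = op t0, c1   <- two-address: t1 is tied to, and clobbers, t0
///   t2 = op t0, c2   <- ordinary read of t0
/// the artificial edge forces the plain read to be placed before the
/// clobbering def in the final schedule, so t0 does not have to be copied
/// to stay live for t2.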
2204template<class SF>
2205void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
2206  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2207    SUnit *SU = &(*SUnits)[i];
2208    if (!SU->isTwoAddress)
2209      continue;
2210
2211    SDNode *Node = SU->getNode();
2212    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2213      continue;
2214
2215    bool isLiveOut = hasOnlyLiveOutUses(SU);
2216    unsigned Opc = Node->getMachineOpcode();
2217    const TargetInstrDesc &TID = TII->get(Opc);
2218    unsigned NumRes = TID.getNumDefs();
2219    unsigned NumOps = TID.getNumOperands() - NumRes;
2220    for (unsigned j = 0; j != NumOps; ++j) {
2221      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2222        continue;
2223      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2224      if (DU->getNodeId() == -1)
2225        continue;
2226      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2227      if (!DUSU) continue;
2228      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2229           E = DUSU->Succs.end(); I != E; ++I) {
2230        if (I->isCtrl()) continue;
2231        SUnit *SuccSU = I->getSUnit();
2232        if (SuccSU == SU)
2233          continue;
2234        // Be conservative. Ignore if nodes aren't at roughly the same
2235        // depth and height.
2236        if (SuccSU->getHeight() < SU->getHeight() &&
2237            (SU->getHeight() - SuccSU->getHeight()) > 1)
2238          continue;
2239        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2240        // constrains whatever is using the copy, instead of the copy
2241        // itself. In the case that the copy is coalesced, this
2242        // preserves the intent of the pseudo two-address heuristics.
2243        while (SuccSU->Succs.size() == 1 &&
2244               SuccSU->getNode()->isMachineOpcode() &&
2245               SuccSU->getNode()->getMachineOpcode() ==
2246                 TargetOpcode::COPY_TO_REGCLASS)
2247          SuccSU = SuccSU->Succs.front().getSUnit();
2248        // Don't constrain non-instruction nodes.
2249        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2250          continue;
2251        // Don't constrain nodes with physical register defs if the
2252        // predecessor can clobber them.
2253        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2254          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2255            continue;
2256        }
2257        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2258        // these may be coalesced away. We want them close to their uses.
2259        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2260        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2261            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2262            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2263          continue;
2264        if ((!canClobber(SuccSU, DUSU) ||
2265             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2266             (!SU->isCommutable && SuccSU->isCommutable)) &&
2267            !scheduleDAG->IsReachable(SuccSU, SU)) {
2268          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2269                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2270          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2271                                        /*Reg=*/0, /*isNormalMemory=*/false,
2272                                        /*isMustAlias=*/false,
2273                                        /*isArtificial=*/true));
2274        }
2275      }
2276    }
2277  }
2278}
2279
2280/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2281/// scheduling units.
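/// In the classic Sethi-Ullman numbering, a leaf needs one register and an
/// interior node needs the maximum of its children's numbers, plus one when
/// those maxima tie; a larger number marks the subtree that should be
/// evaluated first to minimize the registers live at once.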
2282template<class SF> 2283void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 2284 SethiUllmanNumbers.assign(SUnits->size(), 0); 2285 2286 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 2287 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 2288} 2289 2290/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 2291/// predecessors of the successors of the SUnit SU. Stop when the provided 2292/// limit is exceeded. 2293static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 2294 unsigned Limit) { 2295 unsigned Sum = 0; 2296 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2297 I != E; ++I) { 2298 const SUnit *SuccSU = I->getSUnit(); 2299 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 2300 EE = SuccSU->Preds.end(); II != EE; ++II) { 2301 SUnit *PredSU = II->getSUnit(); 2302 if (!PredSU->isScheduled) 2303 if (++Sum > Limit) 2304 return Sum; 2305 } 2306 } 2307 return Sum; 2308} 2309 2310 2311// Top down 2312bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 2313 unsigned LPriority = SPQ->getNodePriority(left); 2314 unsigned RPriority = SPQ->getNodePriority(right); 2315 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 2316 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 2317 bool LIsFloater = LIsTarget && left->NumPreds == 0; 2318 bool RIsFloater = RIsTarget && right->NumPreds == 0; 2319 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 2320 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 2321 2322 if (left->NumSuccs == 0 && right->NumSuccs != 0) 2323 return false; 2324 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 2325 return true; 2326 2327 if (LIsFloater) 2328 LBonus -= 2; 2329 if (RIsFloater) 2330 RBonus -= 2; 2331 if (left->NumSuccs == 1) 2332 LBonus += 2; 2333 if (right->NumSuccs == 1) 2334 RBonus += 2; 2335 2336 if (LPriority+LBonus != RPriority+RBonus) 2337 return LPriority+LBonus < RPriority+RBonus; 2338 2339 if (left->getDepth() != right->getDepth()) 2340 return left->getDepth() < right->getDepth(); 2341 2342 if (left->NumSuccsLeft != right->NumSuccsLeft) 2343 return left->NumSuccsLeft > right->NumSuccsLeft; 2344 2345 assert(left->NodeQueueId && right->NodeQueueId && 2346 "NodeQueueId cannot be zero"); 2347 return (left->NodeQueueId > right->NodeQueueId); 2348} 2349 2350//===----------------------------------------------------------------------===// 2351// Public Constructor Functions 2352//===----------------------------------------------------------------------===// 2353 2354llvm::ScheduleDAGSDNodes * 2355llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2356 CodeGenOpt::Level OptLevel) { 2357 const TargetMachine &TM = IS->TM; 2358 const TargetInstrInfo *TII = TM.getInstrInfo(); 2359 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2360 2361 BURegReductionPriorityQueue *PQ = 2362 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2363 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2364 PQ->setScheduleDAG(SD); 2365 return SD; 2366} 2367 2368llvm::ScheduleDAGSDNodes * 2369llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 2370 CodeGenOpt::Level OptLevel) { 2371 const TargetMachine &TM = IS->TM; 2372 const TargetInstrInfo *TII = TM.getInstrInfo(); 2373 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2374 2375 TDRegReductionPriorityQueue *PQ = 2376 new 
TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2377 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2378 PQ->setScheduleDAG(SD); 2379 return SD; 2380} 2381 2382llvm::ScheduleDAGSDNodes * 2383llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2384 CodeGenOpt::Level OptLevel) { 2385 const TargetMachine &TM = IS->TM; 2386 const TargetInstrInfo *TII = TM.getInstrInfo(); 2387 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2388 2389 SrcRegReductionPriorityQueue *PQ = 2390 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2391 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2392 PQ->setScheduleDAG(SD); 2393 return SD; 2394} 2395 2396llvm::ScheduleDAGSDNodes * 2397llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 2398 CodeGenOpt::Level OptLevel) { 2399 const TargetMachine &TM = IS->TM; 2400 const TargetInstrInfo *TII = TM.getInstrInfo(); 2401 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2402 const TargetLowering *TLI = &IS->getTargetLowering(); 2403 2404 HybridBURRPriorityQueue *PQ = 2405 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2406 2407 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2408 PQ->setScheduleDAG(SD); 2409 return SD; 2410} 2411 2412llvm::ScheduleDAGSDNodes * 2413llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 2414 CodeGenOpt::Level OptLevel) { 2415 const TargetMachine &TM = IS->TM; 2416 const TargetInstrInfo *TII = TM.getInstrInfo(); 2417 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2418 const TargetLowering *TLI = &IS->getTargetLowering(); 2419 2420 ILPBURRPriorityQueue *PQ = 2421 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2422 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2423 PQ->setScheduleDAG(SD); 2424 return SD; 2425} 2426
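// For reference: the scheduler names these factories are registered under
// ("list-burr", "list-tdrr", "source", "list-hybrid", "list-ilp") are the
// values accepted by llc's -pre-RA-sched option, e.g.
//   llc -pre-RA-sched=list-ilp foo.ll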