// ScheduleDAGRRList.cpp revision 8ae3edacfaebfeed3e65bbbcf18a6fb21b09feaf
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/InlineAsm.h" 21#include "llvm/CodeGen/SchedulerRegistry.h" 22#include "llvm/CodeGen/SelectionDAGISel.h" 23#include "llvm/CodeGen/ScheduleHazardRecognizer.h" 24#include "llvm/Target/TargetRegisterInfo.h" 25#include "llvm/Target/TargetData.h" 26#include "llvm/Target/TargetMachine.h" 27#include "llvm/Target/TargetInstrInfo.h" 28#include "llvm/Target/TargetLowering.h" 29#include "llvm/ADT/SmallSet.h" 30#include "llvm/ADT/Statistic.h" 31#include "llvm/ADT/STLExtras.h" 32#include "llvm/Support/Debug.h" 33#include "llvm/Support/ErrorHandling.h" 34#include "llvm/Support/raw_ostream.h" 35#include <climits> 36using namespace llvm; 37 38STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 39STATISTIC(NumUnfolds, "Number of nodes unfolded"); 40STATISTIC(NumDups, "Number of duplicated nodes"); 41STATISTIC(NumPRCopies, "Number of physical register copies"); 42 43static RegisterScheduler 44 burrListDAGScheduler("list-burr", 45 "Bottom-up register reduction list scheduling", 46 createBURRListDAGScheduler); 47static RegisterScheduler 48 tdrListrDAGScheduler("list-tdrr", 49 "Top-down register reduction list scheduling", 50 
createTDRRListDAGScheduler); 51static RegisterScheduler 52 sourceListDAGScheduler("source", 53 "Similar to list-burr but schedules in source " 54 "order when possible", 55 createSourceListDAGScheduler); 56 57static RegisterScheduler 58 hybridListDAGScheduler("list-hybrid", 59 "Bottom-up register pressure aware list scheduling " 60 "which tries to balance latency and register pressure", 61 createHybridListDAGScheduler); 62 63static RegisterScheduler 64 ILPListDAGScheduler("list-ilp", 65 "Bottom-up register pressure aware list scheduling " 66 "which tries to balance ILP and register pressure", 67 createILPListDAGScheduler); 68 69static cl::opt<bool> DisableSchedCycles( 70 "disable-sched-cycles", cl::Hidden, cl::init(false), 71 cl::desc("Disable cycle-level precision during preRA scheduling")); 72 73// Temporary sched=list-ilp flags until the heuristics are robust. 74// Some options are also available under sched=list-hybrid. 75static cl::opt<bool> DisableSchedRegPressure( 76 "disable-sched-reg-pressure", cl::Hidden, cl::init(false), 77 cl::desc("Disable regpressure priority in sched=list-ilp")); 78static cl::opt<bool> DisableSchedLiveUses( 79 "disable-sched-live-uses", cl::Hidden, cl::init(true), 80 cl::desc("Disable live use priority in sched=list-ilp")); 81static cl::opt<bool> DisableSchedVRegCycle( 82 "disable-sched-vrcycle", cl::Hidden, cl::init(false), 83 cl::desc("Disable virtual register cycle interference checks")); 84static cl::opt<bool> DisableSchedPhysRegJoin( 85 "disable-sched-physreg-join", cl::Hidden, cl::init(false), 86 cl::desc("Disable physreg def-use affinity")); 87static cl::opt<bool> DisableSchedStalls( 88 "disable-sched-stalls", cl::Hidden, cl::init(true), 89 cl::desc("Disable no-stall priority in sched=list-ilp")); 90static cl::opt<bool> DisableSchedCriticalPath( 91 "disable-sched-critical-path", cl::Hidden, cl::init(false), 92 cl::desc("Disable critical path priority in sched=list-ilp")); 93static cl::opt<bool> DisableSchedHeight( 94 
"disable-sched-height", cl::Hidden, cl::init(false), 95 cl::desc("Disable scheduled-height priority in sched=list-ilp")); 96 97static cl::opt<int> MaxReorderWindow( 98 "max-sched-reorder", cl::Hidden, cl::init(6), 99 cl::desc("Number of instructions to allow ahead of the critical path " 100 "in sched=list-ilp")); 101 102static cl::opt<unsigned> AvgIPC( 103 "sched-avg-ipc", cl::Hidden, cl::init(1), 104 cl::desc("Average inst/cycle whan no target itinerary exists.")); 105 106#ifndef NDEBUG 107namespace { 108 // For sched=list-ilp, Count the number of times each factor comes into play. 109 enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, 110 FactStatic, FactOther, NumFactors }; 111} 112static const char *FactorName[NumFactors] = 113{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; 114static int FactorCount[NumFactors]; 115#endif //!NDEBUG 116 117namespace { 118//===----------------------------------------------------------------------===// 119/// ScheduleDAGRRList - The actual register reduction list scheduler 120/// implementation. This supports both top-down and bottom-up scheduling. 121/// 122class ScheduleDAGRRList : public ScheduleDAGSDNodes { 123private: 124 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 125 /// it is top-down. 126 bool isBottomUp; 127 128 /// NeedLatency - True if the scheduler will make use of latency information. 129 /// 130 bool NeedLatency; 131 132 /// AvailableQueue - The priority queue to use for the available SUnits. 133 SchedulingPriorityQueue *AvailableQueue; 134 135 /// PendingQueue - This contains all of the instructions whose operands have 136 /// been issued, but their results are not ready yet (due to the latency of 137 /// the operation). Once the operands becomes available, the instruction is 138 /// added to the AvailableQueue. 139 std::vector<SUnit*> PendingQueue; 140 141 /// HazardRec - The hazard recognizer to use. 
142 ScheduleHazardRecognizer *HazardRec; 143 144 /// CurCycle - The current scheduler state corresponds to this cycle. 145 unsigned CurCycle; 146 147 /// MinAvailableCycle - Cycle of the soonest available instruction. 148 unsigned MinAvailableCycle; 149 150 /// IssueCount - Count instructions issued in this cycle 151 /// Currently valid only for bottom-up scheduling. 152 unsigned IssueCount; 153 154 /// LiveRegDefs - A set of physical registers and their definition 155 /// that are "live". These nodes must be scheduled before any other nodes that 156 /// modifies the registers can be scheduled. 157 unsigned NumLiveRegs; 158 std::vector<SUnit*> LiveRegDefs; 159 std::vector<SUnit*> LiveRegGens; 160 161 /// Topo - A topological ordering for SUnits which permits fast IsReachable 162 /// and similar queries. 163 ScheduleDAGTopologicalSort Topo; 164 165public: 166 ScheduleDAGRRList(MachineFunction &mf, bool needlatency, 167 SchedulingPriorityQueue *availqueue, 168 CodeGenOpt::Level OptLevel) 169 : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), 170 NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), 171 Topo(SUnits) { 172 173 const TargetMachine &tm = mf.getTarget(); 174 if (DisableSchedCycles || !NeedLatency) 175 HazardRec = new ScheduleHazardRecognizer(); 176 else 177 HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); 178 } 179 180 ~ScheduleDAGRRList() { 181 delete HazardRec; 182 delete AvailableQueue; 183 } 184 185 void Schedule(); 186 187 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } 188 189 /// IsReachable - Checks if SU is reachable from TargetSU. 190 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 191 return Topo.IsReachable(SU, TargetSU); 192 } 193 194 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 195 /// create a cycle. 
196 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 197 return Topo.WillCreateCycle(SU, TargetSU); 198 } 199 200 /// AddPred - adds a predecessor edge to SUnit SU. 201 /// This returns true if this is a new predecessor. 202 /// Updates the topological ordering if required. 203 void AddPred(SUnit *SU, const SDep &D) { 204 Topo.AddPred(SU, D.getSUnit()); 205 SU->addPred(D); 206 } 207 208 /// RemovePred - removes a predecessor edge from SUnit SU. 209 /// This returns true if an edge was removed. 210 /// Updates the topological ordering if required. 211 void RemovePred(SUnit *SU, const SDep &D) { 212 Topo.RemovePred(SU, D.getSUnit()); 213 SU->removePred(D); 214 } 215 216private: 217 bool isReady(SUnit *SU) { 218 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || 219 AvailableQueue->isReady(SU); 220 } 221 222 void ReleasePred(SUnit *SU, const SDep *PredEdge); 223 void ReleasePredecessors(SUnit *SU); 224 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 225 void ReleaseSuccessors(SUnit *SU); 226 void ReleasePending(); 227 void AdvanceToCycle(unsigned NextCycle); 228 void AdvancePastStalls(SUnit *SU); 229 void EmitNode(SUnit *SU); 230 void ScheduleNodeBottomUp(SUnit*); 231 void CapturePred(SDep *PredEdge); 232 void UnscheduleNodeBottomUp(SUnit*); 233 void RestoreHazardCheckerBottomUp(); 234 void BacktrackBottomUp(SUnit*, SUnit*); 235 SUnit *CopyAndMoveSuccessors(SUnit*); 236 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 237 const TargetRegisterClass*, 238 const TargetRegisterClass*, 239 SmallVector<SUnit*, 2>&); 240 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 241 242 SUnit *PickNodeToScheduleBottomUp(); 243 void ListScheduleBottomUp(); 244 245 void ScheduleNodeTopDown(SUnit*); 246 void ListScheduleTopDown(); 247 248 249 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 250 /// Updates the topological ordering if required. 
251 SUnit *CreateNewSUnit(SDNode *N) { 252 unsigned NumSUnits = SUnits.size(); 253 SUnit *NewNode = NewSUnit(N); 254 // Update the topological ordering. 255 if (NewNode->NodeNum >= NumSUnits) 256 Topo.InitDAGTopologicalSorting(); 257 return NewNode; 258 } 259 260 /// CreateClone - Creates a new SUnit from an existing one. 261 /// Updates the topological ordering if required. 262 SUnit *CreateClone(SUnit *N) { 263 unsigned NumSUnits = SUnits.size(); 264 SUnit *NewNode = Clone(N); 265 // Update the topological ordering. 266 if (NewNode->NodeNum >= NumSUnits) 267 Topo.InitDAGTopologicalSorting(); 268 return NewNode; 269 } 270 271 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't 272 /// need actual latency information but the hybrid scheduler does. 273 bool ForceUnitLatencies() const { 274 return !NeedLatency; 275 } 276}; 277} // end anonymous namespace 278 279 280/// Schedule - Schedule the DAG using list scheduling. 281void ScheduleDAGRRList::Schedule() { 282 DEBUG(dbgs() 283 << "********** List Scheduling BB#" << BB->getNumber() 284 << " '" << BB->getName() << "' **********\n"); 285#ifndef NDEBUG 286 for (int i = 0; i < NumFactors; ++i) { 287 FactorCount[i] = 0; 288 } 289#endif //!NDEBUG 290 291 CurCycle = 0; 292 IssueCount = 0; 293 MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; 294 NumLiveRegs = 0; 295 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 296 LiveRegGens.resize(TRI->getNumRegs(), NULL); 297 298 // Build the scheduling graph. 299 BuildSchedGraph(NULL); 300 301 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 302 SUnits[su].dumpAll(this)); 303 Topo.InitDAGTopologicalSorting(); 304 305 AvailableQueue->initNodes(SUnits); 306 307 HazardRec->Reset(); 308 309 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 
310 if (isBottomUp) 311 ListScheduleBottomUp(); 312 else 313 ListScheduleTopDown(); 314 315#ifndef NDEBUG 316 for (int i = 0; i < NumFactors; ++i) { 317 DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); 318 } 319#endif // !NDEBUG 320 AvailableQueue->releaseState(); 321} 322 323//===----------------------------------------------------------------------===// 324// Bottom-Up Scheduling 325//===----------------------------------------------------------------------===// 326 327/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 328/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 329void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 330 SUnit *PredSU = PredEdge->getSUnit(); 331 332#ifndef NDEBUG 333 if (PredSU->NumSuccsLeft == 0) { 334 dbgs() << "*** Scheduling failed! ***\n"; 335 PredSU->dump(this); 336 dbgs() << " has been released too many times!\n"; 337 llvm_unreachable(0); 338 } 339#endif 340 --PredSU->NumSuccsLeft; 341 342 if (!ForceUnitLatencies()) { 343 // Updating predecessor's height. This is now the cycle when the 344 // predecessor can be scheduled without causing a pipeline stall. 345 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 346 } 347 348 // If all the node's successors are scheduled, this node is ready 349 // to be scheduled. Ignore the special EntrySU node. 350 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 351 PredSU->isAvailable = true; 352 353 unsigned Height = PredSU->getHeight(); 354 if (Height < MinAvailableCycle) 355 MinAvailableCycle = Height; 356 357 if (isReady(PredSU)) { 358 AvailableQueue->push(PredSU); 359 } 360 // CapturePred and others may have left the node in the pending queue, avoid 361 // adding it twice. 362 else if (!PredSU->isPending) { 363 PredSU->isPending = true; 364 PendingQueue.push_back(PredSU); 365 } 366 } 367} 368 369/// Call ReleasePred for each predecessor, then update register live def/gen. 
370/// Always update LiveRegDefs for a register dependence even if the current SU 371/// also defines the register. This effectively create one large live range 372/// across a sequence of two-address node. This is important because the 373/// entire chain must be scheduled together. Example: 374/// 375/// flags = (3) add 376/// flags = (2) addc flags 377/// flags = (1) addc flags 378/// 379/// results in 380/// 381/// LiveRegDefs[flags] = 3 382/// LiveRegGens[flags] = 1 383/// 384/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid 385/// interference on flags. 386void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { 387 // Bottom up: release predecessors 388 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 389 I != E; ++I) { 390 ReleasePred(SU, &*I); 391 if (I->isAssignedRegDep()) { 392 // This is a physical register dependency and it's impossible or 393 // expensive to copy the register. Make sure nothing that can 394 // clobber the register is scheduled between the predecessor and 395 // this node. 396 SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; 397 assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && 398 "interference on register dependence"); 399 LiveRegDefs[I->getReg()] = I->getSUnit(); 400 if (!LiveRegGens[I->getReg()]) { 401 ++NumLiveRegs; 402 LiveRegGens[I->getReg()] = SU; 403 } 404 } 405 } 406} 407 408/// Check to see if any of the pending instructions are ready to issue. If 409/// so, add them to the available queue. 410void ScheduleDAGRRList::ReleasePending() { 411 if (DisableSchedCycles) { 412 assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); 413 return; 414 } 415 416 // If the available queue is empty, it is safe to reset MinAvailableCycle. 417 if (AvailableQueue->empty()) 418 MinAvailableCycle = UINT_MAX; 419 420 // Check to see if any of the pending instructions are ready to issue. If 421 // so, add them to the available queue. 
422 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { 423 unsigned ReadyCycle = 424 isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); 425 if (ReadyCycle < MinAvailableCycle) 426 MinAvailableCycle = ReadyCycle; 427 428 if (PendingQueue[i]->isAvailable) { 429 if (!isReady(PendingQueue[i])) 430 continue; 431 AvailableQueue->push(PendingQueue[i]); 432 } 433 PendingQueue[i]->isPending = false; 434 PendingQueue[i] = PendingQueue.back(); 435 PendingQueue.pop_back(); 436 --i; --e; 437 } 438} 439 440/// Move the scheduler state forward by the specified number of Cycles. 441void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { 442 if (NextCycle <= CurCycle) 443 return; 444 445 IssueCount = 0; 446 AvailableQueue->setCurCycle(NextCycle); 447 if (!HazardRec->isEnabled()) { 448 // Bypass lots of virtual calls in case of long latency. 449 CurCycle = NextCycle; 450 } 451 else { 452 for (; CurCycle != NextCycle; ++CurCycle) { 453 if (isBottomUp) 454 HazardRec->RecedeCycle(); 455 else 456 HazardRec->AdvanceCycle(); 457 } 458 } 459 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the 460 // available Q to release pending nodes at least once before popping. 461 ReleasePending(); 462} 463 464/// Move the scheduler state forward until the specified node's dependents are 465/// ready and can be scheduled with no resource conflicts. 466void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { 467 if (DisableSchedCycles) 468 return; 469 470 // FIXME: Nodes such as CopyFromReg probably should not advance the current 471 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node 472 // has predecessors the cycle will be advanced when they are scheduled. 473 // But given the crude nature of modeling latency though such nodes, we 474 // currently need to treat these nodes like real instructions. 475 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; 476 477 unsigned ReadyCycle = isBottomUp ? 
SU->getHeight() : SU->getDepth(); 478 479 // Bump CurCycle to account for latency. We assume the latency of other 480 // available instructions may be hidden by the stall (not a full pipe stall). 481 // This updates the hazard recognizer's cycle before reserving resources for 482 // this instruction. 483 AdvanceToCycle(ReadyCycle); 484 485 // Calls are scheduled in their preceding cycle, so don't conflict with 486 // hazards from instructions after the call. EmitNode will reset the 487 // scoreboard state before emitting the call. 488 if (isBottomUp && SU->isCall) 489 return; 490 491 // FIXME: For resource conflicts in very long non-pipelined stages, we 492 // should probably skip ahead here to avoid useless scoreboard checks. 493 int Stalls = 0; 494 while (true) { 495 ScheduleHazardRecognizer::HazardType HT = 496 HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); 497 498 if (HT == ScheduleHazardRecognizer::NoHazard) 499 break; 500 501 ++Stalls; 502 } 503 AdvanceToCycle(CurCycle + Stalls); 504} 505 506/// Record this SUnit in the HazardRecognizer. 507/// Does not update CurCycle. 508void ScheduleDAGRRList::EmitNode(SUnit *SU) { 509 if (!HazardRec->isEnabled()) 510 return; 511 512 // Check for phys reg copy. 513 if (!SU->getNode()) 514 return; 515 516 switch (SU->getNode()->getOpcode()) { 517 default: 518 assert(SU->getNode()->isMachineOpcode() && 519 "This target-independent node should not be scheduled."); 520 break; 521 case ISD::MERGE_VALUES: 522 case ISD::TokenFactor: 523 case ISD::CopyToReg: 524 case ISD::CopyFromReg: 525 case ISD::EH_LABEL: 526 // Noops don't affect the scoreboard state. Copies are likely to be 527 // removed. 528 return; 529 case ISD::INLINEASM: 530 // For inline asm, clear the pipeline state. 531 HazardRec->Reset(); 532 return; 533 } 534 if (isBottomUp && SU->isCall) { 535 // Calls are scheduled with their preceding instructions. For bottom-up 536 // scheduling, clear the pipeline state before emitting. 
537 HazardRec->Reset(); 538 } 539 540 HazardRec->EmitInstruction(SU); 541 542 if (!isBottomUp && SU->isCall) { 543 HazardRec->Reset(); 544 } 545} 546 547static void resetVRegCycle(SUnit *SU); 548 549/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 550/// count of its predecessors. If a predecessor pending count is zero, add it to 551/// the Available queue. 552void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { 553 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 554 DEBUG(SU->dump(this)); 555 556#ifndef NDEBUG 557 if (CurCycle < SU->getHeight()) 558 DEBUG(dbgs() << " Height [" << SU->getHeight() 559 << "] pipeline stall!\n"); 560#endif 561 562 // FIXME: Do not modify node height. It may interfere with 563 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the 564 // node its ready cycle can aid heuristics, and after scheduling it can 565 // indicate the scheduled cycle. 566 SU->setHeightToAtLeast(CurCycle); 567 568 // Reserve resources for the scheduled intruction. 569 EmitNode(SU); 570 571 Sequence.push_back(SU); 572 573 AvailableQueue->ScheduledNode(SU); 574 575 // If HazardRec is disabled, and each inst counts as one cycle, then 576 // advance CurCycle before ReleasePredecessors to avoid useless pushes to 577 // PendingQueue for schedulers that implement HasReadyFilter. 578 if (!HazardRec->isEnabled() && AvgIPC < 2) 579 AdvanceToCycle(CurCycle + 1); 580 581 // Update liveness of predecessors before successors to avoid treating a 582 // two-address node as a live range def. 583 ReleasePredecessors(SU); 584 585 // Release all the implicit physical register defs that are live. 586 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 587 I != E; ++I) { 588 // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. 
589 if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { 590 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 591 --NumLiveRegs; 592 LiveRegDefs[I->getReg()] = NULL; 593 LiveRegGens[I->getReg()] = NULL; 594 } 595 } 596 597 resetVRegCycle(SU); 598 599 SU->isScheduled = true; 600 601 // Conditions under which the scheduler should eagerly advance the cycle: 602 // (1) No available instructions 603 // (2) All pipelines full, so available instructions must have hazards. 604 // 605 // If HazardRec is disabled, the cycle was pre-advanced before calling 606 // ReleasePredecessors. In that case, IssueCount should remain 0. 607 // 608 // Check AvailableQueue after ReleasePredecessors in case of zero latency. 609 if (HazardRec->isEnabled() || AvgIPC > 1) { 610 if (SU->getNode() && SU->getNode()->isMachineOpcode()) 611 ++IssueCount; 612 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) 613 || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) 614 AdvanceToCycle(CurCycle + 1); 615 } 616} 617 618/// CapturePred - This does the opposite of ReleasePred. Since SU is being 619/// unscheduled, incrcease the succ left count of its predecessors. Remove 620/// them from AvailableQueue if necessary. 621void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 622 SUnit *PredSU = PredEdge->getSUnit(); 623 if (PredSU->isAvailable) { 624 PredSU->isAvailable = false; 625 if (!PredSU->isPending) 626 AvailableQueue->remove(PredSU); 627 } 628 629 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 630 ++PredSU->NumSuccsLeft; 631} 632 633/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 634/// its predecessor states to reflect the change. 
635void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 636 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 637 DEBUG(SU->dump(this)); 638 639 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 640 I != E; ++I) { 641 CapturePred(&*I); 642 if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){ 643 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 644 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 645 "Physical register dependency violated?"); 646 --NumLiveRegs; 647 LiveRegDefs[I->getReg()] = NULL; 648 LiveRegGens[I->getReg()] = NULL; 649 } 650 } 651 652 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 653 I != E; ++I) { 654 if (I->isAssignedRegDep()) { 655 // This becomes the nearest def. Note that an earlier def may still be 656 // pending if this is a two-address node. 657 LiveRegDefs[I->getReg()] = SU; 658 if (!LiveRegDefs[I->getReg()]) { 659 ++NumLiveRegs; 660 } 661 if (LiveRegGens[I->getReg()] == NULL || 662 I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) 663 LiveRegGens[I->getReg()] = I->getSUnit(); 664 } 665 } 666 if (SU->getHeight() < MinAvailableCycle) 667 MinAvailableCycle = SU->getHeight(); 668 669 SU->setHeightDirty(); 670 SU->isScheduled = false; 671 SU->isAvailable = true; 672 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { 673 // Don't make available until backtracking is complete. 674 SU->isPending = true; 675 PendingQueue.push_back(SU); 676 } 677 else { 678 AvailableQueue->push(SU); 679 } 680 AvailableQueue->UnscheduledNode(SU); 681} 682 683/// After backtracking, the hazard checker needs to be restored to a state 684/// corresponding the the current cycle. 
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  // Only the trailing getMaxLookAhead() instructions of the schedule can
  // still influence the recognizer's state, so replay just that window.
  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  // Replay the tail of Sequence: recede one cycle at a time whenever the
  // next instruction's height moves past the replayed cycle, then re-emit
  // the instruction into the scoreboard.
  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  // Pop nodes off the scheduled sequence and unschedule them one at a time
  // until BtSU itself has been unscheduled.
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  // After backtracking past BtSU, SU must no longer depend on anything that
  // remains scheduled.
  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  // Rebuild the hazard recognizer's state to match the shortened schedule.
  RestoreHazardCheckerBottomUp();

  // Unscheduling may have made pending instructions ready.
  ReleasePending();

  ++NumBacktracks;
}

/// isOperandOf - Return true if N is an operand of SU's node or of any node
/// glued to it.
static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
742SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 743 SDNode *N = SU->getNode(); 744 if (!N) 745 return NULL; 746 747 if (SU->getNode()->getGluedNode()) 748 return NULL; 749 750 SUnit *NewSU; 751 bool TryUnfold = false; 752 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 753 EVT VT = N->getValueType(i); 754 if (VT == MVT::Glue) 755 return NULL; 756 else if (VT == MVT::Other) 757 TryUnfold = true; 758 } 759 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 760 const SDValue &Op = N->getOperand(i); 761 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 762 if (VT == MVT::Glue) 763 return NULL; 764 } 765 766 if (TryUnfold) { 767 SmallVector<SDNode*, 2> NewNodes; 768 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 769 return NULL; 770 771 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 772 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 773 774 N = NewNodes[1]; 775 SDNode *LoadNode = NewNodes[0]; 776 unsigned NumVals = N->getNumValues(); 777 unsigned OldNumVals = SU->getNode()->getNumValues(); 778 for (unsigned i = 0; i != NumVals; ++i) 779 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 780 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 781 SDValue(LoadNode, 1)); 782 783 // LoadNode may already exist. This can happen when there is another 784 // load from the same location and producing the same type of value 785 // but it has different alignment or volatileness. 
786 bool isNewLoad = true; 787 SUnit *LoadSU; 788 if (LoadNode->getNodeId() != -1) { 789 LoadSU = &SUnits[LoadNode->getNodeId()]; 790 isNewLoad = false; 791 } else { 792 LoadSU = CreateNewSUnit(LoadNode); 793 LoadNode->setNodeId(LoadSU->NodeNum); 794 795 InitNumRegDefsLeft(LoadSU); 796 ComputeLatency(LoadSU); 797 } 798 799 SUnit *NewSU = CreateNewSUnit(N); 800 assert(N->getNodeId() == -1 && "Node already inserted!"); 801 N->setNodeId(NewSU->NodeNum); 802 803 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 804 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 805 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 806 NewSU->isTwoAddress = true; 807 break; 808 } 809 } 810 if (TID.isCommutable()) 811 NewSU->isCommutable = true; 812 813 InitNumRegDefsLeft(NewSU); 814 ComputeLatency(NewSU); 815 816 // Record all the edges to and from the old SU, by category. 817 SmallVector<SDep, 4> ChainPreds; 818 SmallVector<SDep, 4> ChainSuccs; 819 SmallVector<SDep, 4> LoadPreds; 820 SmallVector<SDep, 4> NodePreds; 821 SmallVector<SDep, 4> NodeSuccs; 822 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 823 I != E; ++I) { 824 if (I->isCtrl()) 825 ChainPreds.push_back(*I); 826 else if (isOperandOf(I->getSUnit(), LoadNode)) 827 LoadPreds.push_back(*I); 828 else 829 NodePreds.push_back(*I); 830 } 831 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 832 I != E; ++I) { 833 if (I->isCtrl()) 834 ChainSuccs.push_back(*I); 835 else 836 NodeSuccs.push_back(*I); 837 } 838 839 // Now assign edges to the newly-created nodes. 
// --- Continuation of ScheduleDAGRRList::CopyAndMoveSuccessors: the unfolded
// load/operation pair (LoadSU/NewSU) has been created; rewire SU's edges. ---

  // Chain and load predecessors move onto the new load node (only if one was
  // actually created); value predecessors move onto the unfolded operation.
  for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
    const SDep &Pred = ChainPreds[i];
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
    const SDep &Pred = LoadPreds[i];
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
    const SDep &Pred = NodePreds[i];
    RemovePred(SU, Pred);
    AddPred(NewSU, Pred);
  }
  // Data successors are redirected to depend on the unfolded operation.
  for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
    SDep D = NodeSuccs[i];
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPred(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
        && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  // Chain successors are redirected to depend on the new load, if any.
  for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
    SDep D = ChainSuccs[i];
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPred(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  // If the unfolded operation has no remaining unscheduled successors it can
  // be returned for immediate scheduling; otherwise fall through to cloning.
  if (NewSU->NumSuccsLeft == 0) {
    NewSU->isAvailable = true;
    return NewSU;
  }
  SU = NewSU;
  }

  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      // Deferred removal: mutating SU->Succs while iterating it would
      // invalidate the iterator.
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
/// CopyFromSU copies Reg out of SrcRC; CopyToSU copies it back into SrcRC.
/// Both new SUnits are appended to Copies (def-side copy first).
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  // SU feeds CopyFromSU (via Reg), which feeds CopyToSU.
  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// The result index is the number of explicit defs plus the position of Reg
/// in the implicit-def list.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
/// Walks every register overlapping Reg (including Reg itself); RegAdded
/// de-duplicates entries pushed onto LRegs.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Ref is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
// (continuation of CheckForLiveRegDef: record each interfering reg once)
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
/// On return, LRegs holds the set of live registers SU would clobber.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  // Fast path: nothing can interfere when no physregs are live.
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  // Walk SU's glued-node chain, checking each node's register defs.
  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      // Each inline-asm operand group is a flags word followed by NumVals
      // register operands.
      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  // Pop candidates until one has no live-physreg interference; set aside the
  // ones that do.
  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
// (continuation of PickNodeToScheduleBottomUp: all candidates interfere)
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node. BtSU is the interfering reg's user with the lowest height.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer avaialable. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // duplicate the nodes that produces these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If cross copy register class is the same as RC, then it must be possible
    // copy the value directly. Do not try duplicate the def.
    // If cross copy register class is not the same as RC, then it's possible to
    // copy the value but it require cross register class copies and it is
    // expensive.
    // If cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    // The new def becomes the live definition of Reg and must be scheduled
    // after (i.e. below) TrySU.
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
// (continuation of ListScheduleBottomUp: main scheduling loop)
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  // Releasing a node more times than it has predecessors indicates a broken
  // dependence graph; diagnose loudly in debug builds.
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

/// ReleaseSuccessors - Release all successor edges of SU (top-down only).
void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    // Advance one cycle per pop, whether or not a node was issued.
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

// queue_sort - Common base for the priority functors below; the default
// isReady accepts every node (no ready filter).
struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// td_ls_rr_sort - Priority function for top down register pressure reduction
// scheduler.
struct td_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = false,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// RegReductionPQBase - Non-template state and logic shared by all
// RegReductionPriorityQueue instantiations.
class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  ///
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      // Size the per-register-class pressure tables and record each class's
      // pressure limit for this function.
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    // NodeQueueId doubles as an in-queue marker (0 == not queued).
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    // Swap-with-back erase: O(1) removal, order within Queue is not
    // significant (popFromQueue scans the whole vector).
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  // popFromQueue - Linear scan for the best node under Picker; removes and
  // returns it (swap-with-back, so Q's order is arbitrary).
  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
    std::vector<SUnit *>::iterator Best = Q.begin();
    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
           E = Q.end(); I != E; ++I)
      if (Picker(*Best, *I))
        Best = I;
    SUnit *V = *Best;
    if (Best != prior(Q.end()))
      std::swap(*Best, Q.back());
    Q.pop_back();
    return V;
  }

  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() {
    if (Queue.empty()) return NULL;

    SUnit *V = popFromQueue(Queue, Picker);
    V->NodeQueueId = 0;
    return V;
  }

  void dump(ScheduleDAG *DAG) const {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit*> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
      if (isBottomUp())
        dbgs() << "Height " << SU->getHeight() << ": ";
      else
        dbgs() << "Depth " << SU->getDepth() << ": ";
      SU->dump(DAG);
    }
  }
};

typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<td_ls_rr_sort>
TDRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
ILPBURRPriorityQueue;
} // end anonymous namespace

//===----------------------------------------------------------------------===//
//           Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
// Recursively computes (and memoizes in SUNumbers) the Sethi-Ullman number
// for SU from its non-chain predecessors. A cached nonzero entry is returned
// as-is; a freshly computed 0 is bumped to 1 so it reads as "computed".
// NOTE(review): recursion depth follows the predecessor chain; presumably
// fine for typical blocks but could be deep on pathological DAGs — confirm.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

// addNode - Register a newly created SUnit (e.g. from unfolding/cloning) and
// compute its Sethi-Ullman number, growing the table if needed.
// NOTE(review): the table doubles once; assumes a single grow suffices for
// the nodes added between calls — confirm against CreateNewSUnit usage.
void RegReductionPQBase::addNode(const SUnit *SU) {
  unsigned SUSize = SethiUllmanNumbers.size();
  if (SUnits->size() > SUSize)
    SethiUllmanNumbers.resize(SUSize*2, 0);
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

// updateNode - Recompute SU's Sethi-Ullman number after its edges changed
// (the 0 reset invalidates the memoized value).
void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SethiUllmanNumbers.size());
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return 0;
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return 0;
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors that it doesn't lengthen
    // their live ranges.
    return 0xffff;
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
#if 1
  return SethiUllmanNumbers[SU->NodeNum];
#else
  // Disabled experiment: discount call operands from the priority.
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
  if (SU->isCallOp) {
    // FIXME: This assumes all of the defs are used as call operands.
    int NP = (int)Priority - SU->getNode()->getNumValues();
    return (NP > 0) ? NP : 0;
  }
  return Priority;
#endif
}

//===----------------------------------------------------------------------===//
//                     Register Pressure Tracking
//===----------------------------------------------------------------------===//

// dumpRegPressure - Debug-print current pressure vs. limit for every register
// class with nonzero pressure.
void RegReductionPQBase::dumpRegPressure() const {
  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
         E = TRI->regclass_end(); I != E; ++I) {
    const TargetRegisterClass *RC = *I;
    unsigned Id = RC->getID();
    unsigned RP = RegPressure[Id];
    if (!RP) continue;
    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
          << '\n');
  }
}

// HighRegPressure - Return true if scheduling SU would push any register
// class at or over its pressure limit, judged by the still-live defs of SU's
// data predecessors.
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
  if (!TLI)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
1776 if (PredSU->NumRegDefsLeft == 0) { 1777 continue; 1778 } 1779 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1780 RegDefPos.IsValid(); RegDefPos.Advance()) { 1781 EVT VT = RegDefPos.GetValue(); 1782 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1783 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1784 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1785 return true; 1786 } 1787 } 1788 return false; 1789} 1790 1791bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 1792 const SDNode *N = SU->getNode(); 1793 1794 if (!N->isMachineOpcode() || !SU->NumSuccs) 1795 return false; 1796 1797 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1798 for (unsigned i = 0; i != NumDefs; ++i) { 1799 EVT VT = N->getValueType(i); 1800 if (!N->hasAnyUseOfValue(i)) 1801 continue; 1802 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1803 if (RegPressure[RCId] >= RegLimit[RCId]) 1804 return true; 1805 } 1806 return false; 1807} 1808 1809// Compute the register pressure contribution by this instruction by count up 1810// for uses that are not live and down for defs. Only count register classes 1811// that are already under high pressure. As a side effect, compute the number of 1812// uses of registers that are already live. 1813// 1814// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 1815// so could probably be factored. 1816int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 1817 LiveUses = 0; 1818 int PDiff = 0; 1819 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1820 I != E; ++I) { 1821 if (I->isCtrl()) 1822 continue; 1823 SUnit *PredSU = I->getSUnit(); 1824 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1825 // to cover the number of registers defined (they are all live). 
1826 if (PredSU->NumRegDefsLeft == 0) { 1827 if (PredSU->getNode()->isMachineOpcode()) 1828 ++LiveUses; 1829 continue; 1830 } 1831 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1832 RegDefPos.IsValid(); RegDefPos.Advance()) { 1833 EVT VT = RegDefPos.GetValue(); 1834 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1835 if (RegPressure[RCId] >= RegLimit[RCId]) 1836 ++PDiff; 1837 } 1838 } 1839 const SDNode *N = SU->getNode(); 1840 1841 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 1842 return PDiff; 1843 1844 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1845 for (unsigned i = 0; i != NumDefs; ++i) { 1846 EVT VT = N->getValueType(i); 1847 if (!N->hasAnyUseOfValue(i)) 1848 continue; 1849 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1850 if (RegPressure[RCId] >= RegLimit[RCId]) 1851 --PDiff; 1852 } 1853 return PDiff; 1854} 1855 1856void RegReductionPQBase::ScheduledNode(SUnit *SU) { 1857 if (!TracksRegPressure) 1858 return; 1859 1860 if (!SU->getNode()) 1861 return; 1862 1863 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1864 I != E; ++I) { 1865 if (I->isCtrl()) 1866 continue; 1867 SUnit *PredSU = I->getSUnit(); 1868 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1869 // to cover the number of registers defined (they are all live). 1870 if (PredSU->NumRegDefsLeft == 0) { 1871 continue; 1872 } 1873 // FIXME: The ScheduleDAG currently loses information about which of a 1874 // node's values is consumed by each dependence. Consequently, if the node 1875 // defines multiple register classes, we don't know which to pressurize 1876 // here. Instead the following loop consumes the register defs in an 1877 // arbitrary order. At least it handles the common case of clustered loads 1878 // to the same class. For precise liveness, each SDep needs to indicate the 1879 // result number. 
    // But that tightly couples the ScheduleDAG with the
    // SelectionDAG making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    // Bump pressure for exactly one (arbitrary) remaining def of PredSU: skip
    // the first NumRegDefsLeft defs, charge the next one, then stop.
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;
      EVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      break;
    }
  }

  // Release the pressure of SU's own defs whose uses have all been scheduled.
  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    EVT VT = RegDefPos.GetValue();
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
      DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    }
    else {
      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }
  dumpRegPressure();
}

/// UnscheduledNode - Undo ScheduledNode's pressure bookkeeping for SU.
/// NOTE(review): presumably invoked when the scheduler backtracks (the file
/// keeps a NumBacktracks statistic) — confirm against the caller.
void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  const SDNode *N = SU->getNode();
  if (!N) return;

  // Nodes that never contributed pressure need no compensation: non-machine
  // nodes other than CopyToReg, and the coalescable pseudo-ops below.
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() != ISD::CopyToReg)
      return;
  } else {
    unsigned Opc = N->getMachineOpcode();
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::INSERT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::REG_SEQUENCE ||
        Opc == TargetOpcode::IMPLICIT_DEF)
      return;
  }

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    // counts data deps.
    // Only rewind pressure for predecessors none of whose successors remain
    // scheduled (NumSuccsLeft back to its full count).
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
      continue;
    const SDNode *PN = PredSU->getNode();
    if (!PN->isMachineOpcode()) {
      if (PN->getOpcode() == ISD::CopyFromReg) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
      continue;
    }
    unsigned POpc = PN->getMachineOpcode();
    if (POpc == TargetOpcode::IMPLICIT_DEF)
      continue;
    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
      // EXTRACT_SUBREG's live value is its operand, not its result.
      EVT VT = PN->getOperand(0).getValueType();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
               POpc == TargetOpcode::SUBREG_TO_REG) {
      EVT VT = PN->getValueType(0);
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    }
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    for (unsigned i = 0; i != NumDefs; ++i) {
      EVT VT = PN->getValueType(i);
      if (!PN->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
        // Register pressure tracking is imprecise. This can happen.
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }

  // Re-add the pressure of SU's own extra used results.
  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
  // may transfer data dependencies to CopyToReg.
  if (SU->NumSuccs && N->isMachineOpcode()) {
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    }
  }

  dumpRegPressure();
}

//===----------------------------------------------------------------------===//
//           Dynamic Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *PredSU = I->getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg =
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    // Any other kind of data predecessor disqualifies SU.
    return false;
  }
  return RetVal;
}

/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    // Any other kind of data successor disqualifies SU.
    return false;
  }
  return RetVal;
}

// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
static void initVRegCycle(SUnit *SU) {
  if (DisableSchedVRegCycle)
    return;

  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    return;

  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");

  SU->isVRegCycle = true;

  // Propagate the flag to the CopyFromReg operands as well.
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    I->getSUnit()->isVRegCycle = true;
  }
}

// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
  if (!SU->isVRegCycle)
    return;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    if (PredSU->isVRegCycle) {
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
             "VRegCycle def must be CopyFromReg");
      I->getSUnit()->isVRegCycle = 0;
    }
  }
}

// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
// means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
  // If this SU also defines the VReg, don't hoist it as a "use".
  if (SU->isVRegCycle)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (I->getSUnit()->isVRegCycle &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
      DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
      return true;
    }
  }
  return false;
}

// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
  // A node whose height exceeds the current cycle is not ready yet.
  if ((int)SPQ->getCurCycle() < Height) return true;
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return true;
  return false;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  // With checkPref, only nodes that asked for latency scheduling are
  // considered stall candidates.
  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
    BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
    BUHasStall(right, RHeight, SPQ);

  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling either one of the nodes will cause a pipeline stall, sort
  // them according to their height.
  if (LStall) {
    if (!RStall) {
      DEBUG(++FactorCount[FactStall]);
      return 1;
    }
    if (LHeight != RHeight) {
      DEBUG(++FactorCount[FactStall]);
      return LHeight > RHeight ? 1 : -1;
    }
  } else if (RStall) {
    DEBUG(++FactorCount[FactStall]);
    return -1;
  }

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
                     right->SchedulingPref == Sched::Latency)) {
    if (DisableSchedCycles) {
      if (LHeight != RHeight) {
        DEBUG(++FactorCount[FactHeight]);
        return LHeight > RHeight ? 1 : -1;
      }
    }
    else {
      // If neither instruction stalls (!LStall && !RStall) then
      // its height is already covered so only its depth matters. We also reach
      // this if both stall but have the same height.
      int LDepth = left->getDepth() - LPenalty;
      int RDepth = right->getDepth() - RPenalty;
      if (LDepth != RDepth) {
        DEBUG(++FactorCount[FactDepth]);
        DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
              << ") depth " << LDepth << " vs SU (" << right->NodeNum
              << ") depth " << RDepth << "\n");
        return LDepth < RDepth ? 1 : -1;
      }
    }
    // Final latency tie-break on the instruction latency itself.
    if (left->Latency != right->Latency) {
      DEBUG(++FactorCount[FactOther]);
      return left->Latency > right->Latency ? 1 : -1;
    }
  }
  return 0;
}

static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
      DEBUG(++FactorCount[FactRegUses]);
      #ifndef NDEBUG
      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
      #endif
      DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
            << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allows it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority) {
    DEBUG(++FactorCount[FactStatic]);
    return LPriority > RPriority;
  }

  // One or both of the nodes are calls and their sethi-ullman numbers are the
  // same, then keep source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer an ordering where the lower the non-zero order number, the higher
    // the preference.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist) {
    DEBUG(++FactorCount[FactOther]);
    return LDist < RDist;
  }

  // How many registers becomes live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch) {
    DEBUG(++FactorCount[FactOther]);
    return LScratch > RScratch;
  }

  // Comparing latency against a call makes little sense unless the node
  // is register pressure-neutral.
  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    return (left->NodeQueueId > right->NodeQueueId);

  // Do not compare latencies when one or both of the nodes are calls.
  if (!DisableSchedCycles &&
      !(left->isCall || right->isCall)) {
    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  else {
    // Fallback: compare raw height, then depth.
    if (left->getHeight() != right->getHeight()) {
      DEBUG(++FactorCount[FactHeight]);
      return left->getHeight() > right->getHeight();
    }

    if (left->getDepth() != right->getDepth()) {
      DEBUG(++FactorCount[FactDepth]);
      return left->getDepth() < right->getDepth();
    }
  }

  // Last resort: queue insertion order keeps the comparison a strict weak
  // ordering.
  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  DEBUG(++FactorCount[FactOther]);
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the preference.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

// If the time between now and when the instruction will be ready can cover
// the spill code, then avoid adding it to the ready queue. This gives long
// stalls highest priority and allows hoisting across calls. It should also
// speed up processing the available queue.
bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  static const unsigned ReadyDelay = 3;

  // Pressure-reducing nodes are always worth making available.
  if (SPQ->MayReduceRegPressure(SU)) return true;

  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh) {
    DEBUG(++FactorCount[FactPressureDiff]);
    DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
          << right->NodeNum << ")\n");
    return true;
  }
  else if (!LHigh && RHigh) {
    DEBUG(++FactorCount[FactPressureDiff]);
    DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
          << left->NodeNum << ")\n");
    return false;
  }
  // Neither side is under pressure: schedule for latency first.
  if (!LHigh && !RHigh) {
    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  return BURRSort(left, right, SPQ);
}

// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  if (SU->getHeight() > CurCycle) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

/// canEnableCoalescing - Return true if scheduling SU close to its uses is
/// likely to enable coalescing or at least not lengthen live ranges.
static bool canEnableCoalescing(SUnit *SU) {
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return true;

  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return true;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return true;

  return false;
}

// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  unsigned LLiveUses = 0, RLiveUses = 0;
  int LPDiff = 0, RPDiff = 0;
  // RegPressureDiff computes both the pressure delta and the live-use count,
  // so run it if either heuristic is enabled.
  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
  }
  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    DEBUG(++FactorCount[FactPressureDiff]);
    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
    return LPDiff > RPDiff;
  }

  // Under pressure, prefer the node whose placement can enable coalescing.
  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    bool LReduce = canEnableCoalescing(left);
    bool RReduce = canEnableCoalescing(right);
    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
    if (LReduce && !RReduce) return false;
    if (RReduce && !LReduce) return true;
  }

  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
    DEBUG(++FactorCount[FactRegUses]);
    return LLiveUses < RLiveUses;
  }

  if (!DisableSchedStalls) {
    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    if (LStall != RStall) {
      DEBUG(++FactorCount[FactHeight]);
      return left->getHeight() > right->getHeight();
    }
  }

  // Only reorder across large depth/height differences (MaxReorderWindow).
  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
            << left->getDepth() << " != SU(" << right->NodeNum << "): "
            << right->getDepth() << "\n");
      DEBUG(++FactorCount[FactDepth]);
      return left->getDepth() < right->getDepth();
    }
  }

  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow) {
      DEBUG(++FactorCount[FactHeight]);
      return left->getHeight() > right->getHeight();
    }
  }

  return BURRSort(left, right, SPQ);
}

/// initNodes - Prepare the SUnits for scheduling: add two-address pseudo
/// edges, reroute multi-use edges (when not tracking pressure), compute
/// Sethi-Ullman numbers, and mark VRegCycle nodes in single-block loops.
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  // Add pseudo dependency edges for two-address nodes.
  AddPseudoTwoAddrDeps();
  // Reroute edges to nodes with multiple uses.
  if (!TracksRegPressure)
    PrescheduleNodesWithMultipleUses();
  // Calculate node priorities.
  CalculateSethiUllmanNumbers();

  // For single block loops, mark nodes that look like canonical IV increments.
  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
      initVRegCycle(&sunits[i]);
    }
  }
}

//===----------------------------------------------------------------------===//
//                    Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

/// canClobber - Return true if two-address node SU has a tied operand whose
/// defining node is Op (i.e. SU reads and rewrites Op's result in place).
bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  // Walk SU and its glued nodes, checking each one's implicit defs against
  // the physregs SuccSU defines.
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
          << " next to PredSU #" << PredSU->NodeNum
          << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        // Reroute PredSU->SuccSU through SU: remove the old edge, make SU
        // depend on PredSU, and SuccSU depend on SU.
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand. Add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // Only two-address instructions are candidates for this transformation.
    if (!SU->isTwoAddress)
      continue;

    // Skip nodes that aren't machine instructions, and nodes that are glued
    // to another node (glued nodes are scheduled as a unit).
    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    // Walk the use operands (operand indices in the TargetInstrDesc are
    // offset by NumRes, the number of defs) looking for tied operands.
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      // DU is the node defining the value that operand j is tied to.
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      // NodeId indexes into SUnits; -1 means DU has no SUnit of its own.
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      // Consider every other data use of DU's value; a pseudo edge from
      // such a use to SU forces the use to be scheduled after SU
      // (i.e. before it in the bottom-up order).
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        // Add the artificial edge only when the heuristics favor SU
        // (the use can't clobber the tied value anyway, or SU feeds
        // live-outs while SuccSU doesn't, or SU is the non-commutable
        // one) and doing so won't create a cycle in the DAG.
        if ((!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
2796static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 2797 unsigned Limit) { 2798 unsigned Sum = 0; 2799 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2800 I != E; ++I) { 2801 const SUnit *SuccSU = I->getSUnit(); 2802 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 2803 EE = SuccSU->Preds.end(); II != EE; ++II) { 2804 SUnit *PredSU = II->getSUnit(); 2805 if (!PredSU->isScheduled) 2806 if (++Sum > Limit) 2807 return Sum; 2808 } 2809 } 2810 return Sum; 2811} 2812 2813 2814// Top down 2815bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 2816 if (int res = checkSpecialNodes(left, right)) 2817 return res < 0; 2818 2819 unsigned LPriority = SPQ->getNodePriority(left); 2820 unsigned RPriority = SPQ->getNodePriority(right); 2821 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 2822 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 2823 bool LIsFloater = LIsTarget && left->NumPreds == 0; 2824 bool RIsFloater = RIsTarget && right->NumPreds == 0; 2825 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 2826 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; 2827 2828 if (left->NumSuccs == 0 && right->NumSuccs != 0) 2829 return false; 2830 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 2831 return true; 2832 2833 if (LIsFloater) 2834 LBonus -= 2; 2835 if (RIsFloater) 2836 RBonus -= 2; 2837 if (left->NumSuccs == 1) 2838 LBonus += 2; 2839 if (right->NumSuccs == 1) 2840 RBonus += 2; 2841 2842 if (LPriority+LBonus != RPriority+RBonus) 2843 return LPriority+LBonus < RPriority+RBonus; 2844 2845 if (left->getDepth() != right->getDepth()) 2846 return left->getDepth() < right->getDepth(); 2847 2848 if (left->NumSuccsLeft != right->NumSuccsLeft) 2849 return left->NumSuccsLeft > right->NumSuccsLeft; 2850 2851 assert(left->NodeQueueId && right->NodeQueueId && 2852 "NodeQueueId cannot be zero"); 2853 return (left->NodeQueueId > right->NodeQueueId); 2854} 2855 2856//===----------------------------------------------------------------------===// 2857// Public Constructor Functions 2858//===----------------------------------------------------------------------===// 2859 2860llvm::ScheduleDAGSDNodes * 2861llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2862 CodeGenOpt::Level OptLevel) { 2863 const TargetMachine &TM = IS->TM; 2864 const TargetInstrInfo *TII = TM.getInstrInfo(); 2865 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2866 2867 BURegReductionPriorityQueue *PQ = 2868 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2869 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2870 PQ->setScheduleDAG(SD); 2871 return SD; 2872} 2873 2874llvm::ScheduleDAGSDNodes * 2875llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 2876 CodeGenOpt::Level OptLevel) { 2877 const TargetMachine &TM = IS->TM; 2878 const TargetInstrInfo *TII = TM.getInstrInfo(); 2879 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2880 2881 TDRegReductionPriorityQueue *PQ = 2882 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2883 ScheduleDAGRRList *SD = new 
ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2884 PQ->setScheduleDAG(SD); 2885 return SD; 2886} 2887 2888llvm::ScheduleDAGSDNodes * 2889llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2890 CodeGenOpt::Level OptLevel) { 2891 const TargetMachine &TM = IS->TM; 2892 const TargetInstrInfo *TII = TM.getInstrInfo(); 2893 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2894 2895 SrcRegReductionPriorityQueue *PQ = 2896 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2897 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2898 PQ->setScheduleDAG(SD); 2899 return SD; 2900} 2901 2902llvm::ScheduleDAGSDNodes * 2903llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 2904 CodeGenOpt::Level OptLevel) { 2905 const TargetMachine &TM = IS->TM; 2906 const TargetInstrInfo *TII = TM.getInstrInfo(); 2907 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2908 const TargetLowering *TLI = &IS->getTargetLowering(); 2909 2910 HybridBURRPriorityQueue *PQ = 2911 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2912 2913 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2914 PQ->setScheduleDAG(SD); 2915 return SD; 2916} 2917 2918llvm::ScheduleDAGSDNodes * 2919llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 2920 CodeGenOpt::Level OptLevel) { 2921 const TargetMachine &TM = IS->TM; 2922 const TargetInstrInfo *TII = TM.getInstrInfo(); 2923 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2924 const TargetLowering *TLI = &IS->getTargetLowering(); 2925 2926 ILPBURRPriorityQueue *PQ = 2927 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2928 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2929 PQ->setScheduleDAG(SD); 2930 return SD; 2931} 2932