ScheduleDAGRRList.cpp revision 4cb971ce1c8b254f29365c988b55f6dcfe86d21e
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
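//
// As a usage sketch (hypothetical invocation; the input file name is a
// placeholder), each heuristic below can be toggled individually from the
// llc command line, e.g.:
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure foo.bc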
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

#ifndef NDEBUG
namespace {
  // For sched=list-ilp, count the number of times each factor comes into play.
  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
         FactStatic, FactOther, NumFactors };
}
static const char *FactorName[NumFactors] =
{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
static int FactorCount[NumFactors];
#endif //!NDEBUG

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live".
  /// These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
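    // If the new node was numbered at or beyond the old SUnits.size(), the
    // SUnits vector grew, so the cached topological ordering is stale and
    // must be recomputed from scratch.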
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");
#ifndef NDEBUG
  for (int i = 0; i < NumFactors; ++i) {
    FactorCount[i] = 0;
  }
#endif //!NDEBUG

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

#ifndef NDEBUG
  for (int i = 0; i < NumFactors; ++i) {
    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
  }
#endif // !NDEBUG
  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue,
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle =
      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      if (isBottomUp)
        HazardRec->RecedeCycle();
      else
        HazardRec->AdvanceCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on
  // the available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (isBottomUp && SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (isBottomUp && SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);

  if (!isBottomUp && SU->isCall) {
    HazardRec->Reset();
  }
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking.
  // Instead, add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, updating its
/// state and the states of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // This becomes the nearest def.
      // Note that an earlier def may still be pending
      // if this is a two-address node.
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
/// including BtSU, in order to make it possible to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
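/// This is used by PickNodeToScheduleBottomUp to break a live physical
/// register dependency by duplicating the def, rather than issuing
/// cross-register-class copies.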
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      ComputeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
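    // Chain preds and preds that feed the load's operands go to LoadSU (when
    // the load is new); value preds and value succs go to NewSU, which now
    // produces the unfolded operation's results; chain succs follow LoadSU,
    // which carries the memory effect.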
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
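  // CopyFromSU moves the value out of the physical register into DestRC and
  // CopyToSU moves it back into SrcRC for the rewired uses; in effect the
  // value is parked in another register class while the physreg is reused.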
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Update the live register vector if the specified
/// register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specific node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
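    // TrySU must end up below every node that keeps an interfering register
    // live, so backtrack to the live-range generator with the smallest
    // height; unscheduling down to it frees all of the interfering regs.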
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If cross copy register class is not the same as RC, then it's possible
    // to copy the value but it requires cross register class copies and it is
    // expensive.
    // If cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;
  reverse_sort(SF &sf) : SortFunc(sf) {}
  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG

// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// td_ls_rr_sort - Priority function for top down register pressure reduction
// scheduler.
struct td_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = false,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  ///
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  std::vector<SUnit *>::iterator Best = Q.begin();
  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
         E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != prior(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;
  return popFromQueueImpl(Q, Picker);
}

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() {
    if (Queue.empty()) return NULL;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;
    return V;
  }

  void dump(ScheduleDAG *DAG) const {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit*> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      if (isBottomUp())
        dbgs() << "Height " << SU->getHeight() << ": ";
      else
        dbgs() << "Depth " << SU->getDepth() << ": ";
      SU->dump(DAG);
    }
  }
};

typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<td_ls_rr_sort>
TDRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
ILPBURRPriorityQueue;
} // end anonymous namespace

//===----------------------------------------------------------------------===//
//           Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
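/// As an informal sketch of the classic tree rule: a binary node whose
/// operands need k1 and k2 registers needs max(k1, k2) registers when
/// k1 != k2, and k1 + 1 when they tie; the "Extra" counter above generalizes
/// the tie case over all non-chain predecessors of a DAG node.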
1719 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1720   SethiUllmanNumbers.assign(SUnits->size(), 0);
1721 
1722   for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1723     CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1724 }
1725 
1726 void RegReductionPQBase::addNode(const SUnit *SU) {
1727   unsigned SUSize = SethiUllmanNumbers.size();
1728   if (SUnits->size() > SUSize)
1729     SethiUllmanNumbers.resize(SUSize*2, 0);
1730   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1731 }
1732 
1733 void RegReductionPQBase::updateNode(const SUnit *SU) {
1734   SethiUllmanNumbers[SU->NodeNum] = 0;
1735   CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1736 }
1737 
1738 // Lower priority means schedule further down. For bottom-up scheduling, lower
1739 // priority SUs are scheduled before higher priority SUs.
1740 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1741   assert(SU->NodeNum < SethiUllmanNumbers.size());
1742   unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1743   if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1744     // CopyToReg should be close to its uses to facilitate coalescing and
1745     // avoid spilling.
1746     return 0;
1747   if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1748       Opc == TargetOpcode::SUBREG_TO_REG ||
1749       Opc == TargetOpcode::INSERT_SUBREG)
1750     // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1751     // close to their uses to facilitate coalescing.
1752     return 0;
1753   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1754     // If SU does not have a register use, i.e. it doesn't produce a value
1755     // that would be consumed (e.g. store), then it terminates a chain of
1756     // computation. Give it a large SethiUllman number so it will be
1757     // scheduled right before its predecessors, so that it does not lengthen
1758     // their live ranges.
1759     return 0xffff;
1760   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1761     // If SU does not have a register def, schedule it close to its uses
1762     // because it does not lengthen any live ranges.
1763     return 0;
1764 #if 1
1765   return SethiUllmanNumbers[SU->NodeNum];
1766 #else
1767   unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1768   if (SU->isCallOp) {
1769     // FIXME: This assumes all of the defs are used as call operands.
1770     int NP = (int)Priority - SU->getNode()->getNumValues();
1771     return (NP > 0) ? NP : 0;
1772   }
1773   return Priority;
1774 #endif
1775 }
1776 
1777 //===----------------------------------------------------------------------===//
1778 // Register Pressure Tracking
1779 //===----------------------------------------------------------------------===//
1780 
1781 void RegReductionPQBase::dumpRegPressure() const {
1782   for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1783        E = TRI->regclass_end(); I != E; ++I) {
1784     const TargetRegisterClass *RC = *I;
1785     unsigned Id = RC->getID();
1786     unsigned RP = RegPressure[Id];
1787     if (!RP) continue;
1788     DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1789           << '\n');
1790   }
1791 }
1792 
1793 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1794   if (!TLI)
1795     return false;
1796 
1797   for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1798        I != E; ++I) {
1799     if (I->isCtrl())
1800       continue;
1801     SUnit *PredSU = I->getSUnit();
1802     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1803     // to cover the number of registers defined (they are all live).
1804     if (PredSU->NumRegDefsLeft == 0) {
1805       continue;
1806     }
1807     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1808          RegDefPos.IsValid(); RegDefPos.Advance()) {
1809       EVT VT = RegDefPos.GetValue();
1810       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1811       unsigned Cost = TLI->getRepRegClassCostFor(VT);
1812       if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1813         return true;
1814     }
1815   }
1816   return false;
1817 }
1818 
1819 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1820   const SDNode *N = SU->getNode();
1821 
1822   if (!N->isMachineOpcode() || !SU->NumSuccs)
1823     return false;
1824 
1825   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1826   for (unsigned i = 0; i != NumDefs; ++i) {
1827     EVT VT = N->getValueType(i);
1828     if (!N->hasAnyUseOfValue(i))
1829       continue;
1830     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1831     if (RegPressure[RCId] >= RegLimit[RCId])
1832       return true;
1833   }
1834   return false;
1835 }
1836 
1837 // Compute the register pressure contribution of this instruction by counting
1838 // up for uses that are not live and down for defs. Only count register classes
1839 // that are already under high pressure. As a side effect, compute the number of
1840 // uses of registers that are already live.
1841 //
1842 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1843 // so could probably be factored.
1844 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1845   LiveUses = 0;
1846   int PDiff = 0;
1847   for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1848        I != E; ++I) {
1849     if (I->isCtrl())
1850       continue;
1851     SUnit *PredSU = I->getSUnit();
1852     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1853     // to cover the number of registers defined (they are all live).
1854     if (PredSU->NumRegDefsLeft == 0) {
1855       if (PredSU->getNode()->isMachineOpcode())
1856         ++LiveUses;
1857       continue;
1858     }
1859     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1860          RegDefPos.IsValid(); RegDefPos.Advance()) {
1861       EVT VT = RegDefPos.GetValue();
1862       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1863       if (RegPressure[RCId] >= RegLimit[RCId])
1864         ++PDiff;
1865     }
1866   }
1867   const SDNode *N = SU->getNode();
1868 
1869   if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1870     return PDiff;
1871 
1872   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1873   for (unsigned i = 0; i != NumDefs; ++i) {
1874     EVT VT = N->getValueType(i);
1875     if (!N->hasAnyUseOfValue(i))
1876       continue;
1877     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1878     if (RegPressure[RCId] >= RegLimit[RCId])
1879       --PDiff;
1880   }
1881   return PDiff;
1882 }
1883 
1884 void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1885   if (!TracksRegPressure)
1886     return;
1887 
1888   if (!SU->getNode())
1889     return;
1890 
1891   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1892        I != E; ++I) {
1893     if (I->isCtrl())
1894       continue;
1895     SUnit *PredSU = I->getSUnit();
1896     // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1897     // to cover the number of registers defined (they are all live).
1898     if (PredSU->NumRegDefsLeft == 0) {
1899       continue;
1900     }
1901     // FIXME: The ScheduleDAG currently loses information about which of a
1902     // node's values is consumed by each dependence. Consequently, if the node
1903     // defines multiple register classes, we don't know which to pressurize
1904     // here. Instead the following loop consumes the register defs in an
1905     // arbitrary order. At least it handles the common case of clustered loads
1906     // to the same class. For precise liveness, each SDep needs to indicate the
1907     // result number. But that tightly couples the ScheduleDAG with the
1908     // SelectionDAG, making updates tricky. A simpler hack would be to attach a
1909     // value type or register class to SDep.
1910     //
1911     // The most important aspect of register tracking is balancing the increase
1912     // here with the reduction further below. Note that this SU may use multiple
1913     // defs in PredSU. They can't be determined here, but we've already
1914     // compensated by reducing NumRegDefsLeft in PredSU during
1915     // ScheduleDAGSDNodes::AddSchedEdges.
1916     --PredSU->NumRegDefsLeft;
1917     unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1918     for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1919          RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1920       if (SkipRegDefs)
1921         continue;
1922       EVT VT = RegDefPos.GetValue();
1923       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1924       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1925       break;
1926     }
1927   }
1928 
1929   // We should have this assert, but there may be dead SDNodes that never
1930   // materialize as SUnits, so they don't appear to generate liveness.
1931   //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1932   int SkipRegDefs = (int)SU->NumRegDefsLeft;
1933   for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1934        RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1935     if (SkipRegDefs > 0)
1936       continue;
1937     EVT VT = RegDefPos.GetValue();
1938     unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1939     if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
1940       // Register pressure tracking is imprecise. This can happen. But we try
1941       // hard not to let it happen because it likely results in poor scheduling.
1942       DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1943       RegPressure[RCId] = 0;
1944     }
1945     else {
1946       RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1947     }
1948   }
1949   dumpRegPressure();
1950 }
1951 
1952 void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1953   if (!TracksRegPressure)
1954     return;
1955 
1956   const SDNode *N = SU->getNode();
1957   if (!N) return;
1958 
1959   if (!N->isMachineOpcode()) {
1960     if (N->getOpcode() != ISD::CopyToReg)
1961       return;
1962   } else {
1963     unsigned Opc = N->getMachineOpcode();
1964     if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1965         Opc == TargetOpcode::INSERT_SUBREG ||
1966         Opc == TargetOpcode::SUBREG_TO_REG ||
1967         Opc == TargetOpcode::REG_SEQUENCE ||
1968         Opc == TargetOpcode::IMPLICIT_DEF)
1969       return;
1970   }
1971 
1972   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1973        I != E; ++I) {
1974     if (I->isCtrl())
1975       continue;
1976     SUnit *PredSU = I->getSUnit();
1977     // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1978     // counts data deps.
1979     if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1980       continue;
1981     const SDNode *PN = PredSU->getNode();
1982     if (!PN->isMachineOpcode()) {
1983       if (PN->getOpcode() == ISD::CopyFromReg) {
1984         EVT VT = PN->getValueType(0);
1985         unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1986         RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1987       }
1988       continue;
1989     }
1990     unsigned POpc = PN->getMachineOpcode();
1991     if (POpc == TargetOpcode::IMPLICIT_DEF)
1992       continue;
1993     if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1994       EVT VT = PN->getOperand(0).getValueType();
1995       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1996       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1997       continue;
1998     } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1999                POpc == TargetOpcode::SUBREG_TO_REG) {
2000       EVT VT = PN->getValueType(0);
2001       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2002       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2003       continue;
2004     }
2005     unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2006     for (unsigned i = 0; i != NumDefs; ++i) {
2007       EVT VT = PN->getValueType(i);
2008       if (!PN->hasAnyUseOfValue(i))
2009         continue;
2010       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2011       if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2012         // Register pressure tracking is imprecise. This can happen.
2013         RegPressure[RCId] = 0;
2014       else
2015         RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2016     }
2017   }
2018 
2019   // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2020   // may transfer data dependencies to CopyToReg.
2021   if (SU->NumSuccs && N->isMachineOpcode()) {
2022     unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2023     for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2024       EVT VT = N->getValueType(i);
2025       if (VT == MVT::Glue || VT == MVT::Other)
2026         continue;
2027       if (!N->hasAnyUseOfValue(i))
2028         continue;
2029       unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2030       RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2031     }
2032   }
2033 
2034   dumpRegPressure();
2035 }
2036 
2037 //===----------------------------------------------------------------------===//
2038 // Dynamic Node Priority for Register Pressure Reduction
2039 //===----------------------------------------------------------------------===//
2040 
2041 /// closestSucc - Returns the scheduled cycle of the successor which is
2042 /// closest to the current cycle.
2043 static unsigned closestSucc(const SUnit *SU) {
2044   unsigned MaxHeight = 0;
2045   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2046        I != E; ++I) {
2047     if (I->isCtrl()) continue;  // ignore chain succs
2048     unsigned Height = I->getSUnit()->getHeight();
2049     // If there are a bunch of CopyToRegs stacked up, they should be considered
2050     // to be at the same position.
2051     if (I->getSUnit()->getNode() &&
2052         I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2053       Height = closestSucc(I->getSUnit())+1;
2054     if (Height > MaxHeight)
2055       MaxHeight = Height;
2056   }
2057   return MaxHeight;
2058 }
2059 
2060 /// calcMaxScratches - Returns a cost estimate of the worst case requirement
2061 /// for scratch registers, i.e. the number of data dependencies.
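/// For example, a node with two data predecessors yields 2: in the worst case
/// both operand values occupy scratch registers at once.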
2062static unsigned calcMaxScratches(const SUnit *SU) { 2063 unsigned Scratches = 0; 2064 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2065 I != E; ++I) { 2066 if (I->isCtrl()) continue; // ignore chain preds 2067 Scratches++; 2068 } 2069 return Scratches; 2070} 2071 2072/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are 2073/// CopyFromReg from a virtual register. 2074static bool hasOnlyLiveInOpers(const SUnit *SU) { 2075 bool RetVal = false; 2076 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2077 I != E; ++I) { 2078 if (I->isCtrl()) continue; 2079 const SUnit *PredSU = I->getSUnit(); 2080 if (PredSU->getNode() && 2081 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 2082 unsigned Reg = 2083 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 2084 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2085 RetVal = true; 2086 continue; 2087 } 2088 } 2089 return false; 2090 } 2091 return RetVal; 2092} 2093 2094/// hasOnlyLiveOutUses - Return true if SU has only value successors that are 2095/// CopyToReg to a virtual register. This SU def is probably a liveout and 2096/// it has no other use. It should be scheduled closer to the terminator. 2097static bool hasOnlyLiveOutUses(const SUnit *SU) { 2098 bool RetVal = false; 2099 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2100 I != E; ++I) { 2101 if (I->isCtrl()) continue; 2102 const SUnit *SuccSU = I->getSUnit(); 2103 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 2104 unsigned Reg = 2105 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 2106 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2107 RetVal = true; 2108 continue; 2109 } 2110 } 2111 return false; 2112 } 2113 return RetVal; 2114} 2115 2116// Set isVRegCycle for a node with only live in opers and live out uses. Also 2117// set isVRegCycle for its CopyFromReg operands. 2118// 2119// This is only relevant for single-block loops, in which case the VRegCycle 2120// node is likely an induction variable in which the operand and target virtual 2121// registers should be coalesced (e.g. pre/post increment values). Setting the 2122// isVRegCycle flag helps the scheduler prioritize other uses of the same 2123// CopyFromReg so that this node becomes the virtual register "kill". This 2124// avoids interference between the values live in and out of the block and 2125// eliminates a copy inside the loop. 2126static void initVRegCycle(SUnit *SU) { 2127 if (DisableSchedVRegCycle) 2128 return; 2129 2130 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2131 return; 2132 2133 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2134 2135 SU->isVRegCycle = true; 2136 2137 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2138 I != E; ++I) { 2139 if (I->isCtrl()) continue; 2140 I->getSUnit()->isVRegCycle = true; 2141 } 2142} 2143 2144// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2145// CopyFromReg operands. We should no longer penalize other uses of this VReg. 
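// For example, once the node computing the next value of a single-block
// loop's induction variable has been scheduled, the remaining uses of the
// current value no longer need the VRegCycle priority boost.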
2146 static void resetVRegCycle(SUnit *SU) {
2147   if (!SU->isVRegCycle)
2148     return;
2149 
2150   for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2151        I != E; ++I) {
2152     if (I->isCtrl()) continue;  // ignore chain preds
2153     SUnit *PredSU = I->getSUnit();
2154     if (PredSU->isVRegCycle) {
2155       assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2156              "VRegCycle def must be CopyFromReg");
2157       I->getSUnit()->isVRegCycle = 0;
2158     }
2159   }
2160 }
2161 
2162 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2163 // means a node that defines the VRegCycle has not been scheduled yet.
2164 static bool hasVRegCycleUse(const SUnit *SU) {
2165   // If this SU also defines the VReg, don't hoist it as a "use".
2166   if (SU->isVRegCycle)
2167     return false;
2168 
2169   for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2170        I != E; ++I) {
2171     if (I->isCtrl()) continue;  // ignore chain preds
2172     if (I->getSUnit()->isVRegCycle &&
2173         I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2174       DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2175       return true;
2176     }
2177   }
2178   return false;
2179 }
2180 
2181 // Check for either a dependence (latency) or resource (hazard) stall.
2182 //
2183 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2184 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2185   if ((int)SPQ->getCurCycle() < Height) return true;
2186   if (SPQ->getHazardRec()->getHazardType(SU, 0)
2187       != ScheduleHazardRecognizer::NoHazard)
2188     return true;
2189   return false;
2190 }
2191 
2192 // Return -1 if left has higher priority, 1 if right has higher priority.
2193 // Return 0 if latency-based priority is equivalent.
2194 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2195                             RegReductionPQBase *SPQ) {
2196   // Scheduling an instruction that uses a VReg whose postincrement has not yet
2197   // been scheduled will induce a copy. Model this as an extra cycle of latency.
2198   int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2199   int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2200   int LHeight = (int)left->getHeight() + LPenalty;
2201   int RHeight = (int)right->getHeight() + RPenalty;
2202 
2203   bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
2204     BUHasStall(left, LHeight, SPQ);
2205   bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
2206     BUHasStall(right, RHeight, SPQ);
2207 
2208   // If scheduling one of the nodes will cause a pipeline stall, delay it.
2209   // If scheduling both of the nodes will cause a pipeline stall, sort them
2210   // according to their height.
2211   if (LStall) {
2212     if (!RStall) {
2213       DEBUG(++FactorCount[FactStall]);
2214       return 1;
2215     }
2216     if (LHeight != RHeight) {
2217       DEBUG(++FactorCount[FactStall]);
2218       return LHeight > RHeight ? 1 : -1;
2219     }
2220   } else if (RStall) {
2221     DEBUG(++FactorCount[FactStall]);
2222     return -1;
2223   }
2224 
2225   // If either node is scheduling for latency, sort them by height/depth
2226   // and latency.
2227   if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2228                      right->SchedulingPref == Sched::Latency)) {
2229     if (DisableSchedCycles) {
2230       if (LHeight != RHeight) {
2231         DEBUG(++FactorCount[FactHeight]);
2232         return LHeight > RHeight ? 1 : -1;
2233       }
2234     }
2235     else {
2236       // If neither instruction stalls (!LStall && !RStall) then the height
2237       // is already covered, so only the depth matters. We also reach
2238       // this if both stall but have the same height.
2239       int LDepth = left->getDepth() - LPenalty;
2240       int RDepth = right->getDepth() - RPenalty;
2241       if (LDepth != RDepth) {
2242         DEBUG(++FactorCount[FactDepth]);
2243         DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2244               << ") depth " << LDepth << " vs SU (" << right->NodeNum
2245               << ") depth " << RDepth << "\n");
2246         return LDepth < RDepth ? 1 : -1;
2247       }
2248     }
2249     if (left->Latency != right->Latency) {
2250       DEBUG(++FactorCount[FactOther]);
2251       return left->Latency > right->Latency ? 1 : -1;
2252     }
2253   }
2254   return 0;
2255 }
2256 
2257 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2258   // Schedule physical register definitions close to their use. This is
2259   // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2260   // long as shortening physreg live ranges is generally good, we can defer
2261   // creating a subtarget hook.
2262   if (!DisableSchedPhysRegJoin) {
2263     bool LHasPhysReg = left->hasPhysRegDefs;
2264     bool RHasPhysReg = right->hasPhysRegDefs;
2265     if (LHasPhysReg != RHasPhysReg) {
2266       DEBUG(++FactorCount[FactRegUses]);
2267     #ifndef NDEBUG
2268       const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2269     #endif
2270       DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2271             << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2272             << PhysRegMsg[RHasPhysReg] << "\n");
2273       return LHasPhysReg < RHasPhysReg;
2274     }
2275   }
2276 
2277   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2278   unsigned LPriority = SPQ->getNodePriority(left);
2279   unsigned RPriority = SPQ->getNodePriority(right);
2280 
2281   // Be really careful about hoisting call operands above previous calls.
2282   // Only allow it if it would reduce register pressure.
2283   if (left->isCall && right->isCallOp) {
2284     unsigned RNumVals = right->getNode()->getNumValues();
2285     RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2286   }
2287   if (right->isCall && left->isCallOp) {
2288     unsigned LNumVals = left->getNode()->getNumValues();
2289     LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2290   }
2291 
2292   if (LPriority != RPriority) {
2293     DEBUG(++FactorCount[FactStatic]);
2294     return LPriority > RPriority;
2295   }
2296 
2297   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2298   // the same, then keep source order.
2299   if (left->isCall || right->isCall) {
2300     unsigned LOrder = SPQ->getNodeOrdering(left);
2301     unsigned ROrder = SPQ->getNodeOrdering(right);
2302 
2303     // Prefer an ordering where the lower the non-zero order number, the higher
2304     // the preference.
2305     if ((LOrder || ROrder) && LOrder != ROrder)
2306       return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2307   }
2308 
2309   // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2310   // e.g.
2311   // t1 = op t2, c1
2312   // t3 = op t4, c2
2313   //
2314   // and the following instructions are both ready.
2315   // t2 = op c3
2316   // t4 = op c4
2317   //
2318   // Then schedule t2 = op first.
2319   // i.e.
2320   // t4 = op c4
2321   // t2 = op c3
2322   // t1 = op t2, c1
2323   // t3 = op t4, c2
2324   //
2325   // This creates more short live intervals.
2326   unsigned LDist = closestSucc(left);
2327   unsigned RDist = closestSucc(right);
2328   if (LDist != RDist) {
2329     DEBUG(++FactorCount[FactOther]);
2330     return LDist < RDist;
2331   }
2332 
2333   // How many registers become live when the node is scheduled.
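  // calcMaxScratches approximates this by counting data predecessors, and
  // the comparison below prefers the node that makes fewer registers live.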
2334 unsigned LScratch = calcMaxScratches(left); 2335 unsigned RScratch = calcMaxScratches(right); 2336 if (LScratch != RScratch) { 2337 DEBUG(++FactorCount[FactOther]); 2338 return LScratch > RScratch; 2339 } 2340 2341 // Comparing latency against a call makes little sense unless the node 2342 // is register pressure-neutral. 2343 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) 2344 return (left->NodeQueueId > right->NodeQueueId); 2345 2346 // Do not compare latencies when one or both of the nodes are calls. 2347 if (!DisableSchedCycles && 2348 !(left->isCall || right->isCall)) { 2349 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2350 if (result != 0) 2351 return result > 0; 2352 } 2353 else { 2354 if (left->getHeight() != right->getHeight()) { 2355 DEBUG(++FactorCount[FactHeight]); 2356 return left->getHeight() > right->getHeight(); 2357 } 2358 2359 if (left->getDepth() != right->getDepth()) { 2360 DEBUG(++FactorCount[FactDepth]); 2361 return left->getDepth() < right->getDepth(); 2362 } 2363 } 2364 2365 assert(left->NodeQueueId && right->NodeQueueId && 2366 "NodeQueueId cannot be zero"); 2367 DEBUG(++FactorCount[FactOther]); 2368 return (left->NodeQueueId > right->NodeQueueId); 2369} 2370 2371// Bottom up 2372bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2373 if (int res = checkSpecialNodes(left, right)) 2374 return res > 0; 2375 2376 return BURRSort(left, right, SPQ); 2377} 2378 2379// Source order, otherwise bottom up. 2380bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2381 if (int res = checkSpecialNodes(left, right)) 2382 return res > 0; 2383 2384 unsigned LOrder = SPQ->getNodeOrdering(left); 2385 unsigned ROrder = SPQ->getNodeOrdering(right); 2386 2387 // Prefer an ordering where the lower the non-zero order number, the higher 2388 // the preference. 2389 if ((LOrder || ROrder) && LOrder != ROrder) 2390 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2391 2392 return BURRSort(left, right, SPQ); 2393} 2394 2395// If the time between now and when the instruction will be ready can cover 2396// the spill code, then avoid adding it to the ready queue. This gives long 2397// stalls highest priority and allows hoisting across calls. It should also 2398// speed up processing the available queue. 2399bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2400 static const unsigned ReadyDelay = 3; 2401 2402 if (SPQ->MayReduceRegPressure(SU)) return true; 2403 2404 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2405 2406 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2407 != ScheduleHazardRecognizer::NoHazard) 2408 return false; 2409 2410 return true; 2411} 2412 2413// Return true if right should be scheduled with higher priority than left. 2414bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2415 if (int res = checkSpecialNodes(left, right)) 2416 return res > 0; 2417 2418 if (left->isCall || right->isCall) 2419 // No way to compute latency of calls. 2420 return BURRSort(left, right, SPQ); 2421 2422 bool LHigh = SPQ->HighRegPressure(left); 2423 bool RHigh = SPQ->HighRegPressure(right); 2424 // Avoid causing spills. If register pressure is high, schedule for 2425 // register pressure reduction. 
2426 if (LHigh && !RHigh) { 2427 DEBUG(++FactorCount[FactPressureDiff]); 2428 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2429 << right->NodeNum << ")\n"); 2430 return true; 2431 } 2432 else if (!LHigh && RHigh) { 2433 DEBUG(++FactorCount[FactPressureDiff]); 2434 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2435 << left->NodeNum << ")\n"); 2436 return false; 2437 } 2438 if (!LHigh && !RHigh) { 2439 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2440 if (result != 0) 2441 return result > 0; 2442 } 2443 return BURRSort(left, right, SPQ); 2444} 2445 2446// Schedule as many instructions in each cycle as possible. So don't make an 2447// instruction available unless it is ready in the current cycle. 2448bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2449 if (SU->getHeight() > CurCycle) return false; 2450 2451 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2452 != ScheduleHazardRecognizer::NoHazard) 2453 return false; 2454 2455 return true; 2456} 2457 2458static bool canEnableCoalescing(SUnit *SU) { 2459 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2460 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2461 // CopyToReg should be close to its uses to facilitate coalescing and 2462 // avoid spilling. 2463 return true; 2464 2465 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2466 Opc == TargetOpcode::SUBREG_TO_REG || 2467 Opc == TargetOpcode::INSERT_SUBREG) 2468 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2469 // close to their uses to facilitate coalescing. 2470 return true; 2471 2472 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2473 // If SU does not have a register def, schedule it close to its uses 2474 // because it does not lengthen any live ranges. 2475 return true; 2476 2477 return false; 2478} 2479 2480// list-ilp is currently an experimental scheduler that allows various 2481// heuristics to be enabled prior to the normal register reduction logic. 2482bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2483 if (int res = checkSpecialNodes(left, right)) 2484 return res > 0; 2485 2486 if (left->isCall || right->isCall) 2487 // No way to compute latency of calls. 
2488 return BURRSort(left, right, SPQ); 2489 2490 unsigned LLiveUses = 0, RLiveUses = 0; 2491 int LPDiff = 0, RPDiff = 0; 2492 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2493 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2494 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2495 } 2496 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2497 DEBUG(++FactorCount[FactPressureDiff]); 2498 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 2499 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 2500 return LPDiff > RPDiff; 2501 } 2502 2503 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2504 bool LReduce = canEnableCoalescing(left); 2505 bool RReduce = canEnableCoalescing(right); 2506 DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); 2507 if (LReduce && !RReduce) return false; 2508 if (RReduce && !LReduce) return true; 2509 } 2510 2511 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2512 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2513 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 2514 DEBUG(++FactorCount[FactRegUses]); 2515 return LLiveUses < RLiveUses; 2516 } 2517 2518 if (!DisableSchedStalls) { 2519 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2520 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2521 if (LStall != RStall) { 2522 DEBUG(++FactorCount[FactHeight]); 2523 return left->getHeight() > right->getHeight(); 2524 } 2525 } 2526 2527 if (!DisableSchedCriticalPath) { 2528 int spread = (int)left->getDepth() - (int)right->getDepth(); 2529 if (std::abs(spread) > MaxReorderWindow) { 2530 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2531 << left->getDepth() << " != SU(" << right->NodeNum << "): " 2532 << right->getDepth() << "\n"); 2533 DEBUG(++FactorCount[FactDepth]); 2534 return left->getDepth() < right->getDepth(); 2535 } 2536 } 2537 2538 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2539 int spread = (int)left->getHeight() - (int)right->getHeight(); 2540 if (std::abs(spread) > MaxReorderWindow) { 2541 DEBUG(++FactorCount[FactHeight]); 2542 return left->getHeight() > right->getHeight(); 2543 } 2544 } 2545 2546 return BURRSort(left, right, SPQ); 2547} 2548 2549void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2550 SUnits = &sunits; 2551 // Add pseudo dependency edges for two-address nodes. 2552 AddPseudoTwoAddrDeps(); 2553 // Reroute edges to nodes with multiple uses. 2554 if (!TracksRegPressure) 2555 PrescheduleNodesWithMultipleUses(); 2556 // Calculate node priorities. 2557 CalculateSethiUllmanNumbers(); 2558 2559 // For single block loops, mark nodes that look like canonical IV increments. 
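  // (A block that lists itself as a successor, i.e. a single-block loop, is
  // the only case handled here.)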
2560 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { 2561 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { 2562 initVRegCycle(&sunits[i]); 2563 } 2564 } 2565} 2566 2567//===----------------------------------------------------------------------===// 2568// Preschedule for Register Pressure 2569//===----------------------------------------------------------------------===// 2570 2571bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 2572 if (SU->isTwoAddress) { 2573 unsigned Opc = SU->getNode()->getMachineOpcode(); 2574 const TargetInstrDesc &TID = TII->get(Opc); 2575 unsigned NumRes = TID.getNumDefs(); 2576 unsigned NumOps = TID.getNumOperands() - NumRes; 2577 for (unsigned i = 0; i != NumOps; ++i) { 2578 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 2579 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 2580 if (DU->getNodeId() != -1 && 2581 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 2582 return true; 2583 } 2584 } 2585 } 2586 return false; 2587} 2588 2589/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 2590/// physical register defs. 2591static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2592 const TargetInstrInfo *TII, 2593 const TargetRegisterInfo *TRI) { 2594 SDNode *N = SuccSU->getNode(); 2595 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2596 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 2597 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 2598 for (const SDNode *SUNode = SU->getNode(); SUNode; 2599 SUNode = SUNode->getGluedNode()) { 2600 if (!SUNode->isMachineOpcode()) 2601 continue; 2602 const unsigned *SUImpDefs = 2603 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 2604 if (!SUImpDefs) 2605 return false; 2606 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2607 EVT VT = N->getValueType(i); 2608 if (VT == MVT::Glue || VT == MVT::Other) 2609 continue; 2610 if (!N->hasAnyUseOfValue(i)) 2611 continue; 2612 unsigned Reg = ImpDefs[i - NumDefs]; 2613 for (;*SUImpDefs; ++SUImpDefs) { 2614 unsigned SUReg = *SUImpDefs; 2615 if (TRI->regsOverlap(Reg, SUReg)) 2616 return true; 2617 } 2618 } 2619 } 2620 return false; 2621} 2622 2623/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2624/// are not handled well by the general register pressure reduction 2625/// heuristics. When presented with code like this: 2626/// 2627/// N 2628/// / | 2629/// / | 2630/// U store 2631/// | 2632/// ... 2633/// 2634/// the heuristics tend to push the store up, but since the 2635/// operand of the store has another use (U), this would increase 2636/// the length of that other use (the U->N edge). 2637/// 2638/// This function transforms code like the above to route U's 2639/// dependence through the store when possible, like this: 2640/// 2641/// N 2642/// || 2643/// || 2644/// store 2645/// | 2646/// U 2647/// | 2648/// ... 2649/// 2650/// This results in the store being scheduled immediately 2651/// after N, which shortens the U->N live range, reducing 2652/// register pressure. 2653/// 2654void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2655 // Visit all the nodes in topological order, working top-down. 2656 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2657 SUnit *SU = &(*SUnits)[i]; 2658 // For now, only look at nodes with no data successors, such as stores. 2659 // These are especially important, due to the heuristics in 2660 // getNodePriority for nodes with no data successors. 
2661     if (SU->NumSuccs != 0)
2662       continue;
2663     // For now, only look at nodes with exactly one data predecessor.
2664     if (SU->NumPreds != 1)
2665       continue;
2666     // Avoid prescheduling copies to virtual registers, which don't behave
2667     // like other nodes from the perspective of scheduling heuristics.
2668     if (SDNode *N = SU->getNode())
2669       if (N->getOpcode() == ISD::CopyToReg &&
2670           TargetRegisterInfo::isVirtualRegister
2671             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2672         continue;
2673 
2674     // Locate the single data predecessor.
2675     SUnit *PredSU = 0;
2676     for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2677          EE = SU->Preds.end(); II != EE; ++II)
2678       if (!II->isCtrl()) {
2679         PredSU = II->getSUnit();
2680         break;
2681       }
2682     assert(PredSU);
2683 
2684     // Don't rewrite edges that carry physregs, because that requires additional
2685     // support infrastructure.
2686     if (PredSU->hasPhysRegDefs)
2687       continue;
2688     // Short-circuit the case where SU is PredSU's only data successor.
2689     if (PredSU->NumSuccs == 1)
2690       continue;
2691     // Avoid prescheduling to copies from virtual registers, which don't behave
2692     // like other nodes from the perspective of scheduling heuristics.
2693     if (SDNode *N = SU->getNode())
2694       if (N->getOpcode() == ISD::CopyFromReg &&
2695           TargetRegisterInfo::isVirtualRegister
2696             (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2697         continue;
2698 
2699     // Perform checks on the successors of PredSU.
2700     for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2701          EE = PredSU->Succs.end(); II != EE; ++II) {
2702       SUnit *PredSuccSU = II->getSUnit();
2703       if (PredSuccSU == SU) continue;
2704       // If PredSU has another successor with no data successors, for
2705       // now don't attempt to choose either over the other.
2706       if (PredSuccSU->NumSuccs == 0)
2707         goto outer_loop_continue;
2708       // Don't break physical register dependencies.
2709       if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2710         if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2711           goto outer_loop_continue;
2712       // Don't introduce graph cycles.
2713       if (scheduleDAG->IsReachable(SU, PredSuccSU))
2714         goto outer_loop_continue;
2715     }
2716 
2717     // Ok, the transformation is safe and the heuristics suggest it is
2718     // profitable. Update the graph.
2719     DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2720           << " next to PredSU #" << PredSU->NodeNum
2721           << " to guide scheduling in the presence of multiple uses\n");
2722     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2723       SDep Edge = PredSU->Succs[i];
2724       assert(!Edge.isAssignedRegDep());
2725       SUnit *SuccSU = Edge.getSUnit();
2726       if (SuccSU != SU) {
2727         Edge.setSUnit(PredSU);
2728         scheduleDAG->RemovePred(SuccSU, Edge);
2729         scheduleDAG->AddPred(SU, Edge);
2730         Edge.setSUnit(SU);
2731         scheduleDAG->AddPred(SuccSU, Edge);
2732         --i;
2733       }
2734     }
2735   outer_loop_continue:;
2736   }
2737 }
2738 
2739 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2740 /// it as a def&use operand, add a pseudo control edge from it to the other
2741 /// node (if it won't create a cycle) so the two-address one will be scheduled
2742 /// first (lower in the schedule). If both nodes are two-address, favor the
2743 /// one that has a CopyToReg use (more likely to be a loop induction update).
2744 /// If both are two-address, but one is commutable while the other is not
2745 /// commutable, favor the one that's not commutable.
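///
/// For illustration, with a hypothetical two-address instruction pair:
///   t1 = add t0, c1  (t1 tied to t0)
///   t2 = sub t0, c2
/// the pseudo edge makes the sub a predecessor of the add, so the sub reads
/// t0 before the tied add clobbers it and no copy of t0 is needed.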
2746 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2747   for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2748     SUnit *SU = &(*SUnits)[i];
2749     if (!SU->isTwoAddress)
2750       continue;
2751 
2752     SDNode *Node = SU->getNode();
2753     if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2754       continue;
2755 
2756     bool isLiveOut = hasOnlyLiveOutUses(SU);
2757     unsigned Opc = Node->getMachineOpcode();
2758     const TargetInstrDesc &TID = TII->get(Opc);
2759     unsigned NumRes = TID.getNumDefs();
2760     unsigned NumOps = TID.getNumOperands() - NumRes;
2761     for (unsigned j = 0; j != NumOps; ++j) {
2762       if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2763         continue;
2764       SDNode *DU = SU->getNode()->getOperand(j).getNode();
2765       if (DU->getNodeId() == -1)
2766         continue;
2767       const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2768       if (!DUSU) continue;
2769       for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2770            E = DUSU->Succs.end(); I != E; ++I) {
2771         if (I->isCtrl()) continue;
2772         SUnit *SuccSU = I->getSUnit();
2773         if (SuccSU == SU)
2774           continue;
2775         // Be conservative. Ignore if nodes aren't at roughly the same
2776         // depth and height.
2777         if (SuccSU->getHeight() < SU->getHeight() &&
2778             (SU->getHeight() - SuccSU->getHeight()) > 1)
2779           continue;
2780         // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2781         // constrains whatever is using the copy, instead of the copy
2782         // itself. In the case that the copy is coalesced, this
2783         // preserves the intent of the pseudo two-address heuristics.
2784         while (SuccSU->Succs.size() == 1 &&
2785                SuccSU->getNode()->isMachineOpcode() &&
2786                SuccSU->getNode()->getMachineOpcode() ==
2787                  TargetOpcode::COPY_TO_REGCLASS)
2788           SuccSU = SuccSU->Succs.front().getSUnit();
2789         // Don't constrain non-instruction nodes.
2790         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2791           continue;
2792         // Don't constrain nodes with physical register defs if the
2793         // predecessor can clobber them.
2794         if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2795           if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2796             continue;
2797         }
2798         // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2799         // these may be coalesced away. We want them close to their uses.
2800         unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2801         if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2802             SuccOpc == TargetOpcode::INSERT_SUBREG ||
2803             SuccOpc == TargetOpcode::SUBREG_TO_REG)
2804           continue;
2805         if ((!canClobber(SuccSU, DUSU) ||
2806              (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2807              (!SU->isCommutable && SuccSU->isCommutable)) &&
2808             !scheduleDAG->IsReachable(SuccSU, SU)) {
2809           DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2810                 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2811           scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2812                                         /*Reg=*/0, /*isNormalMemory=*/false,
2813                                         /*isMustAlias=*/false,
2814                                         /*isArtificial=*/true));
2815         }
2816       }
2817     }
2818   }
2819 }
2820 
2821 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
2822 /// predecessors of the successors of the SUnit SU. Stop when the provided
2823 /// limit is exceeded.
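/// (td_ls_rr_sort below calls this with a limit of 1: a result of exactly 1
/// means SU is the sole unscheduled predecessor of its single successor, so
/// scheduling SU immediately makes that successor available.)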
2824static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 2825 unsigned Limit) { 2826 unsigned Sum = 0; 2827 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2828 I != E; ++I) { 2829 const SUnit *SuccSU = I->getSUnit(); 2830 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 2831 EE = SuccSU->Preds.end(); II != EE; ++II) { 2832 SUnit *PredSU = II->getSUnit(); 2833 if (!PredSU->isScheduled) 2834 if (++Sum > Limit) 2835 return Sum; 2836 } 2837 } 2838 return Sum; 2839} 2840 2841 2842// Top down 2843bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 2844 if (int res = checkSpecialNodes(left, right)) 2845 return res < 0; 2846 2847 unsigned LPriority = SPQ->getNodePriority(left); 2848 unsigned RPriority = SPQ->getNodePriority(right); 2849 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 2850 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 2851 bool LIsFloater = LIsTarget && left->NumPreds == 0; 2852 bool RIsFloater = RIsTarget && right->NumPreds == 0; 2853 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 2854 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 2855 2856 if (left->NumSuccs == 0 && right->NumSuccs != 0) 2857 return false; 2858 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 2859 return true; 2860 2861 if (LIsFloater) 2862 LBonus -= 2; 2863 if (RIsFloater) 2864 RBonus -= 2; 2865 if (left->NumSuccs == 1) 2866 LBonus += 2; 2867 if (right->NumSuccs == 1) 2868 RBonus += 2; 2869 2870 if (LPriority+LBonus != RPriority+RBonus) 2871 return LPriority+LBonus < RPriority+RBonus; 2872 2873 if (left->getDepth() != right->getDepth()) 2874 return left->getDepth() < right->getDepth(); 2875 2876 if (left->NumSuccsLeft != right->NumSuccsLeft) 2877 return left->NumSuccsLeft > right->NumSuccsLeft; 2878 2879 assert(left->NodeQueueId && right->NodeQueueId && 2880 "NodeQueueId cannot be zero"); 2881 return (left->NodeQueueId > right->NodeQueueId); 2882} 2883 2884//===----------------------------------------------------------------------===// 2885// Public Constructor Functions 2886//===----------------------------------------------------------------------===// 2887 2888llvm::ScheduleDAGSDNodes * 2889llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2890 CodeGenOpt::Level OptLevel) { 2891 const TargetMachine &TM = IS->TM; 2892 const TargetInstrInfo *TII = TM.getInstrInfo(); 2893 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2894 2895 BURegReductionPriorityQueue *PQ = 2896 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2897 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2898 PQ->setScheduleDAG(SD); 2899 return SD; 2900} 2901 2902llvm::ScheduleDAGSDNodes * 2903llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 2904 CodeGenOpt::Level OptLevel) { 2905 const TargetMachine &TM = IS->TM; 2906 const TargetInstrInfo *TII = TM.getInstrInfo(); 2907 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2908 2909 TDRegReductionPriorityQueue *PQ = 2910 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2911 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2912 PQ->setScheduleDAG(SD); 2913 return SD; 2914} 2915 2916llvm::ScheduleDAGSDNodes * 2917llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2918 CodeGenOpt::Level OptLevel) { 2919 const TargetMachine &TM = IS->TM; 2920 const TargetInstrInfo *TII = TM.getInstrInfo(); 2921 
const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2922 2923 SrcRegReductionPriorityQueue *PQ = 2924 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2925 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2926 PQ->setScheduleDAG(SD); 2927 return SD; 2928} 2929 2930llvm::ScheduleDAGSDNodes * 2931llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 2932 CodeGenOpt::Level OptLevel) { 2933 const TargetMachine &TM = IS->TM; 2934 const TargetInstrInfo *TII = TM.getInstrInfo(); 2935 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2936 const TargetLowering *TLI = &IS->getTargetLowering(); 2937 2938 HybridBURRPriorityQueue *PQ = 2939 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2940 2941 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2942 PQ->setScheduleDAG(SD); 2943 return SD; 2944} 2945 2946llvm::ScheduleDAGSDNodes * 2947llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 2948 CodeGenOpt::Level OptLevel) { 2949 const TargetMachine &TM = IS->TM; 2950 const TargetInstrInfo *TII = TM.getInstrInfo(); 2951 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2952 const TargetLowering *TLI = &IS->getTargetLowering(); 2953 2954 ILPBURRPriorityQueue *PQ = 2955 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2956 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2957 PQ->setScheduleDAG(SD); 2958 return SD; 2959} 2960