ScheduleDAGRRList.cpp revision 623a7e146bd86747dc46a6f8bb9993fc217d6b78
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));
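// Each RegisterScheduler above makes the corresponding scheduler selectable
// on the llc command line, e.g. "llc -pre-RA-sched=list-ilp".
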
// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

#ifndef NDEBUG
namespace {
  // For sched=list-ilp, count the number of times each factor comes into play.
  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
         FactStatic, FactOther, NumFactors };
}
static const char *FactorName[NumFactors] =
{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
static int FactorCount[NumFactors];
#endif //!NDEBUG

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }
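  // Note: ScheduleDAGTopologicalSort updates incrementally for added or
  // removed edges, but has no incremental insert for brand-new nodes, so
  // whenever CreateNewSUnit or CreateClone appends an SUnit past the range
  // the ordering was built for, the ordering is recomputed from scratch.
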
  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");
#ifndef NDEBUG
  for (int i = 0; i < NumFactors; ++i) {
    FactorCount[i] = 0;
  }
#endif //!NDEBUG

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

#ifndef NDEBUG
  for (int i = 0; i < NumFactors; ++i) {
    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
  }
#endif // !NDEBUG
  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//
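// The bottom-up loop starts from ExitSU, repeatedly pops the best available
// node, legalizes it against hazards and live physical registers, and emits
// it; the resulting sequence is reversed at the end of ListScheduleBottomUp
// to yield a top-down instruction order.
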
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNested are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = 0;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return 0;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return 0;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

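  // Since the walk climbs the chain from the END toward the BEGIN, nested
  // sequences such as
  //   CALLSEQ_BEGIN(a) .. CALLSEQ_BEGIN(b) .. CALLSEQ_END(b) .. CALLSEQ_END(a)
  // are handled by the NestLevel counting in FindCallSeqStart, which skips
  // over the inner (b) pair so that END(a) is matched with BEGIN(a).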
  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}
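// For example, if CurCycle is 4 and the next node's height (its earliest
// stall-free cycle) is 7, AdvancePastStalls below first advances to cycle 7
// and then asks the hazard recognizer how many additional stall cycles the
// node needs before its resources are free.
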
/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = NULL;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}
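// CopyAndMoveSuccessors and InsertCopiesAndMoveSuccs below are the two
// increasingly expensive fallbacks that PickNodeToScheduleBottomUp uses when
// backtracking alone cannot break a physical register interference: first try
// to duplicate (or unfold) the defining node, and failing that, insert
// cross-register-class copies around it.
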
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }
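    // At this point the folded SU has been split into LoadSU (the load) and
    // NewSU (the operation); every edge recorded above is reassigned below to
    // whichever of the two new nodes it logically belongs to.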
    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;
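
  // The pair models copying the contested value out of its register class
  // and back again: SU feeds CopyFromSU, CopyFromSU feeds CopyToSU, and the
  // scheduled successors are rewired below to read the value from CopyToSU
  // instead of from the live physical register.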

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Update the live register vector if the specified
/// register def of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to
    // copy the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/true);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
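// A brief Sethi-Ullman refresher: the number approximates how many registers
// evaluating a subtree requires. For two subtrees needing (2, 2) registers,
// either order leaves three values live, so the parent needs 3; for (2, 1),
// evaluating the heavier side first keeps the parent at 2. The comparators
// below use these numbers as their primary static priority.
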
1470// 1471namespace { 1472class RegReductionPQBase; 1473 1474struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { 1475 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } 1476}; 1477 1478#ifndef NDEBUG 1479template<class SF> 1480struct reverse_sort : public queue_sort { 1481 SF &SortFunc; 1482 reverse_sort(SF &sf) : SortFunc(sf) {} 1483 reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} 1484 1485 bool operator()(SUnit* left, SUnit* right) const { 1486 // reverse left/right rather than simply !SortFunc(left, right) 1487 // to expose different paths in the comparison logic. 1488 return SortFunc(right, left); 1489 } 1490}; 1491#endif // NDEBUG 1492 1493/// bu_ls_rr_sort - Priority function for bottom up register pressure 1494// reduction scheduler. 1495struct bu_ls_rr_sort : public queue_sort { 1496 enum { 1497 IsBottomUp = true, 1498 HasReadyFilter = false 1499 }; 1500 1501 RegReductionPQBase *SPQ; 1502 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1503 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 1504 1505 bool operator()(SUnit* left, SUnit* right) const; 1506}; 1507 1508// src_ls_rr_sort - Priority function for source order scheduler. 1509struct src_ls_rr_sort : public queue_sort { 1510 enum { 1511 IsBottomUp = true, 1512 HasReadyFilter = false 1513 }; 1514 1515 RegReductionPQBase *SPQ; 1516 src_ls_rr_sort(RegReductionPQBase *spq) 1517 : SPQ(spq) {} 1518 src_ls_rr_sort(const src_ls_rr_sort &RHS) 1519 : SPQ(RHS.SPQ) {} 1520 1521 bool operator()(SUnit* left, SUnit* right) const; 1522}; 1523 1524// hybrid_ls_rr_sort - Priority function for hybrid scheduler. 1525struct hybrid_ls_rr_sort : public queue_sort { 1526 enum { 1527 IsBottomUp = true, 1528 HasReadyFilter = false 1529 }; 1530 1531 RegReductionPQBase *SPQ; 1532 hybrid_ls_rr_sort(RegReductionPQBase *spq) 1533 : SPQ(spq) {} 1534 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) 1535 : SPQ(RHS.SPQ) {} 1536 1537 bool isReady(SUnit *SU, unsigned CurCycle) const; 1538 1539 bool operator()(SUnit* left, SUnit* right) const; 1540}; 1541 1542// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 1543// scheduler. 1544struct ilp_ls_rr_sort : public queue_sort { 1545 enum { 1546 IsBottomUp = true, 1547 HasReadyFilter = false 1548 }; 1549 1550 RegReductionPQBase *SPQ; 1551 ilp_ls_rr_sort(RegReductionPQBase *spq) 1552 : SPQ(spq) {} 1553 ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) 1554 : SPQ(RHS.SPQ) {} 1555 1556 bool isReady(SUnit *SU, unsigned CurCycle) const; 1557 1558 bool operator()(SUnit* left, SUnit* right) const; 1559}; 1560 1561class RegReductionPQBase : public SchedulingPriorityQueue { 1562protected: 1563 std::vector<SUnit*> Queue; 1564 unsigned CurQueueId; 1565 bool TracksRegPressure; 1566 1567 // SUnits - The SUnits for the current graph. 1568 std::vector<SUnit> *SUnits; 1569 1570 MachineFunction &MF; 1571 const TargetInstrInfo *TII; 1572 const TargetRegisterInfo *TRI; 1573 const TargetLowering *TLI; 1574 ScheduleDAGRRList *scheduleDAG; 1575 1576 // SethiUllmanNumbers - The SethiUllman number for each node. 1577 std::vector<unsigned> SethiUllmanNumbers; 1578 1579 /// RegPressure - Tracking current reg pressure per register class. 1580 /// 1581 std::vector<unsigned> RegPressure; 1582 1583 /// RegLimit - Tracking the number of allocatable registers per register 1584 /// class. 
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  std::vector<SUnit *>::iterator Best = Q.begin();
  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
         E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != prior(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;
  return popFromQueueImpl(Q, Picker);
}
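
// Note that the "queue" is an unsorted vector: popFromQueueImpl linearly
// scans it with Picker as the comparison, so pop() and remove() are O(n)
// while push() is O(1). Under -stress-sched the comparison is reversed
// (left/right swapped, not negated) to exercise both sides of the
// tie-breaking logic.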
SF Picker; 1706 1707public: 1708 RegReductionPriorityQueue(MachineFunction &mf, 1709 bool tracksrp, 1710 const TargetInstrInfo *tii, 1711 const TargetRegisterInfo *tri, 1712 const TargetLowering *tli) 1713 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), 1714 Picker(this) {} 1715 1716 bool isBottomUp() const { return SF::IsBottomUp; } 1717 1718 bool isReady(SUnit *U) const { 1719 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1720 } 1721 1722 SUnit *pop() { 1723 if (Queue.empty()) return NULL; 1724 1725 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1726 V->NodeQueueId = 0; 1727 return V; 1728 } 1729 1730 void dump(ScheduleDAG *DAG) const { 1731 // Emulate pop() without clobbering NodeQueueIds. 1732 std::vector<SUnit*> DumpQueue = Queue; 1733 SF DumpPicker = Picker; 1734 while (!DumpQueue.empty()) { 1735 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1736 dbgs() << "Height " << SU->getHeight() << ": "; 1737 SU->dump(DAG); 1738 } 1739 } 1740}; 1741 1742typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1743BURegReductionPriorityQueue; 1744 1745typedef RegReductionPriorityQueue<src_ls_rr_sort> 1746SrcRegReductionPriorityQueue; 1747 1748typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1749HybridBURRPriorityQueue; 1750 1751typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 1752ILPBURRPriorityQueue; 1753} // end anonymous namespace 1754 1755//===----------------------------------------------------------------------===// 1756// Static Node Priority for Register Pressure Reduction 1757//===----------------------------------------------------------------------===// 1758 1759// Check for special nodes that bypass scheduling heuristics. 1760// Currently this pushes TokenFactor nodes down, but may be used for other 1761// pseudo-ops as well. 1762// 1763// Return -1 to schedule right above left, 1 for left above right. 1764// Return 0 if no bias exists. 1765static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1766 bool LSchedLow = left->isScheduleLow; 1767 bool RSchedLow = right->isScheduleLow; 1768 if (LSchedLow != RSchedLow) 1769 return LSchedLow < RSchedLow ? 1 : -1; 1770 return 0; 1771} 1772 1773/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number. 1774/// A smaller number means a higher priority. 1775static unsigned 1776CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1777 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1778 if (SethiUllmanNumber != 0) 1779 return SethiUllmanNumber; 1780 1781 unsigned Extra = 0; 1782 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1783 I != E; ++I) { 1784 if (I->isCtrl()) continue; // ignore chain preds 1785 SUnit *PredSU = I->getSUnit(); 1786 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1787 if (PredSethiUllman > SethiUllmanNumber) { 1788 SethiUllmanNumber = PredSethiUllman; 1789 Extra = 0; 1790 } else if (PredSethiUllman == SethiUllmanNumber) 1791 ++Extra; 1792 } 1793 1794 SethiUllmanNumber += Extra; 1795 1796 if (SethiUllmanNumber == 0) 1797 SethiUllmanNumber = 1; 1798 1799 return SethiUllmanNumber; 1800} 1801 1802/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1803/// scheduling units.
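///
/// A worked illustration: operands are predecessors in the bottom-up DAG, and
/// leaves get the number 1. A node whose two operands both carry 1 sees one
/// tie at the max, so it gets 1 + 1 = 2; a node whose operands carry 2 and 1
/// keeps the max, 2, since evaluating the heavier subtree first frees its
/// registers before the lighter one is computed. So for a hypothetical
/// expression ((a + b) + (c + d)), the inner adds get 2 and the outer add 3.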
1804void RegReductionPQBase::CalculateSethiUllmanNumbers() { 1805 SethiUllmanNumbers.assign(SUnits->size(), 0); 1806 1807 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1808 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1809} 1810 1811void RegReductionPQBase::addNode(const SUnit *SU) { 1812 unsigned SUSize = SethiUllmanNumbers.size(); 1813 if (SUnits->size() > SUSize) 1814 SethiUllmanNumbers.resize(SUSize*2, 0); 1815 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1816} 1817 1818void RegReductionPQBase::updateNode(const SUnit *SU) { 1819 SethiUllmanNumbers[SU->NodeNum] = 0; 1820 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1821} 1822 1823// Lower priority means schedule further down. For bottom-up scheduling, lower 1824// priority SUs are scheduled before higher priority SUs. 1825unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 1826 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1827 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1828 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1829 // CopyToReg should be close to its uses to facilitate coalescing and 1830 // avoid spilling. 1831 return 0; 1832 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1833 Opc == TargetOpcode::SUBREG_TO_REG || 1834 Opc == TargetOpcode::INSERT_SUBREG) 1835 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1836 // close to their uses to facilitate coalescing. 1837 return 0; 1838 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1839 // If SU does not have a register use, i.e. it doesn't produce a value 1840 // that would be consumed (e.g. store), then it terminates a chain of 1841 // computation. Give it a large SethiUllman number so it will be 1842 // scheduled right before its predecessors, so that it doesn't lengthen 1843 // their live ranges. 1844 return 0xffff; 1845 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1846 // If SU does not have a register def, schedule it close to its uses 1847 // because it does not lengthen any live ranges. 1848 return 0; 1849#if 1 1850 return SethiUllmanNumbers[SU->NodeNum]; 1851#else 1852 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 1853 if (SU->isCallOp) { 1854 // FIXME: This assumes all of the defs are used as call operands. 1855 int NP = (int)Priority - SU->getNode()->getNumValues(); 1856 return (NP > 0) ? NP : 0; 1857 } 1858 return Priority; 1859#endif 1860} 1861 1862//===----------------------------------------------------------------------===// 1863// Register Pressure Tracking 1864//===----------------------------------------------------------------------===// 1865 1866void RegReductionPQBase::dumpRegPressure() const { 1867 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1868 E = TRI->regclass_end(); I != E; ++I) { 1869 const TargetRegisterClass *RC = *I; 1870 unsigned Id = RC->getID(); 1871 unsigned RP = RegPressure[Id]; 1872 if (!RP) continue; 1873 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 1874 << '\n'); 1875 } 1876} 1877 1878bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 1879 if (!TLI) 1880 return false; 1881 1882 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1883 I != E; ++I) { 1884 if (I->isCtrl()) 1885 continue; 1886 SUnit *PredSU = I->getSUnit(); 1887 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1888 // to cover the number of registers defined (they are all live).
1889 if (PredSU->NumRegDefsLeft == 0) { 1890 continue; 1891 } 1892 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1893 RegDefPos.IsValid(); RegDefPos.Advance()) { 1894 unsigned RCId, Cost; 1895 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); 1896 1897 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1898 return true; 1899 } 1900 } 1901 return false; 1902} 1903 1904bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 1905 const SDNode *N = SU->getNode(); 1906 1907 if (!N->isMachineOpcode() || !SU->NumSuccs) 1908 return false; 1909 1910 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1911 for (unsigned i = 0; i != NumDefs; ++i) { 1912 EVT VT = N->getValueType(i); 1913 if (!N->hasAnyUseOfValue(i)) 1914 continue; 1915 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1916 if (RegPressure[RCId] >= RegLimit[RCId]) 1917 return true; 1918 } 1919 return false; 1920} 1921 1922// Compute the register pressure contribution of this instruction by counting 1923// up for uses that are not live and down for defs. Only count register classes 1924// that are already under high pressure. As a side effect, compute the number of 1925// uses of registers that are already live. 1926// 1927// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure, 1928// so it could probably be factored. 1929int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 1930 LiveUses = 0; 1931 int PDiff = 0; 1932 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1933 I != E; ++I) { 1934 if (I->isCtrl()) 1935 continue; 1936 SUnit *PredSU = I->getSUnit(); 1937 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1938 // to cover the number of registers defined (they are all live). 1939 if (PredSU->NumRegDefsLeft == 0) { 1940 if (PredSU->getNode()->isMachineOpcode()) 1941 ++LiveUses; 1942 continue; 1943 } 1944 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1945 RegDefPos.IsValid(); RegDefPos.Advance()) { 1946 EVT VT = RegDefPos.GetValue(); 1947 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1948 if (RegPressure[RCId] >= RegLimit[RCId]) 1949 ++PDiff; 1950 } 1951 } 1952 const SDNode *N = SU->getNode(); 1953 1954 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 1955 return PDiff; 1956 1957 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1958 for (unsigned i = 0; i != NumDefs; ++i) { 1959 EVT VT = N->getValueType(i); 1960 if (!N->hasAnyUseOfValue(i)) 1961 continue; 1962 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1963 if (RegPressure[RCId] >= RegLimit[RCId]) 1964 --PDiff; 1965 } 1966 return PDiff; 1967} 1968 1969void RegReductionPQBase::ScheduledNode(SUnit *SU) { 1970 if (!TracksRegPressure) 1971 return; 1972 1973 if (!SU->getNode()) 1974 return; 1975 1976 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1977 I != E; ++I) { 1978 if (I->isCtrl()) 1979 continue; 1980 SUnit *PredSU = I->getSUnit(); 1981 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1982 // to cover the number of registers defined (they are all live). 1983 if (PredSU->NumRegDefsLeft == 0) { 1984 continue; 1985 } 1986 // FIXME: The ScheduleDAG currently loses information about which of a 1987 // node's values is consumed by each dependence. Consequently, if the node 1988 // defines multiple register classes, we don't know which to pressurize 1989 // here.
Instead the following loop consumes the register defs in an 1990 // arbitrary order. At least it handles the common case of clustered loads 1991 // to the same class. For precise liveness, each SDep needs to indicate the 1992 // result number. But that tightly couples the ScheduleDAG with the 1993 // SelectionDAG, making updates tricky. A simpler hack would be to attach a 1994 // value type or register class to SDep. 1995 // 1996 // The most important aspect of register tracking is balancing the increase 1997 // here with the reduction further below. Note that this SU may use multiple 1998 // defs in PredSU. They can't be determined here, but we've already 1999 // compensated by reducing NumRegDefsLeft in PredSU during 2000 // ScheduleDAGSDNodes::AddSchedEdges. 2001 --PredSU->NumRegDefsLeft; 2002 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 2003 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2004 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2005 if (SkipRegDefs) 2006 continue; 2007 2008 unsigned RCId, Cost; 2009 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); 2010 RegPressure[RCId] += Cost; 2011 break; 2012 } 2013 } 2014 2015 // We should have this assert, but there may be dead SDNodes that never 2016 // materialize as SUnits, so they don't appear to generate liveness. 2017 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 2018 int SkipRegDefs = (int)SU->NumRegDefsLeft; 2019 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 2020 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2021 if (SkipRegDefs > 0) 2022 continue; 2023 unsigned RCId, Cost; 2024 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); 2025 if (RegPressure[RCId] < Cost) { 2026 // Register pressure tracking is imprecise. This can happen. But we try 2027 // hard not to let it happen because it likely results in poor scheduling. 2028 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); 2029 RegPressure[RCId] = 0; 2030 } 2031 else { 2032 RegPressure[RCId] -= Cost; 2033 } 2034 } 2035 dumpRegPressure(); 2036} 2037 2038void RegReductionPQBase::UnscheduledNode(SUnit *SU) { 2039 if (!TracksRegPressure) 2040 return; 2041 2042 const SDNode *N = SU->getNode(); 2043 if (!N) return; 2044 2045 if (!N->isMachineOpcode()) { 2046 if (N->getOpcode() != ISD::CopyToReg) 2047 return; 2048 } else { 2049 unsigned Opc = N->getMachineOpcode(); 2050 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2051 Opc == TargetOpcode::INSERT_SUBREG || 2052 Opc == TargetOpcode::SUBREG_TO_REG || 2053 Opc == TargetOpcode::REG_SEQUENCE || 2054 Opc == TargetOpcode::IMPLICIT_DEF) 2055 return; 2056 } 2057 2058 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2059 I != E; ++I) { 2060 if (I->isCtrl()) 2061 continue; 2062 SUnit *PredSU = I->getSUnit(); 2063 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 2064 // counts data deps.
2065 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 2066 continue; 2067 const SDNode *PN = PredSU->getNode(); 2068 if (!PN->isMachineOpcode()) { 2069 if (PN->getOpcode() == ISD::CopyFromReg) { 2070 EVT VT = PN->getValueType(0); 2071 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2072 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2073 } 2074 continue; 2075 } 2076 unsigned POpc = PN->getMachineOpcode(); 2077 if (POpc == TargetOpcode::IMPLICIT_DEF) 2078 continue; 2079 if (POpc == TargetOpcode::EXTRACT_SUBREG || 2080 POpc == TargetOpcode::INSERT_SUBREG || 2081 POpc == TargetOpcode::SUBREG_TO_REG) { 2082 EVT VT = PN->getValueType(0); 2083 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2084 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2085 continue; 2086 } 2087 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 2088 for (unsigned i = 0; i != NumDefs; ++i) { 2089 EVT VT = PN->getValueType(i); 2090 if (!PN->hasAnyUseOfValue(i)) 2091 continue; 2092 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2093 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 2094 // Register pressure tracking is imprecise. This can happen. 2095 RegPressure[RCId] = 0; 2096 else 2097 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 2098 } 2099 } 2100 2101 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 2102 // may transfer data dependencies to CopyToReg. 2103 if (SU->NumSuccs && N->isMachineOpcode()) { 2104 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2105 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2106 EVT VT = N->getValueType(i); 2107 if (VT == MVT::Glue || VT == MVT::Other) 2108 continue; 2109 if (!N->hasAnyUseOfValue(i)) 2110 continue; 2111 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2112 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2113 } 2114 } 2115 2116 dumpRegPressure(); 2117} 2118 2119//===----------------------------------------------------------------------===// 2120// Dynamic Node Priority for Register Pressure Reduction 2121//===----------------------------------------------------------------------===// 2122 2123/// closestSucc - Returns the scheduled cycle of the successor which is 2124/// closest to the current cycle. 2125static unsigned closestSucc(const SUnit *SU) { 2126 unsigned MaxHeight = 0; 2127 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2128 I != E; ++I) { 2129 if (I->isCtrl()) continue; // ignore chain succs 2130 unsigned Height = I->getSUnit()->getHeight(); 2131 // If there are a bunch of CopyToRegs stacked up, they should be considered 2132 // to be at the same position. 2133 if (I->getSUnit()->getNode() && 2134 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 2135 Height = closestSucc(I->getSUnit())+1; 2136 if (Height > MaxHeight) 2137 MaxHeight = Height; 2138 } 2139 return MaxHeight; 2140} 2141 2142/// calcMaxScratches - Returns a cost estimate of the worst-case requirement 2143/// for scratch registers, i.e. the number of data dependencies. 2144static unsigned calcMaxScratches(const SUnit *SU) { 2145 unsigned Scratches = 0; 2146 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2147 I != E; ++I) { 2148 if (I->isCtrl()) continue; // ignore chain preds 2149 Scratches++; 2150 } 2151 return Scratches; 2152} 2153 2154/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are 2155/// CopyFromReg from a virtual register.
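///
/// For example, in a single-block loop a hypothetical induction-variable
/// update "iv.next = add iv, 1" typically reads iv through a CopyFromReg of a
/// virtual register, so this predicate holds for it; initVRegCycle below pairs
/// this test with hasOnlyLiveOutUses to tag such nodes as isVRegCycle.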
2156static bool hasOnlyLiveInOpers(const SUnit *SU) { 2157 bool RetVal = false; 2158 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2159 I != E; ++I) { 2160 if (I->isCtrl()) continue; 2161 const SUnit *PredSU = I->getSUnit(); 2162 if (PredSU->getNode() && 2163 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 2164 unsigned Reg = 2165 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 2166 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2167 RetVal = true; 2168 continue; 2169 } 2170 } 2171 return false; 2172 } 2173 return RetVal; 2174} 2175 2176/// hasOnlyLiveOutUses - Return true if SU has only value successors that are 2177/// CopyToReg to a virtual register. This SU def is probably a liveout and 2178/// it has no other use. It should be scheduled closer to the terminator. 2179static bool hasOnlyLiveOutUses(const SUnit *SU) { 2180 bool RetVal = false; 2181 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2182 I != E; ++I) { 2183 if (I->isCtrl()) continue; 2184 const SUnit *SuccSU = I->getSUnit(); 2185 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 2186 unsigned Reg = 2187 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 2188 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2189 RetVal = true; 2190 continue; 2191 } 2192 } 2193 return false; 2194 } 2195 return RetVal; 2196} 2197 2198// Set isVRegCycle for a node with only live in opers and live out uses. Also 2199// set isVRegCycle for its CopyFromReg operands. 2200// 2201// This is only relevant for single-block loops, in which case the VRegCycle 2202// node is likely an induction variable whose operand and target virtual 2203// registers should be coalesced (e.g. pre/post increment values). Setting the 2204// isVRegCycle flag helps the scheduler prioritize other uses of the same 2205// CopyFromReg so that this node becomes the virtual register "kill". This 2206// avoids interference between the values live in and out of the block and 2207// eliminates a copy inside the loop. 2208static void initVRegCycle(SUnit *SU) { 2209 if (DisableSchedVRegCycle) 2210 return; 2211 2212 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2213 return; 2214 2215 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2216 2217 SU->isVRegCycle = true; 2218 2219 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2220 I != E; ++I) { 2221 if (I->isCtrl()) continue; 2222 I->getSUnit()->isVRegCycle = true; 2223 } 2224} 2225 2226// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2227// CopyFromReg operands. We should no longer penalize other uses of this VReg. 2228static void resetVRegCycle(SUnit *SU) { 2229 if (!SU->isVRegCycle) 2230 return; 2231 2232 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2233 I != E; ++I) { 2234 if (I->isCtrl()) continue; // ignore chain preds 2235 SUnit *PredSU = I->getSUnit(); 2236 if (PredSU->isVRegCycle) { 2237 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 2238 "VRegCycle def must be CopyFromReg"); 2239 I->getSUnit()->isVRegCycle = 0; 2240 } 2241 } 2242} 2243 2244// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 2245// means a node that defines the VRegCycle has not been scheduled yet. 2246static bool hasVRegCycleUse(const SUnit *SU) { 2247 // If this SU also defines the VReg, don't hoist it as a "use".
2248 if (SU->isVRegCycle) 2249 return false; 2250 2251 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2252 I != E; ++I) { 2253 if (I->isCtrl()) continue; // ignore chain preds 2254 if (I->getSUnit()->isVRegCycle && 2255 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 2256 DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 2257 return true; 2258 } 2259 } 2260 return false; 2261} 2262 2263// Check for either a dependence (latency) or resource (hazard) stall. 2264// 2265// Note: The ScheduleHazardRecognizer interface requires a non-const SU. 2266static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { 2267 if ((int)SPQ->getCurCycle() < Height) return true; 2268 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2269 != ScheduleHazardRecognizer::NoHazard) 2270 return true; 2271 return false; 2272} 2273 2274// Return -1 if left has higher priority, 1 if right has higher priority. 2275// Return 0 if latency-based priority is equivalent. 2276static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, 2277 RegReductionPQBase *SPQ) { 2278 // Scheduling an instruction that uses a VReg whose postincrement has not yet 2279 // been scheduled will induce a copy. Model this as an extra cycle of latency. 2280 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; 2281 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; 2282 int LHeight = (int)left->getHeight() + LPenalty; 2283 int RHeight = (int)right->getHeight() + RPenalty; 2284 2285 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && 2286 BUHasStall(left, LHeight, SPQ); 2287 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && 2288 BUHasStall(right, RHeight, SPQ); 2289 2290 // If scheduling one of the nodes will cause a pipeline stall, delay it. 2291 // If scheduling either of the nodes will cause a pipeline stall, sort 2292 // them according to their height. 2293 if (LStall) { 2294 if (!RStall) { 2295 DEBUG(++FactorCount[FactStall]); 2296 return 1; 2297 } 2298 if (LHeight != RHeight) { 2299 DEBUG(++FactorCount[FactStall]); 2300 return LHeight > RHeight ? 1 : -1; 2301 } 2302 } else if (RStall) { 2303 DEBUG(++FactorCount[FactStall]); 2304 return -1; 2305 } 2306 2307 // If either node is scheduling for latency, sort them by height/depth 2308 // and latency. 2309 if (!checkPref || (left->SchedulingPref == Sched::ILP || 2310 right->SchedulingPref == Sched::ILP)) { 2311 if (DisableSchedCycles) { 2312 if (LHeight != RHeight) { 2313 DEBUG(++FactorCount[FactHeight]); 2314 return LHeight > RHeight ? 1 : -1; 2315 } 2316 } 2317 else { 2318 // If neither instruction stalls (!LStall && !RStall) then 2319 // their heights are already covered, so only depth matters. We also reach 2320 // this if both stall but have the same height. 2321 int LDepth = left->getDepth() - LPenalty; 2322 int RDepth = right->getDepth() - RPenalty; 2323 if (LDepth != RDepth) { 2324 DEBUG(++FactorCount[FactDepth]); 2325 DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 2326 << ") depth " << LDepth << " vs SU (" << right->NodeNum 2327 << ") depth " << RDepth << "\n"); 2328 return LDepth < RDepth ? 1 : -1; 2329 } 2330 } 2331 if (left->Latency != right->Latency) { 2332 DEBUG(++FactorCount[FactOther]); 2333 return left->Latency > right->Latency ? 1 : -1; 2334 } 2335 } 2336 return 0; 2337} 2338 2339static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { 2340 // Schedule physical register definitions close to their use.
This is 2341 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as 2342 // long as shortening physreg live ranges is generally good, we can defer 2343 // creating a subtarget hook. 2344 if (!DisableSchedPhysRegJoin) { 2345 bool LHasPhysReg = left->hasPhysRegDefs; 2346 bool RHasPhysReg = right->hasPhysRegDefs; 2347 if (LHasPhysReg != RHasPhysReg) { 2348 DEBUG(++FactorCount[FactRegUses]); 2349 #ifndef NDEBUG 2350 const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; 2351 #endif 2352 DEBUG(dbgs() << " SU (" << left->NodeNum << ") " 2353 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " 2354 << PhysRegMsg[RHasPhysReg] << "\n"); 2355 return LHasPhysReg < RHasPhysReg; 2356 } 2357 } 2358 2359 // Prioritize by Sethi-Ullman number and push CopyToReg nodes down. 2360 unsigned LPriority = SPQ->getNodePriority(left); 2361 unsigned RPriority = SPQ->getNodePriority(right); 2362 2363 // Be really careful about hoisting call operands above previous calls. 2364 // Only allow it if it would reduce register pressure. 2365 if (left->isCall && right->isCallOp) { 2366 unsigned RNumVals = right->getNode()->getNumValues(); 2367 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; 2368 } 2369 if (right->isCall && left->isCallOp) { 2370 unsigned LNumVals = left->getNode()->getNumValues(); 2371 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; 2372 } 2373 2374 if (LPriority != RPriority) { 2375 DEBUG(++FactorCount[FactStatic]); 2376 return LPriority > RPriority; 2377 } 2378 2379 // If one or both of the nodes are calls and their Sethi-Ullman numbers are 2380 // the same, keep source order. 2381 if (left->isCall || right->isCall) { 2382 unsigned LOrder = SPQ->getNodeOrdering(left); 2383 unsigned ROrder = SPQ->getNodeOrdering(right); 2384 2385 // Prefer an ordering where the lower the non-zero order number, the higher 2386 // the preference. 2387 if ((LOrder || ROrder) && LOrder != ROrder) 2388 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2389 } 2390 2391 // Try to schedule def + use closer when Sethi-Ullman numbers are the same. 2392 // e.g. 2393 // t1 = op t2, c1 2394 // t3 = op t4, c2 2395 // 2396 // and the following instructions are both ready. 2397 // t2 = op c3 2398 // t4 = op c4 2399 // 2400 // Then schedule t2 = op first. 2401 // i.e. 2402 // t4 = op c4 2403 // t2 = op c3 2404 // t1 = op t2, c1 2405 // t3 = op t4, c2 2406 // 2407 // This creates more short live intervals. 2408 unsigned LDist = closestSucc(left); 2409 unsigned RDist = closestSucc(right); 2410 if (LDist != RDist) { 2411 DEBUG(++FactorCount[FactOther]); 2412 return LDist < RDist; 2413 } 2414 2415 // How many registers become live when the node is scheduled. 2416 unsigned LScratch = calcMaxScratches(left); 2417 unsigned RScratch = calcMaxScratches(right); 2418 if (LScratch != RScratch) { 2419 DEBUG(++FactorCount[FactOther]); 2420 return LScratch > RScratch; 2421 } 2422 2423 // Comparing latency against a call makes little sense unless the node 2424 // is register pressure-neutral. 2425 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) 2426 return (left->NodeQueueId > right->NodeQueueId); 2427 2428 // Do not compare latencies when one or both of the nodes are calls.
2429 if (!DisableSchedCycles && 2430 !(left->isCall || right->isCall)) { 2431 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2432 if (result != 0) 2433 return result > 0; 2434 } 2435 else { 2436 if (left->getHeight() != right->getHeight()) { 2437 DEBUG(++FactorCount[FactHeight]); 2438 return left->getHeight() > right->getHeight(); 2439 } 2440 2441 if (left->getDepth() != right->getDepth()) { 2442 DEBUG(++FactorCount[FactDepth]); 2443 return left->getDepth() < right->getDepth(); 2444 } 2445 } 2446 2447 assert(left->NodeQueueId && right->NodeQueueId && 2448 "NodeQueueId cannot be zero"); 2449 DEBUG(++FactorCount[FactOther]); 2450 return (left->NodeQueueId > right->NodeQueueId); 2451} 2452 2453// Bottom up 2454bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2455 if (int res = checkSpecialNodes(left, right)) 2456 return res > 0; 2457 2458 return BURRSort(left, right, SPQ); 2459} 2460 2461// Source order, otherwise bottom up. 2462bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2463 if (int res = checkSpecialNodes(left, right)) 2464 return res > 0; 2465 2466 unsigned LOrder = SPQ->getNodeOrdering(left); 2467 unsigned ROrder = SPQ->getNodeOrdering(right); 2468 2469 // Prefer an ordering where the lower the non-zero order number, the higher 2470 // the preference. 2471 if ((LOrder || ROrder) && LOrder != ROrder) 2472 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2473 2474 return BURRSort(left, right, SPQ); 2475} 2476 2477// If the time between now and when the instruction will be ready can cover 2478// the spill code, then avoid adding it to the ready queue. This gives long 2479// stalls highest priority and allows hoisting across calls. It should also 2480// speed up processing the available queue. 2481bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2482 static const unsigned ReadyDelay = 3; 2483 2484 if (SPQ->MayReduceRegPressure(SU)) return true; 2485 2486 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2487 2488 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2489 != ScheduleHazardRecognizer::NoHazard) 2490 return false; 2491 2492 return true; 2493} 2494 2495// Return true if right should be scheduled with higher priority than left. 2496bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2497 if (int res = checkSpecialNodes(left, right)) 2498 return res > 0; 2499 2500 if (left->isCall || right->isCall) 2501 // No way to compute latency of calls. 2502 return BURRSort(left, right, SPQ); 2503 2504 bool LHigh = SPQ->HighRegPressure(left); 2505 bool RHigh = SPQ->HighRegPressure(right); 2506 // Avoid causing spills. If register pressure is high, schedule for 2507 // register pressure reduction. 2508 if (LHigh && !RHigh) { 2509 DEBUG(++FactorCount[FactPressureDiff]); 2510 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2511 << right->NodeNum << ")\n"); 2512 return true; 2513 } 2514 else if (!LHigh && RHigh) { 2515 DEBUG(++FactorCount[FactPressureDiff]); 2516 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2517 << left->NodeNum << ")\n"); 2518 return false; 2519 } 2520 if (!LHigh && !RHigh) { 2521 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2522 if (result != 0) 2523 return result > 0; 2524 } 2525 return BURRSort(left, right, SPQ); 2526} 2527 2528// Schedule as many instructions in each cycle as possible. So don't make an 2529// instruction available unless it is ready in the current cycle. 
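//
// For illustration: with CurCycle == 3, a node of height 5 cannot issue for
// two more cycles, so isReady below returns false and the node is held back;
// a node of height 3 passes the height test and is rejected only if the
// hazard recognizer reports a hazard for the current cycle.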
2530bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2531 if (SU->getHeight() > CurCycle) return false; 2532 2533 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2534 != ScheduleHazardRecognizer::NoHazard) 2535 return false; 2536 2537 return true; 2538} 2539 2540static bool canEnableCoalescing(SUnit *SU) { 2541 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2542 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2543 // CopyToReg should be close to its uses to facilitate coalescing and 2544 // avoid spilling. 2545 return true; 2546 2547 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2548 Opc == TargetOpcode::SUBREG_TO_REG || 2549 Opc == TargetOpcode::INSERT_SUBREG) 2550 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2551 // close to their uses to facilitate coalescing. 2552 return true; 2553 2554 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2555 // If SU does not have a register def, schedule it close to its uses 2556 // because it does not lengthen any live ranges. 2557 return true; 2558 2559 return false; 2560} 2561 2562// list-ilp is currently an experimental scheduler that allows various 2563// heuristics to be enabled prior to the normal register reduction logic. 2564bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2565 if (int res = checkSpecialNodes(left, right)) 2566 return res > 0; 2567 2568 if (left->isCall || right->isCall) 2569 // No way to compute latency of calls. 2570 return BURRSort(left, right, SPQ); 2571 2572 unsigned LLiveUses = 0, RLiveUses = 0; 2573 int LPDiff = 0, RPDiff = 0; 2574 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2575 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2576 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2577 } 2578 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2579 DEBUG(++FactorCount[FactPressureDiff]); 2580 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 2581 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 2582 return LPDiff > RPDiff; 2583 } 2584 2585 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2586 bool LReduce = canEnableCoalescing(left); 2587 bool RReduce = canEnableCoalescing(right); 2588 DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); 2589 if (LReduce && !RReduce) return false; 2590 if (RReduce && !LReduce) return true; 2591 } 2592 2593 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2594 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2595 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 2596 DEBUG(++FactorCount[FactRegUses]); 2597 return LLiveUses < RLiveUses; 2598 } 2599 2600 if (!DisableSchedStalls) { 2601 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2602 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2603 if (LStall != RStall) { 2604 DEBUG(++FactorCount[FactHeight]); 2605 return left->getHeight() > right->getHeight(); 2606 } 2607 } 2608 2609 if (!DisableSchedCriticalPath) { 2610 int spread = (int)left->getDepth() - (int)right->getDepth(); 2611 if (std::abs(spread) > MaxReorderWindow) { 2612 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2613 << left->getDepth() << " != SU(" << right->NodeNum << "): " 2614 << right->getDepth() << "\n"); 2615 DEBUG(++FactorCount[FactDepth]); 2616 return left->getDepth() < right->getDepth(); 2617 } 2618 } 2619 2620 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2621 int spread = (int)left->getHeight() - (int)right->getHeight(); 2622 if 
(std::abs(spread) > MaxReorderWindow) { 2623 DEBUG(++FactorCount[FactHeight]); 2624 return left->getHeight() > right->getHeight(); 2625 } 2626 } 2627 2628 return BURRSort(left, right, SPQ); 2629} 2630 2631void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2632 SUnits = &sunits; 2633 // Add pseudo dependency edges for two-address nodes. 2634 if (!Disable2AddrHack) 2635 AddPseudoTwoAddrDeps(); 2636 // Reroute edges to nodes with multiple uses. 2637 if (!TracksRegPressure) 2638 PrescheduleNodesWithMultipleUses(); 2639 // Calculate node priorities. 2640 CalculateSethiUllmanNumbers(); 2641 2642 // For single block loops, mark nodes that look like canonical IV increments. 2643 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { 2644 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { 2645 initVRegCycle(&sunits[i]); 2646 } 2647 } 2648} 2649 2650//===----------------------------------------------------------------------===// 2651// Preschedule for Register Pressure 2652//===----------------------------------------------------------------------===// 2653 2654bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 2655 if (SU->isTwoAddress) { 2656 unsigned Opc = SU->getNode()->getMachineOpcode(); 2657 const MCInstrDesc &MCID = TII->get(Opc); 2658 unsigned NumRes = MCID.getNumDefs(); 2659 unsigned NumOps = MCID.getNumOperands() - NumRes; 2660 for (unsigned i = 0; i != NumOps; ++i) { 2661 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 2662 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 2663 if (DU->getNodeId() != -1 && 2664 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 2665 return true; 2666 } 2667 } 2668 } 2669 return false; 2670} 2671 2672/// canClobberReachingPhysRegUse - True if SU would clobber one of its 2673/// successor's explicit physregs whose definition can reach DepSU. 2674/// i.e. DepSU should not be scheduled above SU. 2675static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 2676 ScheduleDAGRRList *scheduleDAG, 2677 const TargetInstrInfo *TII, 2678 const TargetRegisterInfo *TRI) { 2679 const unsigned *ImpDefs 2680 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 2681 if (!ImpDefs) 2682 return false; 2683 2684 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); 2685 SI != SE; ++SI) { 2686 SUnit *SuccSU = SI->getSUnit(); 2687 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), 2688 PE = SuccSU->Preds.end(); PI != PE; ++PI) { 2689 if (!PI->isAssignedRegDep()) 2690 continue; 2691 2692 for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { 2693 // Return true if SU clobbers this physical register use and the 2694 // definition of the register reaches from DepSU. IsReachable queries a 2695 // topological forward sort of the DAG (following the successors). 2696 if (TRI->regsOverlap(*ImpDef, PI->getReg()) && 2697 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 2698 return true; 2699 } 2700 } 2701 } 2702 return false; 2703} 2704 2705/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 2706/// physical register defs.
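///
/// An illustrative (hypothetical) case: on X86 many arithmetic instructions
/// implicitly define EFLAGS. If SuccSU implicitly defines EFLAGS and that
/// value is used, while SU (or a node glued to it) also implicitly defines
/// EFLAGS, this returns true, since scheduling SU between SuccSU and the
/// flags consumer would clobber the live flags.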
2707static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2708 const TargetInstrInfo *TII, 2709 const TargetRegisterInfo *TRI) { 2710 SDNode *N = SuccSU->getNode(); 2711 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2712 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 2713 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 2714 for (const SDNode *SUNode = SU->getNode(); SUNode; 2715 SUNode = SUNode->getGluedNode()) { 2716 if (!SUNode->isMachineOpcode()) 2717 continue; 2718 const unsigned *SUImpDefs = 2719 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 2720 if (!SUImpDefs) 2721 return false; 2722 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2723 EVT VT = N->getValueType(i); 2724 if (VT == MVT::Glue || VT == MVT::Other) 2725 continue; 2726 if (!N->hasAnyUseOfValue(i)) 2727 continue; 2728 unsigned Reg = ImpDefs[i - NumDefs]; 2729 for (;*SUImpDefs; ++SUImpDefs) { 2730 unsigned SUReg = *SUImpDefs; 2731 if (TRI->regsOverlap(Reg, SUReg)) 2732 return true; 2733 } 2734 } 2735 } 2736 return false; 2737} 2738 2739/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2740/// are not handled well by the general register pressure reduction 2741/// heuristics. When presented with code like this: 2742/// 2743/// N 2744/// / | 2745/// / | 2746/// U store 2747/// | 2748/// ... 2749/// 2750/// the heuristics tend to push the store up, but since the 2751/// operand of the store has another use (U), this would increase 2752/// the length of that other use (the U->N edge). 2753/// 2754/// This function transforms code like the above to route U's 2755/// dependence through the store when possible, like this: 2756/// 2757/// N 2758/// || 2759/// || 2760/// store 2761/// | 2762/// U 2763/// | 2764/// ... 2765/// 2766/// This results in the store being scheduled immediately 2767/// after N, which shortens the U->N live range, reducing 2768/// register pressure. 2769/// 2770void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2771 // Visit all the nodes in topological order, working top-down. 2772 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2773 SUnit *SU = &(*SUnits)[i]; 2774 // For now, only look at nodes with no data successors, such as stores. 2775 // These are especially important, due to the heuristics in 2776 // getNodePriority for nodes with no data successors. 2777 if (SU->NumSuccs != 0) 2778 continue; 2779 // For now, only look at nodes with exactly one data predecessor. 2780 if (SU->NumPreds != 1) 2781 continue; 2782 // Avoid prescheduling copies to virtual registers, which don't behave 2783 // like other nodes from the perspective of scheduling heuristics. 2784 if (SDNode *N = SU->getNode()) 2785 if (N->getOpcode() == ISD::CopyToReg && 2786 TargetRegisterInfo::isVirtualRegister 2787 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 2788 continue; 2789 2790 // Locate the single data predecessor. 2791 SUnit *PredSU = 0; 2792 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 2793 EE = SU->Preds.end(); II != EE; ++II) 2794 if (!II->isCtrl()) { 2795 PredSU = II->getSUnit(); 2796 break; 2797 } 2798 assert(PredSU); 2799 2800 // Don't rewrite edges that carry physregs, because that requires additional 2801 // support infrastructure. 2802 if (PredSU->hasPhysRegDefs) 2803 continue; 2804 // Short-circuit the case where SU is PredSU's only data successor. 
2805 if (PredSU->NumSuccs == 1) 2806 continue; 2807 // Avoid prescheduling to copies from virtual registers, which don't behave 2808 // like other nodes from the perspective of scheduling heuristics. 2809 if (SDNode *N = SU->getNode()) 2810 if (N->getOpcode() == ISD::CopyFromReg && 2811 TargetRegisterInfo::isVirtualRegister 2812 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 2813 continue; 2814 2815 // Perform checks on the successors of PredSU. 2816 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 2817 EE = PredSU->Succs.end(); II != EE; ++II) { 2818 SUnit *PredSuccSU = II->getSUnit(); 2819 if (PredSuccSU == SU) continue; 2820 // If PredSU has another successor with no data successors, for 2821 // now don't attempt to choose either over the other. 2822 if (PredSuccSU->NumSuccs == 0) 2823 goto outer_loop_continue; 2824 // Don't break physical register dependencies. 2825 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 2826 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 2827 goto outer_loop_continue; 2828 // Don't introduce graph cycles. 2829 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 2830 goto outer_loop_continue; 2831 } 2832 2833 // Ok, the transformation is safe and the heuristics suggest it is 2834 // profitable. Update the graph. 2835 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum 2836 << " next to PredSU #" << PredSU->NodeNum 2837 << " to guide scheduling in the presence of multiple uses\n"); 2838 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 2839 SDep Edge = PredSU->Succs[i]; 2840 assert(!Edge.isAssignedRegDep()); 2841 SUnit *SuccSU = Edge.getSUnit(); 2842 if (SuccSU != SU) { 2843 Edge.setSUnit(PredSU); 2844 scheduleDAG->RemovePred(SuccSU, Edge); 2845 scheduleDAG->AddPred(SU, Edge); 2846 Edge.setSUnit(SU); 2847 scheduleDAG->AddPred(SuccSU, Edge); 2848 --i; 2849 } 2850 } 2851 outer_loop_continue:; 2852 } 2853} 2854 2855/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 2856/// it as a def&use operand, add a pseudo control edge from it to the other 2857/// node (if it won't create a cycle) so the two-address one will be scheduled 2858/// first (lower in the schedule). If both nodes are two-address, favor the 2859/// one that has a CopyToReg use (more likely to be a loop induction update). 2860/// If both are two-address, but one is commutable while the other is not 2861/// commutable, favor the one that's not commutable. 2862void RegReductionPQBase::AddPseudoTwoAddrDeps() { 2863 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2864 SUnit *SU = &(*SUnits)[i]; 2865 if (!SU->isTwoAddress) 2866 continue; 2867 2868 SDNode *Node = SU->getNode(); 2869 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) 2870 continue; 2871 2872 bool isLiveOut = hasOnlyLiveOutUses(SU); 2873 unsigned Opc = Node->getMachineOpcode(); 2874 const MCInstrDesc &MCID = TII->get(Opc); 2875 unsigned NumRes = MCID.getNumDefs(); 2876 unsigned NumOps = MCID.getNumOperands() - NumRes; 2877 for (unsigned j = 0; j != NumOps; ++j) { 2878 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) 2879 continue; 2880 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 2881 if (DU->getNodeId() == -1) 2882 continue; 2883 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 2884 if (!DUSU) continue; 2885 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 2886 E = DUSU->Succs.end(); I != E; ++I) { 2887 if (I->isCtrl()) continue; 2888 SUnit *SuccSU = I->getSUnit(); 2889 if (SuccSU == SU) 2890 continue; 2891 // Be conservative.
Ignore if nodes aren't at roughly the same 2892 // depth and height. 2893 if (SuccSU->getHeight() < SU->getHeight() && 2894 (SU->getHeight() - SuccSU->getHeight()) > 1) 2895 continue; 2896 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 2897 // constrains whatever is using the copy, instead of the copy 2898 // itself. In the case that the copy is coalesced, this 2899 // preserves the intent of the pseudo two-address heuristics. 2900 while (SuccSU->Succs.size() == 1 && 2901 SuccSU->getNode()->isMachineOpcode() && 2902 SuccSU->getNode()->getMachineOpcode() == 2903 TargetOpcode::COPY_TO_REGCLASS) 2904 SuccSU = SuccSU->Succs.front().getSUnit(); 2905 // Don't constrain non-instruction nodes. 2906 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 2907 continue; 2908 // Don't constrain nodes with physical register defs if the 2909 // predecessor can clobber them. 2910 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 2911 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 2912 continue; 2913 } 2914 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 2915 // these may be coalesced away. We want them close to their uses. 2916 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 2917 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 2918 SuccOpc == TargetOpcode::INSERT_SUBREG || 2919 SuccOpc == TargetOpcode::SUBREG_TO_REG) 2920 continue; 2921 if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && 2922 (!canClobber(SuccSU, DUSU) || 2923 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 2924 (!SU->isCommutable && SuccSU->isCommutable)) && 2925 !scheduleDAG->IsReachable(SuccSU, SU)) { 2926 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 2927 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 2928 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 2929 /*Reg=*/0, /*isNormalMemory=*/false, 2930 /*isMustAlias=*/false, 2931 /*isArtificial=*/true)); 2932 } 2933 } 2934 } 2935 } 2936} 2937 2938//===----------------------------------------------------------------------===// 2939// Public Constructor Functions 2940//===----------------------------------------------------------------------===// 2941 2942llvm::ScheduleDAGSDNodes * 2943llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2944 CodeGenOpt::Level OptLevel) { 2945 const TargetMachine &TM = IS->TM; 2946 const TargetInstrInfo *TII = TM.getInstrInfo(); 2947 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2948 2949 BURegReductionPriorityQueue *PQ = 2950 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2951 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2952 PQ->setScheduleDAG(SD); 2953 return SD; 2954} 2955 2956llvm::ScheduleDAGSDNodes * 2957llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2958 CodeGenOpt::Level OptLevel) { 2959 const TargetMachine &TM = IS->TM; 2960 const TargetInstrInfo *TII = TM.getInstrInfo(); 2961 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2962 2963 SrcRegReductionPriorityQueue *PQ = 2964 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2965 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2966 PQ->setScheduleDAG(SD); 2967 return SD; 2968} 2969 2970llvm::ScheduleDAGSDNodes * 2971llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 2972 CodeGenOpt::Level OptLevel) { 2973 const TargetMachine &TM = IS->TM; 2974 const TargetInstrInfo *TII = TM.getInstrInfo(); 2975 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2976 const
TargetLowering *TLI = &IS->getTargetLowering(); 2977 2978 HybridBURRPriorityQueue *PQ = 2979 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2980 2981 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2982 PQ->setScheduleDAG(SD); 2983 return SD; 2984} 2985 2986llvm::ScheduleDAGSDNodes * 2987llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 2988 CodeGenOpt::Level OptLevel) { 2989 const TargetMachine &TM = IS->TM; 2990 const TargetInstrInfo *TII = TM.getInstrInfo(); 2991 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2992 const TargetLowering *TLI = &IS->getTargetLowering(); 2993 2994 ILPBURRPriorityQueue *PQ = 2995 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2996 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2997 PQ->setScheduleDAG(SD); 2998 return SD; 2999} 3000
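// A hypothetical usage sketch (these factories are normally reached through
// the scheduler registry, e.g. llc's -pre-RA-sched=list-ilp, rather than
// called directly). Assuming the framework supplies a live SelectionDAGISel
// *IS:
//
//   ScheduleDAGSDNodes *Scheduler =
//       createILPListDAGScheduler(IS, CodeGenOpt::Default);
//   // SelectionDAGISel drives the scheduler over each block and is
//   // responsible for deleting it.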