ScheduleDAGRRList.cpp revision 30c44e18bf28c3f5feda56e97695e2b72de7fedb
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
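// For experimentation, these cl::opt flags can be combined with the scheduler
// selection above, e.g. (an illustrative invocation, not a prescribed one):
//   llc -pre-RA-sched=list-ilp -disable-sched-critical-path foo.ll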
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values.  These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
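    // For example, if SU sits at height 3 and this edge has latency 2, the
    // predecessor's height is raised to at least 5, i.e. it cannot issue
    // before cycle 5 (counting from the block's end) without a stall.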
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = 0;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return 0;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return 0;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
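  // CallResource is the extra slot past the last real physical register
  // (LiveRegDefs/LiveRegGens were sized getNumRegs()+1 in Schedule()); it
  // models the whole calling sequence as a single virtual resource, so at
  // most one call sequence can be "live" at any point in the schedule.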
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue.  If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Advance the scheduler state to the specified cycle.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update it
/// and its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = NULL;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack over previously scheduled nodes, unscheduling
/// them up to and including BtSU, in order to make it possible to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
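/// This is used by PickNodeToScheduleBottomUp when backtracking cannot break
/// a live physical register interference: unfolding a folded load or
/// duplicating the defining node gives the blocked use its own copy of the
/// value. Returns NULL if the node cannot safely be cloned (e.g. it produces
/// or consumes glue).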
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    // Unfolding an x86 DEC64m operation results in a store, dec, and load,
    // which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
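    // Chain preds and preds that feed the unfolded load move to LoadSU (only
    // when the load is new); the remaining value preds and all value succs
    // move to NewSU; chain succs follow the load, since the memory access now
    // happens there.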
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Check to see if the specified register def of the
/// specified SUnit clobbers any "live" registers; if so, add them to the
/// interfering-register list LRegs.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Ref is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
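  // E.g. if the flags register is live because a scheduled successor still
  // needs the result of an earlier compare, any candidate that clobbers flags
  // is recorded in LRegs here and deferred by the caller.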
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors have been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to
    // copy the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(/*isBottomUp=*/true);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
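//
// Rough sketch of the numbering as computed by CalcNodeSethiUllmanNumber
// below (chain preds ignored, max over preds, +1 per tie, minimum 1):
//
//   a, b: leaves        SU(a) = SU(b) = 1
//   c = op a, b         SU(c) = max(1,1) + 1 = 2   (tie between a and b)
//   d = op c, a         SU(d) = max(2,1)     = 2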
1453// 1454namespace { 1455class RegReductionPQBase; 1456 1457struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { 1458 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } 1459}; 1460 1461#ifndef NDEBUG 1462template<class SF> 1463struct reverse_sort : public queue_sort { 1464 SF &SortFunc; 1465 reverse_sort(SF &sf) : SortFunc(sf) {} 1466 reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {} 1467 1468 bool operator()(SUnit* left, SUnit* right) const { 1469 // reverse left/right rather than simply !SortFunc(left, right) 1470 // to expose different paths in the comparison logic. 1471 return SortFunc(right, left); 1472 } 1473}; 1474#endif // NDEBUG 1475 1476/// bu_ls_rr_sort - Priority function for bottom up register pressure 1477// reduction scheduler. 1478struct bu_ls_rr_sort : public queue_sort { 1479 enum { 1480 IsBottomUp = true, 1481 HasReadyFilter = false 1482 }; 1483 1484 RegReductionPQBase *SPQ; 1485 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} 1486 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 1487 1488 bool operator()(SUnit* left, SUnit* right) const; 1489}; 1490 1491// src_ls_rr_sort - Priority function for source order scheduler. 1492struct src_ls_rr_sort : public queue_sort { 1493 enum { 1494 IsBottomUp = true, 1495 HasReadyFilter = false 1496 }; 1497 1498 RegReductionPQBase *SPQ; 1499 src_ls_rr_sort(RegReductionPQBase *spq) 1500 : SPQ(spq) {} 1501 src_ls_rr_sort(const src_ls_rr_sort &RHS) 1502 : SPQ(RHS.SPQ) {} 1503 1504 bool operator()(SUnit* left, SUnit* right) const; 1505}; 1506 1507// hybrid_ls_rr_sort - Priority function for hybrid scheduler. 1508struct hybrid_ls_rr_sort : public queue_sort { 1509 enum { 1510 IsBottomUp = true, 1511 HasReadyFilter = false 1512 }; 1513 1514 RegReductionPQBase *SPQ; 1515 hybrid_ls_rr_sort(RegReductionPQBase *spq) 1516 : SPQ(spq) {} 1517 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) 1518 : SPQ(RHS.SPQ) {} 1519 1520 bool isReady(SUnit *SU, unsigned CurCycle) const; 1521 1522 bool operator()(SUnit* left, SUnit* right) const; 1523}; 1524 1525// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 1526// scheduler. 1527struct ilp_ls_rr_sort : public queue_sort { 1528 enum { 1529 IsBottomUp = true, 1530 HasReadyFilter = false 1531 }; 1532 1533 RegReductionPQBase *SPQ; 1534 ilp_ls_rr_sort(RegReductionPQBase *spq) 1535 : SPQ(spq) {} 1536 ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) 1537 : SPQ(RHS.SPQ) {} 1538 1539 bool isReady(SUnit *SU, unsigned CurCycle) const; 1540 1541 bool operator()(SUnit* left, SUnit* right) const; 1542}; 1543 1544class RegReductionPQBase : public SchedulingPriorityQueue { 1545protected: 1546 std::vector<SUnit*> Queue; 1547 unsigned CurQueueId; 1548 bool TracksRegPressure; 1549 1550 // SUnits - The SUnits for the current graph. 1551 std::vector<SUnit> *SUnits; 1552 1553 MachineFunction &MF; 1554 const TargetInstrInfo *TII; 1555 const TargetRegisterInfo *TRI; 1556 const TargetLowering *TLI; 1557 ScheduleDAGRRList *scheduleDAG; 1558 1559 // SethiUllmanNumbers - The SethiUllman number for each node. 1560 std::vector<unsigned> SethiUllmanNumbers; 1561 1562 /// RegPressure - Tracking current reg pressure per register class. 1563 /// 1564 std::vector<unsigned> RegPressure; 1565 1566 /// RegLimit - Tracking the number of allocatable registers per register 1567 /// class. 
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  std::vector<SUnit *>::iterator Best = Q.begin();
  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
         E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != prior(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;
  return popFromQueueImpl(Q, Picker);
}

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() {
    if (Queue.empty()) return NULL;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;
    return V;
  }

  void dump(ScheduleDAG *DAG) const {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit*> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      dbgs() << "Height " << SU->getHeight() << ": ";
      SU->dump(DAG);
    }
  }
};

typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
ILPBURRPriorityQueue;
} // end anonymous namespace

//===----------------------------------------------------------------------===//
//           Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi-Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
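
// For illustration (not part of the original source), consider this small
// expression DAG, ignoring chain edges:
//
//      a   b   c        a, b, c: leaves, number = 1
//       \ /   /
//        t1  /          t1 = op a, b:  max(1,1), one tie -> 1 + 1 = 2
//         \ /
//          t2           t2 = op t1, c: max(2,1), no tie  -> 2
//
// Two operand subtrees of equal number tie and bump the result (t1), while a
// strictly smaller second operand does not (t2). Since a smaller number means
// higher priority, leaves and shallow subtrees are kept close to their uses,
// mirroring the classic Sethi-Ullman register-minimizing evaluation order.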

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

void RegReductionPQBase::addNode(const SUnit *SU) {
  unsigned SUSize = SethiUllmanNumbers.size();
  if (SUnits->size() > SUSize)
    SethiUllmanNumbers.resize(SUSize*2, 0);
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SethiUllmanNumbers.size());
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return 0;
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return 0;
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large Sethi-Ullman number so it will be
    // scheduled right before its predecessors, so that it doesn't lengthen
    // their live ranges.
    return 0xffff;
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
#if 1
  return SethiUllmanNumbers[SU->NodeNum];
#else
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
  if (SU->isCallOp) {
    // FIXME: This assumes all of the defs are used as call operands.
    int NP = (int)Priority - SU->getNode()->getNumValues();
    return (NP > 0) ? NP : 0;
  }
  return Priority;
#endif
}
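
// For illustration (not from the original source): given
//
//   t1 = CopyFromReg %vr0     // NumPreds == 0          -> priority 0
//   t2 = add t1, t1           // ordinary computation   -> Sethi-Ullman number
//   store t2, [mem]           // NumSuccs == 0          -> priority 0xffff
//
// the CopyFromReg's priority of 0 keeps it next to its use t2, while the
// store's 0xffff parks it directly below t2 in the final order, so neither
// end of the chain stretches t2's live range.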

//===----------------------------------------------------------------------===//
//                     Register Pressure Tracking
//===----------------------------------------------------------------------===//

void RegReductionPQBase::dumpRegPressure() const {
  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
         E = TRI->regclass_end(); I != E; ++I) {
    const TargetRegisterClass *RC = *I;
    unsigned Id = RC->getID();
    unsigned RP = RegPressure[Id];
    if (!RP) continue;
    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
          << '\n');
  }
}

bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
  if (!TLI)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);

      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
        return true;
    }
  }
  return false;
}

bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
  const SDNode *N = SU->getNode();

  if (!N->isMachineOpcode() || !SU->NumSuccs)
    return false;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    EVT VT = N->getValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      return true;
  }
  return false;
}

// Compute the register pressure contribution of this instruction by counting
// up for uses that are not live and down for defs. Only count register classes
// that are already under high pressure. As a side effect, compute the number
// of uses of registers that are already live.
//
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
// so could probably be factored.
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
  LiveUses = 0;
  int PDiff = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      if (PredSU->getNode()->isMachineOpcode())
        ++LiveUses;
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      EVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] >= RegLimit[RCId])
        ++PDiff;
    }
  }
  const SDNode *N = SU->getNode();

  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    return PDiff;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    EVT VT = N->getValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      --PDiff;
  }
  return PDiff;
}
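
// A worked example (illustrative only, with a hypothetical class RC whose
// pressure is at its limit): consider
//
//   t3 = add t1, t2   // t1's def not yet live, t2 already live
//
// The t1 operand counts +1 toward PDiff (scheduling the add makes a new RC
// value live), the add's own RC def counts -1 (bottom-up, scheduling it ends
// that value's live range), and the already-live t2 only increments LiveUses.
// The net PDiff of 0 tells list-ilp this node neither worsens nor relieves
// pressure in the classes being judged.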

void RegReductionPQBase::ScheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  if (!SU->getNode())
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    // FIXME: The ScheduleDAG currently loses information about which of a
    // node's values is consumed by each dependence. Consequently, if the node
    // defines multiple register classes, we don't know which to pressurize
    // here. Instead the following loop consumes the register defs in an
    // arbitrary order. At least it handles the common case of clustered loads
    // to the same class. For precise liveness, each SDep needs to indicate the
    // result number. But that tightly couples the ScheduleDAG with the
    // SelectionDAG, making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;

      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
      RegPressure[RCId] += Cost;
      break;
    }
  }

  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    unsigned RCId, Cost;
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
    if (RegPressure[RCId] < Cost) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    }
    else {
      RegPressure[RCId] -= Cost;
    }
  }
  dumpRegPressure();
}
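
// To make the balance above concrete (descriptive note, not from the original
// source): scheduling a use bottom-up charges the cost of one of PredSU's
// remaining defs to RegPressure, because that value becomes live; scheduling
// the defining node itself credits the same cost back, since the live range
// is then entirely inside the already-scheduled zone. Any mismatch between
// the two sides surfaces as the clamped-to-zero case handled above.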

void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  const SDNode *N = SU->getNode();
  if (!N) return;

  if (!N->isMachineOpcode()) {
    if (N->getOpcode() != ISD::CopyToReg)
      return;
  } else {
    unsigned Opc = N->getMachineOpcode();
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::INSERT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::REG_SEQUENCE ||
        Opc == TargetOpcode::IMPLICIT_DEF)
      return;
  }

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    // counts data deps.
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
      continue;
    const SDNode *PN = PredSU->getNode();
    if (!PN->isMachineOpcode()) {
      if (PN->getOpcode() == ISD::CopyFromReg) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
      continue;
    }
    unsigned POpc = PN->getMachineOpcode();
    if (POpc == TargetOpcode::IMPLICIT_DEF)
      continue;
    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
        POpc == TargetOpcode::INSERT_SUBREG ||
        POpc == TargetOpcode::SUBREG_TO_REG) {
      EVT VT = PN->getValueType(0);
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    }
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    for (unsigned i = 0; i != NumDefs; ++i) {
      EVT VT = PN->getValueType(i);
      if (!PN->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
        // Register pressure tracking is imprecise. This can happen.
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }

  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
  // may transfer data dependencies to CopyToReg.
  if (SU->NumSuccs && N->isMachineOpcode()) {
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    }
  }

  dumpRegPressure();
}

//===----------------------------------------------------------------------===//
//           Dynamic Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}
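
// For illustration (not from the original source): a CopyToReg successor is
// looked through rather than counted at its own height, so a stack of copies
// hanging off one value reads as a single consumer position. In BURRSort
// below, the node whose closest successor was scheduled most recently (the
// larger result) is popped first, which is what places a def immediately
// above its use in the final bottom-up order.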

/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *PredSU = I->getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg =
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable whose operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
static void initVRegCycle(SUnit *SU) {
  if (DisableSchedVRegCycle)
    return;

  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    return;

  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");

  SU->isVRegCycle = true;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    I->getSUnit()->isVRegCycle = true;
  }
}

// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
  if (!SU->isVRegCycle)
    return;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    if (PredSU->isVRegCycle) {
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
             "VRegCycle def must be CopyFromReg");
      I->getSUnit()->isVRegCycle = false;
    }
  }
}
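
// For illustration (not from the original source): in a single-block loop
// such as
//
//   loop:
//     t1 = CopyFromReg %vi
//     ...                       // other uses of t1
//     t2 = add t1, 1            // the VRegCycle node
//     CopyToReg %vi, t2
//     brcond loop
//
// the add has only live-in operands and live-out uses, so it and its
// CopyFromReg are flagged. Other uses of t1 are then preferred first, making
// the add the last use ("kill") of t1 and letting the in and out values of
// %vi coalesce without an extra copy in the loop body.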

// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle.
// This means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
  // If this SU also defines the VReg, don't hoist it as a "use".
  if (SU->isVRegCycle)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (I->getSUnit()->isVRegCycle &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
      return true;
    }
  }
  return false;
}

// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
  if ((int)SPQ->getCurCycle() < Height) return true;
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return true;
  return false;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    BUHasStall(right, RHeight, SPQ);

  // If scheduling one of the nodes will cause a pipeline stall, delay it and
  // prefer the other. If scheduling either one will cause a stall, sort them
  // according to their height.
  if (LStall) {
    if (!RStall)
      return 1;
    if (LHeight != RHeight)
      return LHeight > RHeight ? 1 : -1;
  } else if (RStall)
    return -1;

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
                     right->SchedulingPref == Sched::ILP)) {
    if (DisableSchedCycles) {
      if (LHeight != RHeight)
        return LHeight > RHeight ? 1 : -1;
    }
    else {
      // If neither instruction stalls (!LStall && !RStall) then its height is
      // already covered so only its depth matters. We also reach this branch
      // if both stall but have the same height.
      int LDepth = left->getDepth() - LPenalty;
      int RDepth = right->getDepth() - RPenalty;
      if (LDepth != RDepth) {
        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
              << ") depth " << LDepth << " vs SU (" << right->NodeNum
              << ") depth " << RDepth << "\n");
        return LDepth < RDepth ? 1 : -1;
      }
    }
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency ? 1 : -1;
  }
  return 0;
}
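
// For illustration (not from the original source): with CurCycle == 5, a node
// of height 7 stalls (its result cannot retire for another two cycles) while
// a node of height 4 does not, so returning 1 defers the taller node. If both
// stall at the same height, the comparison falls through to depth: the node
// deeper in the DAG sits closer to the critical path and is preferred.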

static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
    #ifndef NDEBUG
      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
    #endif
      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
            << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority)
    return LPriority > RPriority;

  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer the node with the lower non-zero order number; a node with no
    // recorded source order (zero) is scheduled below any ordered node.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  // Comparing latency against a call makes little sense unless the node
  // is register pressure-neutral.
  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    return (left->NodeQueueId > right->NodeQueueId);

  // Do not compare latencies when one or both of the nodes are calls.
  if (!DisableSchedCycles &&
      !(left->isCall || right->isCall)) {
    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  else {
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();

    if (left->getDepth() != right->getDepth())
      return left->getDepth() < right->getDepth();
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
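
// Descriptive note (not from the original source): BURRSort is a cascade of
// tie-breakers, tried in order until one distinguishes the nodes: physreg
// defs, Sethi-Ullman priority (with the call-operand adjustment), source
// order around calls, closest-successor distance, scratch-register count,
// then latency or raw height/depth, and finally NodeQueueId so that otherwise
// equal nodes pop in the order they became ready.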

// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer the node with the lower non-zero order number; a node with no
  // recorded source order (zero) is scheduled below any ordered node.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

// If the time between now and when the instruction will be ready can cover
// the spill code, then avoid adding it to the ready queue. This gives long
// stalls highest priority and allows hoisting across calls. It should also
// speed up processing the available queue.
bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  static const unsigned ReadyDelay = 3;

  if (SPQ->MayReduceRegPressure(SU)) return true;

  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}
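
// For illustration (not from the original source): with ReadyDelay == 3 and
// CurCycle == 10, a node of height 14 is held back (14 > 10 + 3), while a
// node of height 12 is admitted, provided the hazard recognizer reports no
// conflict when probed ReadyDelay cycles out. A node that may reduce register
// pressure bypasses both checks, since scheduling it early is worth a
// possible stall.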

// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh) {
    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
          << right->NodeNum << ")\n");
    return true;
  }
  else if (!LHigh && RHigh) {
    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
          << left->NodeNum << ")\n");
    return false;
  }
  if (!LHigh && !RHigh) {
    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  return BURRSort(left, right, SPQ);
}

// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  if (SU->getHeight() > CurCycle) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

static bool canEnableCoalescing(SUnit *SU) {
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return true;

  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return true;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return true;

  return false;
}

// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  unsigned LLiveUses = 0, RLiveUses = 0;
  int LPDiff = 0, RPDiff = 0;
  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
  }
  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
    return LPDiff > RPDiff;
  }

  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    bool LReduce = canEnableCoalescing(left);
    bool RReduce = canEnableCoalescing(right);
    if (LReduce && !RReduce) return false;
    if (RReduce && !LReduce) return true;
  }

  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
    return LLiveUses < RLiveUses;
  }

  if (!DisableSchedStalls) {
    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    if (LStall != RStall)
      return left->getHeight() > right->getHeight();
  }

  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
            << left->getDepth() << " != SU(" << right->NodeNum << "): "
            << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth();
    }
  }

  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight();
  }

  return BURRSort(left, right, SPQ);
}
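
// Descriptive note (not from the original source): the list-ilp comparison
// above is a cascade of independently switchable heuristics: register
// pressure diff, coalescing opportunity, live-use count, stalls, critical
// path (depth), and height, each gated by its Disable* flag. Depth and height
// may only reorder nodes that are more than MaxReorderWindow apart; anything
// left undecided falls through to the baseline BURRSort.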

void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  // Add pseudo dependency edges for two-address nodes.
  if (!Disable2AddrHack)
    AddPseudoTwoAddrDeps();
  // Reroute edges to nodes with multiple uses.
  if (!TracksRegPressure)
    PrescheduleNodesWithMultipleUses();
  // Calculate node priorities.
  CalculateSethiUllmanNumbers();

  // For single block loops, mark nodes that look like canonical IV increments.
  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
      initVRegCycle(&sunits[i]);
    }
  }
}

//===----------------------------------------------------------------------===//
//                    Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// canClobberReachingPhysRegUse - True if SU would clobber one of its
/// successor's explicit physregs whose definition can reach DepSU.
/// i.e. DepSU should not be scheduled above SU.
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
                                         ScheduleDAGRRList *scheduleDAG,
                                         const TargetInstrInfo *TII,
                                         const TargetRegisterInfo *TRI) {
  const unsigned *ImpDefs
    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
  if (!ImpDefs)
    return false;

  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
       SI != SE; ++SI) {
    SUnit *SuccSU = SI->getSUnit();
    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
      if (!PI->isAssignedRegDep())
        continue;

      for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
        // Return true if SU clobbers this physical register use and the
        // definition of the register reaches from DepSU. IsReachable queries
        // a topological forward sort of the DAG (following the successors).
        if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
            scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
          return true;
      }
    }
  }
  return false;
}
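
// For illustration (not from the original source): a typical instance is an
// instruction that implicitly defines a condition register such as EFLAGS on
// x86. If one of SU's successors consumes that physreg through an assigned
// register dependence reachable from DepSU, constraining DepSU above SU could
// interpose a second definition between the def and its use, so
// AddPseudoTwoAddrDeps below declines to add its pseudo edge in that case.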

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (; *SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///       N
///     /  |
///    /   |
///   U   store
///   |
///  ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///    store
///      |
///      U
///      |
///     ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
           EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
           EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
          << " next to PredSU #" << PredSU->NodeNum
          << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
             E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
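
// For illustration (not from the original source): on a two-address target,
// "a = add a, b" ties its destination to its first operand. Any other node
// that reads the old value of "a" must execute before the add destroys it, or
// the register allocator has to insert a copy. The artificial order edge
// added above encodes exactly that: the non-clobbering reader is placed above
// the two-address node in the final schedule.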

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}