ScheduleDAGRRList.cpp revision 4c727204271067f3dbf50bd23098b2df8e1cc47a
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));
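
// Usage sketch (hypothetical command line): the schedulers and knobs above
// are selected when invoking llc, e.g.
//
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 foo.ll
//
// The scheduler names ("list-burr", "source", "list-hybrid", "list-ilp")
// come from the RegisterScheduler entries above; the other flags follow the
// cl::opt spellings declared in this file.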

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that modify
  /// the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }
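  // Note: all structural edits to the DAG during scheduling are funneled
  // through AddPred/RemovePred (and CreateNewSUnit/CreateClone below) so
  // that Topo is updated alongside the SUnit graph; an edge added behind
  // Topo's back would make IsReachable and WillCreateCycle queries stale.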
private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}
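
// Illustration of GetCostForDef on an untyped value (schematic, with a
// hypothetical register class RCId):
//
//   v = REG_SEQUENCE <RCId>, x, sub0, y, sub1   ; produces MVT::Untyped
//
// Operand 0 carries the destination register class ID, so RegClass = RCId
// and Cost = 1. For a typed value such as MVT::i32, the class and cost come
// from TLI->getRepRegClassFor(VT) and TLI->getRepRegClassCostFor(VT) instead.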

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
  CallSeqEndForStart.clear();

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue,
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = 0;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return 0;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return 0;
  }
}
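
// Example of the nesting FindCallSeqStart must see through (schematic chain,
// climbing bottom-up from the outer CALLSEQ_END):
//
//   CALLSEQ_BEGIN            ; NestLevel 1 -> 0: match found, return
//     CALLSEQ_BEGIN          ; NestLevel 2 -> 1
//     CALLSEQ_END            ; NestLevel 1 -> 2
//   CALLSEQ_END              ; start: NestLevel 0 -> 1
//
// Each CALLSEQ_END seen while climbing increments NestLevel and each
// CALLSEQ_BEGIN decrements it; the walk stops at the CALLSEQ_BEGIN that
// brings the level back to zero, which is the matching outer one.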

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on
  // the available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}
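
// Note on the stall loop above: bottom-up scheduling recedes through cycles,
// so the hazard recognizer is probed with a negative stall offset
// (getHazardType(SU, -Stalls)); Stalls only grows while the recognizer keeps
// reporting something other than NoHazard, and the final AdvanceToCycle()
// charges all of the accumulated stall cycles at once.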

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}
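
// Illustration of the cycle bookkeeping above, under the default
// -sched-avg-ipc=1 with no hazard recognizer: CurCycle is advanced once per
// scheduled node *before* ReleasePredecessors, and IssueCount stays 0. With
// -sched-avg-ipc=2, two machine nodes may issue per cycle, and the cycle
// advances only when IssueCount reaches AvgIPC (or, when a hazard recognizer
// is enabled, when its issue limit is hit).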

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}
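
// After BacktrackBottomUp pops the sequence back to BtSU, the hazard
// recognizer no longer matches the shortened Sequence, so
// RestoreHazardCheckerBottomUp() replays up to getMaxLookAhead() of the most
// recently scheduled nodes to rebuild its state, and ReleasePending()
// re-examines nodes whose ready cycle may now precede CurCycle.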

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    // Unfolding an x86 DEC64m operation results in a store, dec, and load,
    // which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatileness.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
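
// Example of the unfolding path above (x86-flavored, for illustration only):
// a node with a folded load such as
//
//   t = ADD32rm a, [mem]      ; load folded into the add
//
// is split by unfoldMemoryOperand() into
//
//   l = MOV32rm [mem]         ; becomes LoadSU
//   t = ADD32rr a, l          ; becomes NewSU
//
// so the load can be scheduled separately, breaking the physical register
// interference that made the original node unschedulable.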

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
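
// The two copies created above form the chain SU -> CopyFromSU -> CopyToSU:
// CopyFromSU moves the physical register value into the cross-copy class
// DestRC, and CopyToSU moves it back into SrcRC for the already-scheduled
// successors. When DestRC differs from SrcRC these are genuinely expensive
// cross-register-class copies, which is why this path is a last resort.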

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Update the live register vector and the list of
/// interfering registers if the specified register def of the specified SUnit
/// clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Ref is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     std::vector<SUnit*> &LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVector<unsigned, 4> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i))
      LRegs.push_back(i);
  }
}
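
// Worked example for getPhysicalRegisterVT (register names are x86-flavored
// and purely illustrative): for an instruction with two explicit defs and
// implicit defs [EFLAGS, EAX], a query for EAX skips EFLAGS, giving
// NumRes = 2 (explicit) + 1 (skipped implicit) = 3, so the function returns
// N->getValueType(3).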

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (const RegisterMaskSDNode *Op =
        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
      return Op->getRegMask();
  return NULL;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i;  // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
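
// Note on the inline-asm walk above: each operand group begins with a flag
// word encoding its kind and register count, so the loop steps by
// 1 + NumVals per group. Only def, earlyclobber, and clobber groups can
// interfere with live physical registers; plain use groups are skipped.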

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // duplicating the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;
  reverse_sort(SF &sf) : SortFunc(sf) {}
  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif  // NDEBUG

/// bu_ls_rr_sort - Priority function for bottom up register pressure
/// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  ///
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  std::vector<SUnit *>::iterator Best = Q.begin();
  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
         E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != prior(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;
  return popFromQueueImpl(Q, Picker);
}
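
// popFromQueue() is the single pick point for all four sort functors defined
// above. In +Asserts builds, when the ScheduleDAG's StressSched flag is set
// (a hidden debugging mode), reverse_sort inverts every comparison, which
// helps flush out code that silently depends on a particular scheduling
// order.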
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 1683 1684 void ScheduledNode(SUnit *SU); 1685 1686 void UnscheduledNode(SUnit *SU); 1687 1688protected: 1689 bool canClobber(const SUnit *SU, const SUnit *Op); 1690 void AddPseudoTwoAddrDeps(); 1691 void PrescheduleNodesWithMultipleUses(); 1692 void CalculateSethiUllmanNumbers(); 1693}; 1694 1695template<class SF> 1696static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { 1697 std::vector<SUnit *>::iterator Best = Q.begin(); 1698 for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), 1699 E = Q.end(); I != E; ++I) 1700 if (Picker(*Best, *I)) 1701 Best = I; 1702 SUnit *V = *Best; 1703 if (Best != prior(Q.end())) 1704 std::swap(*Best, Q.back()); 1705 Q.pop_back(); 1706 return V; 1707} 1708 1709template<class SF> 1710SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { 1711#ifndef NDEBUG 1712 if (DAG->StressSched) { 1713 reverse_sort<SF> RPicker(Picker); 1714 return popFromQueueImpl(Q, RPicker); 1715 } 1716#endif 1717 (void)DAG; 1718 return popFromQueueImpl(Q, Picker); 1719} 1720 1721template<class SF> 1722class RegReductionPriorityQueue : public RegReductionPQBase { 1723 SF Picker; 1724 1725public: 1726 RegReductionPriorityQueue(MachineFunction &mf, 1727 bool tracksrp, 1728 const TargetInstrInfo *tii, 1729 const TargetRegisterInfo *tri, 1730 const TargetLowering *tli) 1731 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), 1732 Picker(this) {} 1733 1734 bool isBottomUp() const { return SF::IsBottomUp; } 1735 1736 bool isReady(SUnit *U) const { 1737 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1738 } 1739 1740 SUnit *pop() { 1741 if (Queue.empty()) return NULL; 1742 1743 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1744 V->NodeQueueId = 0; 1745 return V; 1746 } 1747 1748 void dump(ScheduleDAG *DAG) const { 1749 // Emulate pop() without clobbering NodeQueueIds. 1750 std::vector<SUnit*> DumpQueue = Queue; 1751 SF DumpPicker = Picker; 1752 while (!DumpQueue.empty()) { 1753 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1754 dbgs() << "Height " << SU->getHeight() << ": "; 1755 SU->dump(DAG); 1756 } 1757 } 1758}; 1759 1760typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1761BURegReductionPriorityQueue; 1762 1763typedef RegReductionPriorityQueue<src_ls_rr_sort> 1764SrcRegReductionPriorityQueue; 1765 1766typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1767HybridBURRPriorityQueue; 1768 1769typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 1770ILPBURRPriorityQueue; 1771} // end anonymous namespace 1772 1773//===----------------------------------------------------------------------===// 1774// Static Node Priority for Register Pressure Reduction 1775//===----------------------------------------------------------------------===// 1776 1777// Check for special nodes that bypass scheduling heuristics. 1778// Currently this pushes TokenFactor nodes down, but may be used for other 1779// pseudo-ops as well. 1780// 1781// Return -1 to schedule right above left, 1 for left above right. 1782// Return 0 if no bias exists. 1783static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1784 bool LSchedLow = left->isScheduleLow; 1785 bool RSchedLow = right->isScheduleLow; 1786 if (LSchedLow != RSchedLow) 1787 return LSchedLow < RSchedLow ? 1 : -1; 1788 return 0; 1789} 1790 1791/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1792/// Smaller number is the higher priority. 
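
// Explanatory note (not from the original source): the queue here is an
// unsorted vector, so popFromQueueImpl is a single O(N) scan. The Picker
// predicate follows the comparators below: Picker(A, B) == true means B
// should be scheduled with higher priority than A, so the loop keeps the
// most-preferred element in Best, and the swap-with-back idiom then removes
// it in O(1) without shifting the rest. A minimal standalone sketch of the
// same idiom, assuming a plain vector of ints where "bigger is better":
//
//   std::vector<int> Q;                    // say Q holds 3, 1, 4, 1, 5
//   std::vector<int>::iterator Best = Q.begin();
//   for (std::vector<int>::iterator I = Q.begin() + 1, E = Q.end();
//        I != E; ++I)
//     if (*Best < *I)                      // picker: prefer the larger value
//       Best = I;
//   int V = *Best;                         // V == 5
//   std::swap(*Best, Q.back());
//   Q.pop_back();                          // survivors: 3, 1, 4, 1 (reordered)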

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() {
    if (Queue.empty()) return NULL;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;
    return V;
  }

  void dump(ScheduleDAG *DAG) const {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit*> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      dbgs() << "Height " << SU->getHeight() << ": ";
      SU->dump(DAG);
    }
  }
};

typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;

typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;

typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;

typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
ILPBURRPriorityQueue;
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
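
// Worked example (illustrative, not from the original source). For the
// expression tree
//
//   a   b   c   d        a, b, c, d: leaf SUnits with no data preds
//    \ /     \ /
//     +       *
//      \     /
//        sub
//
// each leaf gets number 1 (the 0 -> 1 fixup above). '+' and '*' each see two
// predecessors with equal number 1, so Extra becomes 1 and they get 1 + 1 = 2.
// 'sub' sees two predecessors with equal number 2 and gets 2 + 1 = 3. Larger
// numbers mark subtrees that need more registers at once; since the bottom-up
// scheduler pops smaller-numbered nodes first (placing them lower in the
// block), the register-hungry subtree ends up earlier in program order, which
// is the classic Sethi-Ullman evaluation order.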

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

void RegReductionPQBase::addNode(const SUnit *SU) {
  unsigned SUSize = SethiUllmanNumbers.size();
  if (SUnits->size() > SUSize)
    SethiUllmanNumbers.resize(SUSize*2, 0);
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SethiUllmanNumbers.size());
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return 0;
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return 0;
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors, so that it doesn't lengthen
    // their live ranges.
    return 0xffff;
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
#if 1
  return SethiUllmanNumbers[SU->NodeNum];
#else
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
  if (SU->isCallOp) {
    // FIXME: This assumes all of the defs are used as call operands.
    int NP = (int)Priority - SU->getNode()->getNumValues();
    return (NP > 0) ? NP : 0;
  }
  return Priority;
#endif
}

//===----------------------------------------------------------------------===//
// Register Pressure Tracking
//===----------------------------------------------------------------------===//

void RegReductionPQBase::dumpRegPressure() const {
  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
         E = TRI->regclass_end(); I != E; ++I) {
    const TargetRegisterClass *RC = *I;
    unsigned Id = RC->getID();
    unsigned RP = RegPressure[Id];
    if (!RP) continue;
    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
          << '\n');
  }
}

bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
  if (!TLI)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);

      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
        return true;
    }
  }
  return false;
}

bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
  const SDNode *N = SU->getNode();

  if (!N->isMachineOpcode() || !SU->NumSuccs)
    return false;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    EVT VT = N->getValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      return true;
  }
  return false;
}
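
// Illustrative numbers for the two predicates above (hypothetical target
// with RegLimit[RC] == 8 for some class RC): if RegPressure[RC] is 7 and
// scheduling SU would make a predecessor's def of Cost 1 in RC live, then
// HighRegPressure(SU) sees 7 + 1 >= 8 and returns true. Conversely, because
// this is a bottom-up scheduler, a def's live range ends when the defining
// node itself is scheduled; so if SU defines a used value in RC while
// RegPressure[RC] >= RegLimit[RC], MayReduceRegPressure(SU) returns true to
// flag SU as a way to shrink the live set.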

// Compute the register pressure contribution of this instruction by counting
// up for uses that are not live and down for defs. Only count register classes
// that are already under high pressure. As a side effect, compute the number of
// uses of registers that are already live.
//
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
// so could probably be factored.
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
  LiveUses = 0;
  int PDiff = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      if (PredSU->getNode()->isMachineOpcode())
        ++LiveUses;
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      EVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] >= RegLimit[RCId])
        ++PDiff;
    }
  }
  const SDNode *N = SU->getNode();

  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    return PDiff;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    EVT VT = N->getValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      --PDiff;
  }
  return PDiff;
}
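
// Worked example (hypothetical SUnit, one class RC already at its limit):
// if SU uses two not-yet-live values in RC (predecessors with
// NumRegDefsLeft > 0) and defines one used value in RC, the uses contribute
// +2 and the def -1, giving PDiff == +1: scheduling SU is a net pressure
// increase. A negative PDiff marks a net pressure reducer; in
// ilp_ls_rr_sort::operator() below, "return LPDiff > RPDiff" hands priority
// to the node with the smaller (more negative) diff.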

void RegReductionPQBase::ScheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  if (!SU->getNode())
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    // FIXME: The ScheduleDAG currently loses information about which of a
    // node's values is consumed by each dependence. Consequently, if the node
    // defines multiple register classes, we don't know which to pressurize
    // here. Instead the following loop consumes the register defs in an
    // arbitrary order. At least it handles the common case of clustered loads
    // to the same class. For precise liveness, each SDep needs to indicate the
    // result number. But that tightly couples the ScheduleDAG with the
    // SelectionDAG, making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;

      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
      RegPressure[RCId] += Cost;
      break;
    }
  }

  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    unsigned RCId, Cost;
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
    if (RegPressure[RCId] < Cost) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    } else {
      RegPressure[RCId] -= Cost;
    }
  }
  dumpRegPressure();
}

void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  const SDNode *N = SU->getNode();
  if (!N) return;

  if (!N->isMachineOpcode()) {
    if (N->getOpcode() != ISD::CopyToReg)
      return;
  } else {
    unsigned Opc = N->getMachineOpcode();
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::INSERT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::REG_SEQUENCE ||
        Opc == TargetOpcode::IMPLICIT_DEF)
      return;
  }

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl())
      continue;
    SUnit *PredSU = I->getSUnit();
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    // counts data deps.
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
      continue;
    const SDNode *PN = PredSU->getNode();
    if (!PN->isMachineOpcode()) {
      if (PN->getOpcode() == ISD::CopyFromReg) {
        EVT VT = PN->getValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
      continue;
    }
    unsigned POpc = PN->getMachineOpcode();
    if (POpc == TargetOpcode::IMPLICIT_DEF)
      continue;
    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
        POpc == TargetOpcode::INSERT_SUBREG ||
        POpc == TargetOpcode::SUBREG_TO_REG) {
      EVT VT = PN->getValueType(0);
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    }
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    for (unsigned i = 0; i != NumDefs; ++i) {
      EVT VT = PN->getValueType(i);
      if (!PN->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
        // Register pressure tracking is imprecise. This can happen.
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }

  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
  // may transfer data dependencies to CopyToReg.
  if (SU->NumSuccs && N->isMachineOpcode()) {
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    }
  }

  dumpRegPressure();
}
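
// Context for the pair above (explanatory note, not in the original source):
// the bottom-up list scheduler calls ScheduledNode each time it commits a
// node and UnscheduledNode only when it backtracks over one, so the latter is
// intended as the approximate inverse of the former. Because the two
// directions cannot always recover identical cost information, both clamp
// RegPressure[RCId] at zero instead of asserting.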

//===----------------------------------------------------------------------===//
// Dynamic Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *PredSU = I->getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg =
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}
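
// Concrete shape the next few functions look for (illustrative SelectionDAG
// fragment for a single-block loop with induction variable iv, hypothetical
// virtual register vreg0):
//
//   t0 = CopyFromReg vreg0        ; iv is live into the block
//   t1 = add t0, 1                ; hasOnlyLiveInOpers(add) == true
//        CopyToReg vreg0, t1      ; hasOnlyLiveOutUses(add) == true
//
// initVRegCycle below marks the add and its CopyFromReg operand with
// isVRegCycle, which later biases the comparators to make this add the last
// use of t0, so that t0 and t1 can coalesce into one register across the
// loop back edge.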

// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
static void initVRegCycle(SUnit *SU) {
  if (DisableSchedVRegCycle)
    return;

  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    return;

  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");

  SU->isVRegCycle = true;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    I->getSUnit()->isVRegCycle = true;
  }
}

// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
  if (!SU->isVRegCycle)
    return;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    if (PredSU->isVRegCycle) {
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
             "VRegCycle def must be CopyFromReg");
      I->getSUnit()->isVRegCycle = 0;
    }
  }
}

// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle.
// This means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
  // If this SU also defines the VReg, don't hoist it as a "use".
  if (SU->isVRegCycle)
    return false;

  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (I->getSUnit()->isVRegCycle &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
      return true;
    }
  }
  return false;
}

// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
  if ((int)SPQ->getCurCycle() < Height) return true;
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return true;
  return false;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
    BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
    BUHasStall(right, RHeight, SPQ);

  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both would cause a pipeline stall, sort them according to
  // their height.
  if (LStall) {
    if (!RStall)
      return 1;
    if (LHeight != RHeight)
      return LHeight > RHeight ? 1 : -1;
  } else if (RStall)
    return -1;

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
                     right->SchedulingPref == Sched::ILP)) {
    if (DisableSchedCycles) {
      if (LHeight != RHeight)
        return LHeight > RHeight ? 1 : -1;
    } else {
      // If neither instruction stalls (!LStall && !RStall) then its height is
      // already covered so only its depth matters. We also reach this if both
      // stall but have the same height.
      int LDepth = left->getDepth() - LPenalty;
      int RDepth = right->getDepth() - RPenalty;
      if (LDepth != RDepth) {
        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
              << ") depth " << LDepth << " vs SU (" << right->NodeNum
              << ") depth " << RDepth << "\n");
        return LDepth < RDepth ? 1 : -1;
      }
    }
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency ? 1 : -1;
  }
  return 0;
}
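
// Decision summary for BUCompareLatency (explanatory note derived from the
// code above): with "prefer X" meaning X is popped first and thus placed
// closer to the end of the block,
//
//   left stalls, right doesn't  -> return  1 (prefer right)
//   right stalls, left doesn't  -> return -1 (prefer left)
//   both stall                  -> prefer the smaller (penalized) height
//   latency mode                -> prefer the greater depth, then the smaller
//                                  Latency; popping the cheap node first
//                                  leaves the long-latency node to land
//                                  earlier in program order, hiding its
//                                  latency.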

static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
      #ifndef NDEBUG
      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
      #endif
      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
            << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority)
    return LPriority > RPriority;

  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer an ordering where the lower the non-zero order number, the higher
    // the preference.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  // Comparing latency against a call makes little sense unless the node
  // is register pressure-neutral.
  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    return (left->NodeQueueId > right->NodeQueueId);

  // Do not compare latencies when one or both of the nodes are calls.
  if (!DisableSchedCycles &&
      !(left->isCall || right->isCall)) {
    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  } else {
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();

    if (left->getDepth() != right->getDepth())
      return left->getDepth() < right->getDepth();
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
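
// Recap of BURRSort's tie-breaker chain (explanatory, not in the original):
// physreg defs join their uses first; then Sethi-Ullman priority with the
// call-operand adjustment; then source order when calls are involved; then
// closestSucc to keep a def near its use; then calcMaxScratches; then
// latency via BUCompareLatency (or raw height/depth when cycle-level
// precision is disabled); and finally NodeQueueId, which makes the
// comparison a total order so popFromQueueImpl is deterministic.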

// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the preference.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

// If the time between now and when the instruction will be ready can cover
// the spill code, then avoid adding it to the ready queue. This gives long
// stalls highest priority and allows hoisting across calls. It should also
// speed up processing the available queue.
bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  static const unsigned ReadyDelay = 3;

  if (SPQ->MayReduceRegPressure(SU)) return true;

  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh) {
    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
          << right->NodeNum << ")\n");
    return true;
  } else if (!LHigh && RHigh) {
    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
          << left->NodeNum << ")\n");
    return false;
  }
  if (!LHigh && !RHigh) {
    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  return BURRSort(left, right, SPQ);
}

// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  if (SU->getHeight() > CurCycle) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

static bool canEnableCoalescing(SUnit *SU) {
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return true;

  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return true;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return true;

  return false;
}

// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  unsigned LLiveUses = 0, RLiveUses = 0;
  int LPDiff = 0, RPDiff = 0;
  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
  }
  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
    return LPDiff > RPDiff;
  }

  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    bool LReduce = canEnableCoalescing(left);
    bool RReduce = canEnableCoalescing(right);
    if (LReduce && !RReduce) return false;
    if (RReduce && !LReduce) return true;
  }

  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
    return LLiveUses < RLiveUses;
  }

  if (!DisableSchedStalls) {
    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    if (LStall != RStall)
      return left->getHeight() > right->getHeight();
  }

  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
            << left->getDepth() << " != SU(" << right->NodeNum << "): "
            << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth();
    }
  }

  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight();
  }

  return BURRSort(left, right, SPQ);
}
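
// Note on experimenting with the heuristic chain above (explanatory, not in
// the original source): each stage is gated by its own cl::opt flag
// (DisableSchedRegPressure, DisableSchedLiveUses, DisableSchedStalls,
// DisableSchedCriticalPath, DisableSchedHeight), so individual heuristics
// can be toggled from the command line without rebuilding, e.g.
//
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure foo.ll
//
// falls through to the live-use and critical-path checks (bounded by
// MaxReorderWindow) while keeping the rest of the chain intact.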

void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  // Add pseudo dependency edges for two-address nodes.
  if (!Disable2AddrHack)
    AddPseudoTwoAddrDeps();
  // Reroute edges to nodes with multiple uses.
  if (!TracksRegPressure)
    PrescheduleNodesWithMultipleUses();
  // Calculate node priorities.
  CalculateSethiUllmanNumbers();

  // For single block loops, mark nodes that look like canonical IV increments.
  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
      initVRegCycle(&sunits[i]);
    }
  }
}

//===----------------------------------------------------------------------===//
// Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// canClobberReachingPhysRegUse - True if SU would clobber one of its
/// successor's explicit physregs whose definition can reach DepSU.
/// i.e. DepSU should not be scheduled above SU.
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
                                         ScheduleDAGRRList *scheduleDAG,
                                         const TargetInstrInfo *TII,
                                         const TargetRegisterInfo *TRI) {
  const unsigned *ImpDefs
    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
  if (!ImpDefs && !RegMask)
    return false;

  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
       SI != SE; ++SI) {
    SUnit *SuccSU = SI->getSUnit();
    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
      if (!PI->isAssignedRegDep())
        continue;

      if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
          scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
        return true;

      if (ImpDefs)
        for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
          // Return true if SU clobbers this physical register use and the
          // definition of the register reaches from DepSU. IsReachable queries
          // a topological forward sort of the DAG (following the successors).
          if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
              scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
            return true;
    }
  }
  return false;
}
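
// Concrete instance of the hazard checked above (illustrative; x86 used as
// an example target): suppose SU implicitly defines EFLAGS (say a SUB) and
// some successor of SU has a predecessor that reads EFLAGS through an
// assigned reg dep (say a conditional branch fed by a CMP). If that flags
// reader is reachable from DepSU, scheduling DepSU above SU could let SU's
// implicit def clobber the flags between the CMP and its branch, so the
// caller must not add an edge forcing that order.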

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    const uint32_t *SURegMask = getNodeRegMask(SUNode);
    if (!SUImpDefs && !SURegMask)
      continue;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
        return true;
      if (!SUImpDefs)
        continue;
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
           EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
           EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
          << " next to PredSU #" << PredSU->NodeNum
          << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
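
// Motivating example for the pass below (illustrative two-address
// pseudo-assembly; on a two-address target "add t0, t1" both reads and
// writes t0):
//
//   t0 = ...
//   t2 = or t0, ...      ; plain use of t0
//   t0 = add t0, t1      ; two-address: def tied to the first operand
//
// If the add were placed above the or, two overlapping values of t0 would be
// live at once and the register allocator would need a copy to untangle
// them. The artificial edge added below keeps the plain use above the
// two-address node, so the tied operand can stay in one register.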

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
             E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
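
// Usage note (explanatory, not in the original source): each factory above
// is registered under a name in the scheduler registry near the top of this
// file, so the queue/scheduler pairs they build can be selected on the llc
// command line via -pre-RA-sched, e.g.
//
//   llc -pre-RA-sched=list-ilp -debug-only=pre-RA-sched foo.ll
//
// constructs an ILPBURRPriorityQueue plus a latency-tracking
// ScheduleDAGRRList, exactly as createILPListDAGScheduler does above (the
// -debug-only output requires an assertions-enabled build).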