//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live".  These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values.  These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
  CallSeqEndForStart.clear();

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = 0;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return 0;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return 0;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
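  //
  // The call-sequence resource is modeled as an extra pseudo-register slot at
  // index TRI->getNumRegs() (allocated in Schedule() above). Treating the
  // region between CALLSEQ_BEGIN and CALLSEQ_END as the live range of that
  // pseudo-register lets the ordinary live-physreg interference machinery
  // (DelayForLiveRegsBottomUp) keep unrelated call sequences from being
  // interleaved. Roughly, the intended state for a scheduled call is:
  //
  //   t1 = CALLSEQ_BEGIN   <- LiveRegDefs[CallResource]
  //   ... call body ...
  //   t2 = CALLSEQ_END     <- LiveRegGens[CallResource]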
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward to the specified cycle.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to the cycle of BtSU in order to
/// schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
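///
/// This is used by PickNodeToScheduleBottomUp when every available node is
/// blocked by a live physical register: duplicating the defining node (or
/// first unfolding a folded load, when the node has a chain result) can break
/// the interference. Returns NULL when cloning is not possible, e.g. when the
/// node produces or consumes glue.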
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    // Unfolding an x86 DEC64m operation results in a store, a dec, and a
    // load, which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      computeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
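    // Chain predecessors and predecessors that feed the load move to LoadSU
    // (when it is newly created); data predecessors and data successors are
    // rewired to NewSU; chain successors are rewired to LoadSU.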
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Check whether the specified register def of the given
/// SUnit clobbers any "live" registers; if so, record the interfering
/// registers in RegAdded/LRegs.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Ref is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     std::vector<SUnit*> &LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVector<unsigned, 4> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i))
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
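/// Register masks are typically attached to call nodes and describe the set
/// of physical registers clobbered by the node; CheckForLiveRegDefMasked
/// (above) uses them to detect interference with live register defs.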
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (const RegisterMaskSDNode *Op =
        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
      return Op->getRegMask();
  return NULL;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
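      //
      // If another call sequence currently owns the CallResource slot and its
      // CALLSEQ_END is not chain-dependent on this node (i.e. this call is not
      // nested within it), report interference on the CallResource
      // pseudo-register so this node is delayed rather than interleaved.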
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors have been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // duplicating the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies; these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
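  //
  // Each iteration: PickNodeToScheduleBottomUp resolves any live physical
  // register interference (possibly by backtracking or cloning),
  // AdvancePastStalls moves the cycle past hazards for the chosen node, and
  // ScheduleNodeBottomUp emits it. When only pending (not yet ready) nodes
  // remain, the cycle is advanced to MinAvailableCycle to release them.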
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;
  reverse_sort(SF &sf) : SortFunc(sf) {}
  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG

// bu_ls_rr_sort - Priority function for bottom-up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
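// It balances ILP against register pressure; individual sub-heuristics can be
// toggled with the -disable-sched-* flags defined at the top of this file,
// and -max-sched-reorder bounds how many instructions may be scheduled ahead
// of the critical path.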
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;
  bool SrcOrder;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  ///
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     bool srcorder,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;
1690 1691 bool MayReduceRegPressure(SUnit *SU) const; 1692 1693 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 1694 1695 void scheduledNode(SUnit *SU); 1696 1697 void unscheduledNode(SUnit *SU); 1698 1699protected: 1700 bool canClobber(const SUnit *SU, const SUnit *Op); 1701 void AddPseudoTwoAddrDeps(); 1702 void PrescheduleNodesWithMultipleUses(); 1703 void CalculateSethiUllmanNumbers(); 1704}; 1705 1706template<class SF> 1707static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { 1708 std::vector<SUnit *>::iterator Best = Q.begin(); 1709 for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), 1710 E = Q.end(); I != E; ++I) 1711 if (Picker(*Best, *I)) 1712 Best = I; 1713 SUnit *V = *Best; 1714 if (Best != prior(Q.end())) 1715 std::swap(*Best, Q.back()); 1716 Q.pop_back(); 1717 return V; 1718} 1719 1720template<class SF> 1721SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { 1722#ifndef NDEBUG 1723 if (DAG->StressSched) { 1724 reverse_sort<SF> RPicker(Picker); 1725 return popFromQueueImpl(Q, RPicker); 1726 } 1727#endif 1728 (void)DAG; 1729 return popFromQueueImpl(Q, Picker); 1730} 1731 1732template<class SF> 1733class RegReductionPriorityQueue : public RegReductionPQBase { 1734 SF Picker; 1735 1736public: 1737 RegReductionPriorityQueue(MachineFunction &mf, 1738 bool tracksrp, 1739 bool srcorder, 1740 const TargetInstrInfo *tii, 1741 const TargetRegisterInfo *tri, 1742 const TargetLowering *tli) 1743 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, 1744 tii, tri, tli), 1745 Picker(this) {} 1746 1747 bool isBottomUp() const { return SF::IsBottomUp; } 1748 1749 bool isReady(SUnit *U) const { 1750 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1751 } 1752 1753 SUnit *pop() { 1754 if (Queue.empty()) return NULL; 1755 1756 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1757 V->NodeQueueId = 0; 1758 return V; 1759 } 1760 1761#ifndef NDEBUG 1762 void dump(ScheduleDAG *DAG) const { 1763 // Emulate pop() without clobbering NodeQueueIds. 1764 std::vector<SUnit*> DumpQueue = Queue; 1765 SF DumpPicker = Picker; 1766 while (!DumpQueue.empty()) { 1767 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1768 dbgs() << "Height " << SU->getHeight() << ": "; 1769 SU->dump(DAG); 1770 } 1771 } 1772#endif 1773}; 1774 1775typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1776BURegReductionPriorityQueue; 1777 1778typedef RegReductionPriorityQueue<src_ls_rr_sort> 1779SrcRegReductionPriorityQueue; 1780 1781typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1782HybridBURRPriorityQueue; 1783 1784typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 1785ILPBURRPriorityQueue; 1786} // end anonymous namespace 1787 1788//===----------------------------------------------------------------------===// 1789// Static Node Priority for Register Pressure Reduction 1790//===----------------------------------------------------------------------===// 1791 1792// Check for special nodes that bypass scheduling heuristics. 1793// Currently this pushes TokenFactor nodes down, but may be used for other 1794// pseudo-ops as well. 1795// 1796// Return -1 to schedule right above left, 1 for left above right. 1797// Return 0 if no bias exists. 1798static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1799 bool LSchedLow = left->isScheduleLow; 1800 bool RSchedLow = right->isScheduleLow; 1801 if (LSchedLow != RSchedLow) 1802 return LSchedLow < RSchedLow ? 
1 : -1; 1803 return 0; 1804} 1805 1806/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1807/// Smaller number is the higher priority. 1808static unsigned 1809CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1810 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1811 if (SethiUllmanNumber != 0) 1812 return SethiUllmanNumber; 1813 1814 unsigned Extra = 0; 1815 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1816 I != E; ++I) { 1817 if (I->isCtrl()) continue; // ignore chain preds 1818 SUnit *PredSU = I->getSUnit(); 1819 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1820 if (PredSethiUllman > SethiUllmanNumber) { 1821 SethiUllmanNumber = PredSethiUllman; 1822 Extra = 0; 1823 } else if (PredSethiUllman == SethiUllmanNumber) 1824 ++Extra; 1825 } 1826 1827 SethiUllmanNumber += Extra; 1828 1829 if (SethiUllmanNumber == 0) 1830 SethiUllmanNumber = 1; 1831 1832 return SethiUllmanNumber; 1833} 1834 1835/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1836/// scheduling units. 1837void RegReductionPQBase::CalculateSethiUllmanNumbers() { 1838 SethiUllmanNumbers.assign(SUnits->size(), 0); 1839 1840 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1841 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1842} 1843 1844void RegReductionPQBase::addNode(const SUnit *SU) { 1845 unsigned SUSize = SethiUllmanNumbers.size(); 1846 if (SUnits->size() > SUSize) 1847 SethiUllmanNumbers.resize(SUSize*2, 0); 1848 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1849} 1850 1851void RegReductionPQBase::updateNode(const SUnit *SU) { 1852 SethiUllmanNumbers[SU->NodeNum] = 0; 1853 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1854} 1855 1856// Lower priority means schedule further down. For bottom-up scheduling, lower 1857// priority SUs are scheduled before higher priority SUs. 1858unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 1859 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1860 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1861 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1862 // CopyToReg should be close to its uses to facilitate coalescing and 1863 // avoid spilling. 1864 return 0; 1865 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1866 Opc == TargetOpcode::SUBREG_TO_REG || 1867 Opc == TargetOpcode::INSERT_SUBREG) 1868 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1869 // close to their uses to facilitate coalescing. 1870 return 0; 1871 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1872 // If SU does not have a register use, i.e. it doesn't produce a value 1873 // that would be consumed (e.g. store), then it terminates a chain of 1874 // computation. Give it a large SethiUllman number so it will be 1875 // scheduled right before its predecessors that it doesn't lengthen 1876 // their live ranges. 1877 return 0xffff; 1878 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1879 // If SU does not have a register def, schedule it close to its uses 1880 // because it does not lengthen any live ranges. 1881 return 0; 1882#if 1 1883 return SethiUllmanNumbers[SU->NodeNum]; 1884#else 1885 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 1886 if (SU->isCallOp) { 1887 // FIXME: This assumes all of the defs are used as call operands. 1888 int NP = (int)Priority - SU->getNode()->getNumValues(); 1889 return (NP > 0) ? 
NP : 0; 1890 } 1891 return Priority; 1892#endif 1893} 1894 1895//===----------------------------------------------------------------------===// 1896// Register Pressure Tracking 1897//===----------------------------------------------------------------------===// 1898 1899void RegReductionPQBase::dumpRegPressure() const { 1900#ifndef NDEBUG 1901 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1902 E = TRI->regclass_end(); I != E; ++I) { 1903 const TargetRegisterClass *RC = *I; 1904 unsigned Id = RC->getID(); 1905 unsigned RP = RegPressure[Id]; 1906 if (!RP) continue; 1907 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 1908 << '\n'); 1909 } 1910#endif 1911} 1912 1913bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 1914 if (!TLI) 1915 return false; 1916 1917 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1918 I != E; ++I) { 1919 if (I->isCtrl()) 1920 continue; 1921 SUnit *PredSU = I->getSUnit(); 1922 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1923 // to cover the number of registers defined (they are all live). 1924 if (PredSU->NumRegDefsLeft == 0) { 1925 continue; 1926 } 1927 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1928 RegDefPos.IsValid(); RegDefPos.Advance()) { 1929 unsigned RCId, Cost; 1930 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 1931 1932 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1933 return true; 1934 } 1935 } 1936 return false; 1937} 1938 1939bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 1940 const SDNode *N = SU->getNode(); 1941 1942 if (!N->isMachineOpcode() || !SU->NumSuccs) 1943 return false; 1944 1945 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1946 for (unsigned i = 0; i != NumDefs; ++i) { 1947 EVT VT = N->getValueType(i); 1948 if (!N->hasAnyUseOfValue(i)) 1949 continue; 1950 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1951 if (RegPressure[RCId] >= RegLimit[RCId]) 1952 return true; 1953 } 1954 return false; 1955} 1956 1957// Compute the register pressure contribution by this instruction by count up 1958// for uses that are not live and down for defs. Only count register classes 1959// that are already under high pressure. As a side effect, compute the number of 1960// uses of registers that are already live. 1961// 1962// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 1963// so could probably be factored. 1964int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 1965 LiveUses = 0; 1966 int PDiff = 0; 1967 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1968 I != E; ++I) { 1969 if (I->isCtrl()) 1970 continue; 1971 SUnit *PredSU = I->getSUnit(); 1972 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1973 // to cover the number of registers defined (they are all live). 
1974 if (PredSU->NumRegDefsLeft == 0) { 1975 if (PredSU->getNode()->isMachineOpcode()) 1976 ++LiveUses; 1977 continue; 1978 } 1979 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1980 RegDefPos.IsValid(); RegDefPos.Advance()) { 1981 EVT VT = RegDefPos.GetValue(); 1982 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1983 if (RegPressure[RCId] >= RegLimit[RCId]) 1984 ++PDiff; 1985 } 1986 } 1987 const SDNode *N = SU->getNode(); 1988 1989 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 1990 return PDiff; 1991 1992 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1993 for (unsigned i = 0; i != NumDefs; ++i) { 1994 EVT VT = N->getValueType(i); 1995 if (!N->hasAnyUseOfValue(i)) 1996 continue; 1997 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1998 if (RegPressure[RCId] >= RegLimit[RCId]) 1999 --PDiff; 2000 } 2001 return PDiff; 2002} 2003 2004void RegReductionPQBase::scheduledNode(SUnit *SU) { 2005 if (!TracksRegPressure) 2006 return; 2007 2008 if (!SU->getNode()) 2009 return; 2010 2011 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2012 I != E; ++I) { 2013 if (I->isCtrl()) 2014 continue; 2015 SUnit *PredSU = I->getSUnit(); 2016 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 2017 // to cover the number of registers defined (they are all live). 2018 if (PredSU->NumRegDefsLeft == 0) { 2019 continue; 2020 } 2021 // FIXME: The ScheduleDAG currently loses information about which of a 2022 // node's values is consumed by each dependence. Consequently, if the node 2023 // defines multiple register classes, we don't know which to pressurize 2024 // here. Instead the following loop consumes the register defs in an 2025 // arbitrary order. At least it handles the common case of clustered loads 2026 // to the same class. For precise liveness, each SDep needs to indicate the 2027 // result number. But that tightly couples the ScheduleDAG with the 2028 // SelectionDAG making updates tricky. A simpler hack would be to attach a 2029 // value type or register class to SDep. 2030 // 2031 // The most important aspect of register tracking is balancing the increase 2032 // here with the reduction further below. Note that this SU may use multiple 2033 // defs in PredSU. The can't be determined here, but we've already 2034 // compensated by reducing NumRegDefsLeft in PredSU during 2035 // ScheduleDAGSDNodes::AddSchedEdges. 2036 --PredSU->NumRegDefsLeft; 2037 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 2038 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 2039 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2040 if (SkipRegDefs) 2041 continue; 2042 2043 unsigned RCId, Cost; 2044 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2045 RegPressure[RCId] += Cost; 2046 break; 2047 } 2048 } 2049 2050 // We should have this assert, but there may be dead SDNodes that never 2051 // materialize as SUnits, so they don't appear to generate liveness. 2052 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 2053 int SkipRegDefs = (int)SU->NumRegDefsLeft; 2054 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 2055 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 2056 if (SkipRegDefs > 0) 2057 continue; 2058 unsigned RCId, Cost; 2059 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); 2060 if (RegPressure[RCId] < Cost) { 2061 // Register pressure tracking is imprecise. This can happen. 
But we try 2062 // hard not to let it happen because it likely results in poor scheduling. 2063 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); 2064 RegPressure[RCId] = 0; 2065 } 2066 else { 2067 RegPressure[RCId] -= Cost; 2068 } 2069 } 2070 dumpRegPressure(); 2071} 2072 2073void RegReductionPQBase::unscheduledNode(SUnit *SU) { 2074 if (!TracksRegPressure) 2075 return; 2076 2077 const SDNode *N = SU->getNode(); 2078 if (!N) return; 2079 2080 if (!N->isMachineOpcode()) { 2081 if (N->getOpcode() != ISD::CopyToReg) 2082 return; 2083 } else { 2084 unsigned Opc = N->getMachineOpcode(); 2085 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2086 Opc == TargetOpcode::INSERT_SUBREG || 2087 Opc == TargetOpcode::SUBREG_TO_REG || 2088 Opc == TargetOpcode::REG_SEQUENCE || 2089 Opc == TargetOpcode::IMPLICIT_DEF) 2090 return; 2091 } 2092 2093 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2094 I != E; ++I) { 2095 if (I->isCtrl()) 2096 continue; 2097 SUnit *PredSU = I->getSUnit(); 2098 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 2099 // counts data deps. 2100 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 2101 continue; 2102 const SDNode *PN = PredSU->getNode(); 2103 if (!PN->isMachineOpcode()) { 2104 if (PN->getOpcode() == ISD::CopyFromReg) { 2105 EVT VT = PN->getValueType(0); 2106 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2107 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2108 } 2109 continue; 2110 } 2111 unsigned POpc = PN->getMachineOpcode(); 2112 if (POpc == TargetOpcode::IMPLICIT_DEF) 2113 continue; 2114 if (POpc == TargetOpcode::EXTRACT_SUBREG || 2115 POpc == TargetOpcode::INSERT_SUBREG || 2116 POpc == TargetOpcode::SUBREG_TO_REG) { 2117 EVT VT = PN->getValueType(0); 2118 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2119 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2120 continue; 2121 } 2122 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 2123 for (unsigned i = 0; i != NumDefs; ++i) { 2124 EVT VT = PN->getValueType(i); 2125 if (!PN->hasAnyUseOfValue(i)) 2126 continue; 2127 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2128 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 2129 // Register pressure tracking is imprecise. This can happen. 2130 RegPressure[RCId] = 0; 2131 else 2132 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 2133 } 2134 } 2135 2136 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 2137 // may transfer data dependencies to CopyToReg. 2138 if (SU->NumSuccs && N->isMachineOpcode()) { 2139 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2140 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2141 EVT VT = N->getValueType(i); 2142 if (VT == MVT::Glue || VT == MVT::Other) 2143 continue; 2144 if (!N->hasAnyUseOfValue(i)) 2145 continue; 2146 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2147 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2148 } 2149 } 2150 2151 dumpRegPressure(); 2152} 2153 2154//===----------------------------------------------------------------------===// 2155// Dynamic Node Priority for Register Pressure Reduction 2156//===----------------------------------------------------------------------===// 2157 2158/// closestSucc - Returns the scheduled cycle of the successor which is 2159/// closest to the current cycle. 
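/// Illustrative example (hypothetical heights): with value successors at
/// heights 2 and 5 this returns 5; a CopyToReg successor is looked through
/// (closestSucc of the copy, plus one), so a stack of copies is treated as a
/// single position.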
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *PredSU = I->getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg =
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
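//
// Illustrative single-block loop shape (hypothetical virtual register %vreg1):
//   %iv      = CopyFromReg %vreg1
//   %iv.next = add %iv, 1            ; the VRegCycle node
//              CopyToReg %vreg1, %iv.next
// Marking the add and its CopyFromReg operand as isVRegCycle encourages other
// users of %iv to schedule first, so the add becomes the last use ("kill") of
// %iv and the register allocator can coalesce %vreg1's in and out values.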
2243static void initVRegCycle(SUnit *SU) { 2244 if (DisableSchedVRegCycle) 2245 return; 2246 2247 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2248 return; 2249 2250 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2251 2252 SU->isVRegCycle = true; 2253 2254 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2255 I != E; ++I) { 2256 if (I->isCtrl()) continue; 2257 I->getSUnit()->isVRegCycle = true; 2258 } 2259} 2260 2261// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2262// CopyFromReg operands. We should no longer penalize other uses of this VReg. 2263static void resetVRegCycle(SUnit *SU) { 2264 if (!SU->isVRegCycle) 2265 return; 2266 2267 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2268 I != E; ++I) { 2269 if (I->isCtrl()) continue; // ignore chain preds 2270 SUnit *PredSU = I->getSUnit(); 2271 if (PredSU->isVRegCycle) { 2272 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 2273 "VRegCycle def must be CopyFromReg"); 2274 I->getSUnit()->isVRegCycle = 0; 2275 } 2276 } 2277} 2278 2279// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 2280// means a node that defines the VRegCycle has not been scheduled yet. 2281static bool hasVRegCycleUse(const SUnit *SU) { 2282 // If this SU also defines the VReg, don't hoist it as a "use". 2283 if (SU->isVRegCycle) 2284 return false; 2285 2286 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2287 I != E; ++I) { 2288 if (I->isCtrl()) continue; // ignore chain preds 2289 if (I->getSUnit()->isVRegCycle && 2290 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 2291 DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 2292 return true; 2293 } 2294 } 2295 return false; 2296} 2297 2298// Check for either a dependence (latency) or resource (hazard) stall. 2299// 2300// Note: The ScheduleHazardRecognizer interface requires a non-const SU. 2301static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { 2302 if ((int)SPQ->getCurCycle() < Height) return true; 2303 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2304 != ScheduleHazardRecognizer::NoHazard) 2305 return true; 2306 return false; 2307} 2308 2309// Return -1 if left has higher priority, 1 if right has higher priority. 2310// Return 0 if latency-based priority is equivalent. 2311static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, 2312 RegReductionPQBase *SPQ) { 2313 // Scheduling an instruction that uses a VReg whose postincrement has not yet 2314 // been scheduled will induce a copy. Model this as an extra cycle of latency. 2315 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; 2316 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; 2317 int LHeight = (int)left->getHeight() + LPenalty; 2318 int RHeight = (int)right->getHeight() + RPenalty; 2319 2320 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && 2321 BUHasStall(left, LHeight, SPQ); 2322 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && 2323 BUHasStall(right, RHeight, SPQ); 2324 2325 // If scheduling one of the node will cause a pipeline stall, delay it. 2326 // If scheduling either one of the node will cause a pipeline stall, sort 2327 // them according to their height. 2328 if (LStall) { 2329 if (!RStall) 2330 return 1; 2331 if (LHeight != RHeight) 2332 return LHeight > RHeight ? 
      1 : -1;
  } else if (RStall)
    return -1;

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
                     right->SchedulingPref == Sched::ILP)) {
    // If neither instruction stalls (!LStall && !RStall) and the
    // HazardRecognizer is enabled, it is grouping instructions by cycle, so
    // each node's height is already covered and only its depth matters. We
    // also reach this point if both stall but have the same height.
    if (!SPQ->getHazardRec()->isEnabled()) {
      if (LHeight != RHeight)
        return LHeight > RHeight ? 1 : -1;
    }
    int LDepth = left->getDepth() - LPenalty;
    int RDepth = right->getDepth() - RPenalty;
    if (LDepth != RDepth) {
      DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
            << ") depth " << LDepth << " vs SU (" << right->NodeNum
            << ") depth " << RDepth << "\n");
      return LDepth < RDepth ? 1 : -1;
    }
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency ? 1 : -1;
  }
  return 0;
}

static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
      #ifndef NDEBUG
      const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"};
      #endif
      DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
            << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority)
    return LPriority > RPriority;

  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep the source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer an ordering where the lower the non-zero order number, the higher
    // the preference.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
2422 // t4 = op c4 2423 // t2 = op c3 2424 // t1 = op t2, c1 2425 // t3 = op t4, c2 2426 // 2427 // This creates more short live intervals. 2428 unsigned LDist = closestSucc(left); 2429 unsigned RDist = closestSucc(right); 2430 if (LDist != RDist) 2431 return LDist < RDist; 2432 2433 // How many registers becomes live when the node is scheduled. 2434 unsigned LScratch = calcMaxScratches(left); 2435 unsigned RScratch = calcMaxScratches(right); 2436 if (LScratch != RScratch) 2437 return LScratch > RScratch; 2438 2439 // Comparing latency against a call makes little sense unless the node 2440 // is register pressure-neutral. 2441 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) 2442 return (left->NodeQueueId > right->NodeQueueId); 2443 2444 // Do not compare latencies when one or both of the nodes are calls. 2445 if (!DisableSchedCycles && 2446 !(left->isCall || right->isCall)) { 2447 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2448 if (result != 0) 2449 return result > 0; 2450 } 2451 else { 2452 if (left->getHeight() != right->getHeight()) 2453 return left->getHeight() > right->getHeight(); 2454 2455 if (left->getDepth() != right->getDepth()) 2456 return left->getDepth() < right->getDepth(); 2457 } 2458 2459 assert(left->NodeQueueId && right->NodeQueueId && 2460 "NodeQueueId cannot be zero"); 2461 return (left->NodeQueueId > right->NodeQueueId); 2462} 2463 2464// Bottom up 2465bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2466 if (int res = checkSpecialNodes(left, right)) 2467 return res > 0; 2468 2469 return BURRSort(left, right, SPQ); 2470} 2471 2472// Source order, otherwise bottom up. 2473bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2474 if (int res = checkSpecialNodes(left, right)) 2475 return res > 0; 2476 2477 unsigned LOrder = SPQ->getNodeOrdering(left); 2478 unsigned ROrder = SPQ->getNodeOrdering(right); 2479 2480 // Prefer an ordering where the lower the non-zero order number, the higher 2481 // the preference. 2482 if ((LOrder || ROrder) && LOrder != ROrder) 2483 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2484 2485 return BURRSort(left, right, SPQ); 2486} 2487 2488// If the time between now and when the instruction will be ready can cover 2489// the spill code, then avoid adding it to the ready queue. This gives long 2490// stalls highest priority and allows hoisting across calls. It should also 2491// speed up processing the available queue. 2492bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2493 static const unsigned ReadyDelay = 3; 2494 2495 if (SPQ->MayReduceRegPressure(SU)) return true; 2496 2497 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2498 2499 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2500 != ScheduleHazardRecognizer::NoHazard) 2501 return false; 2502 2503 return true; 2504} 2505 2506// Return true if right should be scheduled with higher priority than left. 2507bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2508 if (int res = checkSpecialNodes(left, right)) 2509 return res > 0; 2510 2511 if (left->isCall || right->isCall) 2512 // No way to compute latency of calls. 2513 return BURRSort(left, right, SPQ); 2514 2515 bool LHigh = SPQ->HighRegPressure(left); 2516 bool RHigh = SPQ->HighRegPressure(right); 2517 // Avoid causing spills. If register pressure is high, schedule for 2518 // register pressure reduction. 
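  // Note that the pressure check below is deliberately one-sided: pressure
  // only decides the order when exactly one of the two nodes is in a
  // high-pressure region; otherwise latency (BUCompareLatency) and the common
  // BURRSort tie-breakers make the call.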
2519 if (LHigh && !RHigh) { 2520 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2521 << right->NodeNum << ")\n"); 2522 return true; 2523 } 2524 else if (!LHigh && RHigh) { 2525 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2526 << left->NodeNum << ")\n"); 2527 return false; 2528 } 2529 if (!LHigh && !RHigh) { 2530 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2531 if (result != 0) 2532 return result > 0; 2533 } 2534 return BURRSort(left, right, SPQ); 2535} 2536 2537// Schedule as many instructions in each cycle as possible. So don't make an 2538// instruction available unless it is ready in the current cycle. 2539bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2540 if (SU->getHeight() > CurCycle) return false; 2541 2542 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2543 != ScheduleHazardRecognizer::NoHazard) 2544 return false; 2545 2546 return true; 2547} 2548 2549static bool canEnableCoalescing(SUnit *SU) { 2550 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2551 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2552 // CopyToReg should be close to its uses to facilitate coalescing and 2553 // avoid spilling. 2554 return true; 2555 2556 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2557 Opc == TargetOpcode::SUBREG_TO_REG || 2558 Opc == TargetOpcode::INSERT_SUBREG) 2559 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2560 // close to their uses to facilitate coalescing. 2561 return true; 2562 2563 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2564 // If SU does not have a register def, schedule it close to its uses 2565 // because it does not lengthen any live ranges. 2566 return true; 2567 2568 return false; 2569} 2570 2571// list-ilp is currently an experimental scheduler that allows various 2572// heuristics to be enabled prior to the normal register reduction logic. 2573bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2574 if (int res = checkSpecialNodes(left, right)) 2575 return res > 0; 2576 2577 if (left->isCall || right->isCall) 2578 // No way to compute latency of calls. 
2579 return BURRSort(left, right, SPQ); 2580 2581 unsigned LLiveUses = 0, RLiveUses = 0; 2582 int LPDiff = 0, RPDiff = 0; 2583 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2584 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2585 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2586 } 2587 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2588 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 2589 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 2590 return LPDiff > RPDiff; 2591 } 2592 2593 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2594 bool LReduce = canEnableCoalescing(left); 2595 bool RReduce = canEnableCoalescing(right); 2596 if (LReduce && !RReduce) return false; 2597 if (RReduce && !LReduce) return true; 2598 } 2599 2600 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2601 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2602 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 2603 return LLiveUses < RLiveUses; 2604 } 2605 2606 if (!DisableSchedStalls) { 2607 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2608 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2609 if (LStall != RStall) 2610 return left->getHeight() > right->getHeight(); 2611 } 2612 2613 if (!DisableSchedCriticalPath) { 2614 int spread = (int)left->getDepth() - (int)right->getDepth(); 2615 if (std::abs(spread) > MaxReorderWindow) { 2616 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2617 << left->getDepth() << " != SU(" << right->NodeNum << "): " 2618 << right->getDepth() << "\n"); 2619 return left->getDepth() < right->getDepth(); 2620 } 2621 } 2622 2623 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2624 int spread = (int)left->getHeight() - (int)right->getHeight(); 2625 if (std::abs(spread) > MaxReorderWindow) 2626 return left->getHeight() > right->getHeight(); 2627 } 2628 2629 return BURRSort(left, right, SPQ); 2630} 2631 2632void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2633 SUnits = &sunits; 2634 // Add pseudo dependency edges for two-address nodes. 2635 if (!Disable2AddrHack) 2636 AddPseudoTwoAddrDeps(); 2637 // Reroute edges to nodes with multiple uses. 2638 if (!TracksRegPressure && !SrcOrder) 2639 PrescheduleNodesWithMultipleUses(); 2640 // Calculate node priorities. 2641 CalculateSethiUllmanNumbers(); 2642 2643 // For single block loops, mark nodes that look like canonical IV increments. 
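  // A basic block that lists itself as a successor is a single-block loop;
  // only in that case is the per-node initVRegCycle() scan worthwhile.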
2644 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { 2645 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { 2646 initVRegCycle(&sunits[i]); 2647 } 2648 } 2649} 2650 2651//===----------------------------------------------------------------------===// 2652// Preschedule for Register Pressure 2653//===----------------------------------------------------------------------===// 2654 2655bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 2656 if (SU->isTwoAddress) { 2657 unsigned Opc = SU->getNode()->getMachineOpcode(); 2658 const MCInstrDesc &MCID = TII->get(Opc); 2659 unsigned NumRes = MCID.getNumDefs(); 2660 unsigned NumOps = MCID.getNumOperands() - NumRes; 2661 for (unsigned i = 0; i != NumOps; ++i) { 2662 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 2663 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 2664 if (DU->getNodeId() != -1 && 2665 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 2666 return true; 2667 } 2668 } 2669 } 2670 return false; 2671} 2672 2673/// canClobberReachingPhysRegUse - True if SU would clobber one of it's 2674/// successor's explicit physregs whose definition can reach DepSU. 2675/// i.e. DepSU should not be scheduled above SU. 2676static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 2677 ScheduleDAGRRList *scheduleDAG, 2678 const TargetInstrInfo *TII, 2679 const TargetRegisterInfo *TRI) { 2680 const uint16_t *ImpDefs 2681 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 2682 const uint32_t *RegMask = getNodeRegMask(SU->getNode()); 2683 if(!ImpDefs && !RegMask) 2684 return false; 2685 2686 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); 2687 SI != SE; ++SI) { 2688 SUnit *SuccSU = SI->getSUnit(); 2689 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), 2690 PE = SuccSU->Preds.end(); PI != PE; ++PI) { 2691 if (!PI->isAssignedRegDep()) 2692 continue; 2693 2694 if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && 2695 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 2696 return true; 2697 2698 if (ImpDefs) 2699 for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) 2700 // Return true if SU clobbers this physical register use and the 2701 // definition of the register reaches from DepSU. IsReachable queries 2702 // a topological forward sort of the DAG (following the successors). 2703 if (TRI->regsOverlap(*ImpDef, PI->getReg()) && 2704 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 2705 return true; 2706 } 2707 } 2708 return false; 2709} 2710 2711/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 2712/// physical register defs. 
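/// The check walks SU's glued-node chain and reports a clobber if any of SU's
/// implicit defs or register mask overlaps an implicitly defined physreg of
/// SuccSU that actually has a use.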
2713static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2714 const TargetInstrInfo *TII, 2715 const TargetRegisterInfo *TRI) { 2716 SDNode *N = SuccSU->getNode(); 2717 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2718 const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 2719 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 2720 for (const SDNode *SUNode = SU->getNode(); SUNode; 2721 SUNode = SUNode->getGluedNode()) { 2722 if (!SUNode->isMachineOpcode()) 2723 continue; 2724 const uint16_t *SUImpDefs = 2725 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 2726 const uint32_t *SURegMask = getNodeRegMask(SUNode); 2727 if (!SUImpDefs && !SURegMask) 2728 continue; 2729 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2730 EVT VT = N->getValueType(i); 2731 if (VT == MVT::Glue || VT == MVT::Other) 2732 continue; 2733 if (!N->hasAnyUseOfValue(i)) 2734 continue; 2735 unsigned Reg = ImpDefs[i - NumDefs]; 2736 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) 2737 return true; 2738 if (!SUImpDefs) 2739 continue; 2740 for (;*SUImpDefs; ++SUImpDefs) { 2741 unsigned SUReg = *SUImpDefs; 2742 if (TRI->regsOverlap(Reg, SUReg)) 2743 return true; 2744 } 2745 } 2746 } 2747 return false; 2748} 2749 2750/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2751/// are not handled well by the general register pressure reduction 2752/// heuristics. When presented with code like this: 2753/// 2754/// N 2755/// / | 2756/// / | 2757/// U store 2758/// | 2759/// ... 2760/// 2761/// the heuristics tend to push the store up, but since the 2762/// operand of the store has another use (U), this would increase 2763/// the length of that other use (the U->N edge). 2764/// 2765/// This function transforms code like the above to route U's 2766/// dependence through the store when possible, like this: 2767/// 2768/// N 2769/// || 2770/// || 2771/// store 2772/// | 2773/// U 2774/// | 2775/// ... 2776/// 2777/// This results in the store being scheduled immediately 2778/// after N, which shortens the U->N live range, reducing 2779/// register pressure. 2780/// 2781void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2782 // Visit all the nodes in topological order, working top-down. 2783 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2784 SUnit *SU = &(*SUnits)[i]; 2785 // For now, only look at nodes with no data successors, such as stores. 2786 // These are especially important, due to the heuristics in 2787 // getNodePriority for nodes with no data successors. 2788 if (SU->NumSuccs != 0) 2789 continue; 2790 // For now, only look at nodes with exactly one data predecessor. 2791 if (SU->NumPreds != 1) 2792 continue; 2793 // Avoid prescheduling copies to virtual registers, which don't behave 2794 // like other nodes from the perspective of scheduling heuristics. 2795 if (SDNode *N = SU->getNode()) 2796 if (N->getOpcode() == ISD::CopyToReg && 2797 TargetRegisterInfo::isVirtualRegister 2798 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 2799 continue; 2800 2801 // Locate the single data predecessor. 2802 SUnit *PredSU = 0; 2803 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 2804 EE = SU->Preds.end(); II != EE; ++II) 2805 if (!II->isCtrl()) { 2806 PredSU = II->getSUnit(); 2807 break; 2808 } 2809 assert(PredSU); 2810 2811 // Don't rewrite edges that carry physregs, because that requires additional 2812 // support infrastructure. 
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
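///
/// Illustrative example (ADDri stands in for any two-address instruction):
///   %b = ADDri %a, 1      ; result tied to the use of %a
///   %c = MUL   %a, %x     ; an independent use of %a
/// The ADDri node is given an artificial order predecessor on the MUL, so the
/// bottom-up scheduler emits the ADDri below the MUL; the tied use of %a is
/// then its last use, which the two-address rewrite can honor without an
/// extra copy.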
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
2927 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 2928 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 2929 SuccOpc == TargetOpcode::INSERT_SUBREG || 2930 SuccOpc == TargetOpcode::SUBREG_TO_REG) 2931 continue; 2932 if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && 2933 (!canClobber(SuccSU, DUSU) || 2934 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 2935 (!SU->isCommutable && SuccSU->isCommutable)) && 2936 !scheduleDAG->IsReachable(SuccSU, SU)) { 2937 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 2938 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 2939 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 2940 /*Reg=*/0, /*isNormalMemory=*/false, 2941 /*isMustAlias=*/false, 2942 /*isArtificial=*/true)); 2943 } 2944 } 2945 } 2946 } 2947} 2948 2949//===----------------------------------------------------------------------===// 2950// Public Constructor Functions 2951//===----------------------------------------------------------------------===// 2952 2953llvm::ScheduleDAGSDNodes * 2954llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2955 CodeGenOpt::Level OptLevel) { 2956 const TargetMachine &TM = IS->TM; 2957 const TargetInstrInfo *TII = TM.getInstrInfo(); 2958 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2959 2960 BURegReductionPriorityQueue *PQ = 2961 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); 2962 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2963 PQ->setScheduleDAG(SD); 2964 return SD; 2965} 2966 2967llvm::ScheduleDAGSDNodes * 2968llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2969 CodeGenOpt::Level OptLevel) { 2970 const TargetMachine &TM = IS->TM; 2971 const TargetInstrInfo *TII = TM.getInstrInfo(); 2972 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2973 2974 SrcRegReductionPriorityQueue *PQ = 2975 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); 2976 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2977 PQ->setScheduleDAG(SD); 2978 return SD; 2979} 2980 2981llvm::ScheduleDAGSDNodes * 2982llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 2983 CodeGenOpt::Level OptLevel) { 2984 const TargetMachine &TM = IS->TM; 2985 const TargetInstrInfo *TII = TM.getInstrInfo(); 2986 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2987 const TargetLowering *TLI = &IS->getTargetLowering(); 2988 2989 HybridBURRPriorityQueue *PQ = 2990 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 2991 2992 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2993 PQ->setScheduleDAG(SD); 2994 return SD; 2995} 2996 2997llvm::ScheduleDAGSDNodes * 2998llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 2999 CodeGenOpt::Level OptLevel) { 3000 const TargetMachine &TM = IS->TM; 3001 const TargetInstrInfo *TII = TM.getInstrInfo(); 3002 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 3003 const TargetLowering *TLI = &IS->getTargetLowering(); 3004 3005 ILPBURRPriorityQueue *PQ = 3006 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 3007 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 3008 PQ->setScheduleDAG(SD); 3009 return SD; 3010} 3011
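
//===----------------------------------------------------------------------===//
// Usage Sketch
//===----------------------------------------------------------------------===//
//
// Illustrative only: SelectionDAGISel normally constructs one of the
// schedulers above itself, but a minimal sketch of direct construction (the
// OptLevel value is just an example) looks like:
//
//   ScheduleDAGSDNodes *Scheduler =
//       createILPListDAGScheduler(IS, CodeGenOpt::Aggressive);
//   // SelectionDAGISel then runs the scheduler over each basic block's DAG
//   // to produce the final instruction order before emission.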