ScheduleDAGRRList.cpp revision 98adea11496400c8385b774b4d9f9acd4c99d254
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/Target/TargetRegisterInfo.h" 22#include "llvm/Target/TargetData.h" 23#include "llvm/Target/TargetMachine.h" 24#include "llvm/Target/TargetInstrInfo.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/Compiler.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/PriorityQueue.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallSet.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/ADT/STLExtras.h" 33#include <climits> 34#include "llvm/Support/CommandLine.h" 35using namespace llvm; 36 37STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 38STATISTIC(NumUnfolds, "Number of nodes unfolded"); 39STATISTIC(NumDups, "Number of duplicated nodes"); 40STATISTIC(NumCCCopies, "Number of cross class copies"); 41 42static RegisterScheduler 43 burrListDAGScheduler("list-burr", 44 "Bottom-up register reduction list scheduling", 45 createBURRListDAGScheduler); 46static RegisterScheduler 47 tdrListrDAGScheduler("list-tdrr", 48 "Top-down register reduction list scheduling", 49 createTDRRListDAGScheduler); 50 51namespace { 52//===----------------------------------------------------------------------===// 53/// ScheduleDAGRRList - The actual register reduction list scheduler 54/// implementation. This supports both top-down and bottom-up scheduling. 55/// 56class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 57private: 58 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 59 /// it is top-down. 60 bool isBottomUp; 61 62 /// AvailableQueue - The priority queue to use for the available SUnits. 63 SchedulingPriorityQueue *AvailableQueue; 64 65 /// LiveRegDefs - A set of physical registers and their definition 66 /// that are "live". These nodes must be scheduled before any other nodes that 67 /// modifies the registers can be scheduled. 68 unsigned NumLiveRegs; 69 std::vector<SUnit*> LiveRegDefs; 70 std::vector<unsigned> LiveRegCycles; 71 72public: 73 ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb, 74 const TargetMachine &tm, bool isbottomup, 75 SchedulingPriorityQueue *availqueue) 76 : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup), 77 AvailableQueue(availqueue) { 78 } 79 80 ~ScheduleDAGRRList() { 81 delete AvailableQueue; 82 } 83 84 void Schedule(); 85 86 /// IsReachable - Checks if SU is reachable from TargetSU. 87 bool IsReachable(const SUnit *SU, const SUnit *TargetSU); 88 89 /// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 90 /// create a cycle. 91 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU); 92 93 /// AddPred - This adds the specified node X as a predecessor of 94 /// the current node Y if not already. 95 /// This returns true if this is a new predecessor. 96 /// Updates the topological ordering if required. 97 bool AddPred(SUnit *Y, SUnit *X, bool isCtrl, bool isArtificial, 98 unsigned PhyReg = 0, int Cost = 1); 99 100 /// RemovePred - This removes the specified node N from the predecessors of 101 /// the current node M. Updates the topological ordering if required. 102 bool RemovePred(SUnit *M, SUnit *N, bool isCtrl, bool isArtificial); 103 104private: 105 void ReleasePred(SUnit *SU, SUnit *PredSU, bool isChain); 106 void ReleaseSucc(SUnit *SU, SUnit *SuccSU, bool isChain); 107 void CapturePred(SUnit*, SUnit*, bool); 108 void ScheduleNodeBottomUp(SUnit*, unsigned); 109 void ScheduleNodeTopDown(SUnit*, unsigned); 110 void UnscheduleNodeBottomUp(SUnit*); 111 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 112 SUnit *CopyAndMoveSuccessors(SUnit*); 113 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, 114 const TargetRegisterClass*, 115 const TargetRegisterClass*, 116 SmallVector<SUnit*, 2>&); 117 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 118 void ListScheduleTopDown(); 119 void ListScheduleBottomUp(); 120 void CommuteNodesToReducePressure(); 121 122 123 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 124 /// Updates the topological ordering if required. 125 SUnit *CreateNewSUnit(SDNode *N) { 126 SUnit *NewNode = NewSUnit(N); 127 // Update the topological ordering. 128 if (NewNode->NodeNum >= Node2Index.size()) 129 InitDAGTopologicalSorting(); 130 return NewNode; 131 } 132 133 /// CreateClone - Creates a new SUnit from an existing one. 134 /// Updates the topological ordering if required. 135 SUnit *CreateClone(SUnit *N) { 136 SUnit *NewNode = Clone(N); 137 // Update the topological ordering. 138 if (NewNode->NodeNum >= Node2Index.size()) 139 InitDAGTopologicalSorting(); 140 return NewNode; 141 } 142 143 /// Functions for preserving the topological ordering 144 /// even after dynamic insertions of new edges. 145 /// This allows a very fast implementation of IsReachable. 146 147 /// InitDAGTopologicalSorting - create the initial topological 148 /// ordering from the DAG to be scheduled. 149 void InitDAGTopologicalSorting(); 150 151 /// DFS - make a DFS traversal and mark all nodes affected by the 152 /// edge insertion. These nodes will later get new topological indexes 153 /// by means of the Shift method. 154 void DFS(const SUnit *SU, int UpperBound, bool& HasLoop); 155 156 /// Shift - reassign topological indexes for the nodes in the DAG 157 /// to preserve the topological ordering. 158 void Shift(BitVector& Visited, int LowerBound, int UpperBound); 159 160 /// Allocate - assign the topological index to the node n. 161 void Allocate(int n, int index); 162 163 /// Index2Node - Maps topological index to the node number. 164 std::vector<int> Index2Node; 165 /// Node2Index - Maps the node number to its topological index. 166 std::vector<int> Node2Index; 167 /// Visited - a set of nodes visited during a DFS traversal. 168 BitVector Visited; 169}; 170} // end anonymous namespace 171 172 173/// Schedule - Schedule the DAG using list scheduling. 174void ScheduleDAGRRList::Schedule() { 175 DOUT << "********** List Scheduling **********\n"; 176 177 NumLiveRegs = 0; 178 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 179 LiveRegCycles.resize(TRI->getNumRegs(), 0); 180 181 // Build scheduling units. 182 BuildSchedUnits(); 183 184 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 185 SUnits[su].dumpAll(this)); 186 CalculateDepths(); 187 CalculateHeights(); 188 InitDAGTopologicalSorting(); 189 190 AvailableQueue->initNodes(SUnits); 191 192 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 193 if (isBottomUp) 194 ListScheduleBottomUp(); 195 else 196 ListScheduleTopDown(); 197 198 AvailableQueue->releaseState(); 199 200 CommuteNodesToReducePressure(); 201} 202 203/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 204/// it is not the last use of its first operand, add it to the CommuteSet if 205/// possible. It will be commuted when it is translated to a MI. 206void ScheduleDAGRRList::CommuteNodesToReducePressure() { 207 SmallPtrSet<SUnit*, 4> OperandSeen; 208 for (unsigned i = Sequence.size(); i != 0; ) { 209 --i; 210 SUnit *SU = Sequence[i]; 211 if (!SU || !SU->getNode()) continue; 212 if (SU->isCommutable) { 213 unsigned Opc = SU->getNode()->getMachineOpcode(); 214 const TargetInstrDesc &TID = TII->get(Opc); 215 unsigned NumRes = TID.getNumDefs(); 216 unsigned NumOps = TID.getNumOperands() - NumRes; 217 for (unsigned j = 0; j != NumOps; ++j) { 218 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 219 continue; 220 221 SDNode *OpN = SU->getNode()->getOperand(j).getNode(); 222 SUnit *OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()]; 223 if (OpSU && OperandSeen.count(OpSU) == 1) { 224 // Ok, so SU is not the last use of OpSU, but SU is two-address so 225 // it will clobber OpSU. Try to commute SU if no other source operands 226 // are live below. 227 bool DoCommute = true; 228 for (unsigned k = 0; k < NumOps; ++k) { 229 if (k != j) { 230 OpN = SU->getNode()->getOperand(k).getNode(); 231 OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()]; 232 if (OpSU && OperandSeen.count(OpSU) == 1) { 233 DoCommute = false; 234 break; 235 } 236 } 237 } 238 if (DoCommute) 239 CommuteSet.insert(SU->getNode()); 240 } 241 242 // Only look at the first use&def node for now. 243 break; 244 } 245 } 246 247 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 248 I != E; ++I) { 249 if (!I->isCtrl) 250 OperandSeen.insert(I->Dep->OrigNode); 251 } 252 } 253} 254 255//===----------------------------------------------------------------------===// 256// Bottom-Up Scheduling 257//===----------------------------------------------------------------------===// 258 259/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 260/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 261void ScheduleDAGRRList::ReleasePred(SUnit *SU, SUnit *PredSU, bool isChain) { 262 --PredSU->NumSuccsLeft; 263 264#ifndef NDEBUG 265 if (PredSU->NumSuccsLeft < 0) { 266 cerr << "*** Scheduling failed! ***\n"; 267 PredSU->dump(this); 268 cerr << " has been released too many times!\n"; 269 assert(0); 270 } 271#endif 272 273 if (PredSU->NumSuccsLeft == 0) { 274 PredSU->isAvailable = true; 275 AvailableQueue->push(PredSU); 276 } 277} 278 279/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 280/// count of its predecessors. If a predecessor pending count is zero, add it to 281/// the Available queue. 282void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 283 DOUT << "*** Scheduling [" << CurCycle << "]: "; 284 DEBUG(SU->dump(this)); 285 286 SU->Cycle = CurCycle; 287 Sequence.push_back(SU); 288 289 // Bottom up: release predecessors 290 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 291 I != E; ++I) { 292 ReleasePred(SU, I->Dep, I->isCtrl); 293 if (I->Cost < 0) { 294 // This is a physical register dependency and it's impossible or 295 // expensive to copy the register. Make sure nothing that can 296 // clobber the register is scheduled between the predecessor and 297 // this node. 298 if (!LiveRegDefs[I->Reg]) { 299 ++NumLiveRegs; 300 LiveRegDefs[I->Reg] = I->Dep; 301 LiveRegCycles[I->Reg] = CurCycle; 302 } 303 } 304 } 305 306 // Release all the implicit physical register defs that are live. 307 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 308 I != E; ++I) { 309 if (I->Cost < 0) { 310 if (LiveRegCycles[I->Reg] == I->Dep->Cycle) { 311 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 312 assert(LiveRegDefs[I->Reg] == SU && 313 "Physical register dependency violated?"); 314 --NumLiveRegs; 315 LiveRegDefs[I->Reg] = NULL; 316 LiveRegCycles[I->Reg] = 0; 317 } 318 } 319 } 320 321 SU->isScheduled = true; 322 AvailableQueue->ScheduledNode(SU); 323} 324 325/// CapturePred - This does the opposite of ReleasePred. Since SU is being 326/// unscheduled, incrcease the succ left count of its predecessors. Remove 327/// them from AvailableQueue if necessary. 328void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) { 329 if (PredSU->isAvailable) { 330 PredSU->isAvailable = false; 331 if (!PredSU->isPending) 332 AvailableQueue->remove(PredSU); 333 } 334 335 ++PredSU->NumSuccsLeft; 336} 337 338/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 339/// its predecessor states to reflect the change. 340void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 341 DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; 342 DEBUG(SU->dump(this)); 343 344 AvailableQueue->UnscheduledNode(SU); 345 346 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 347 I != E; ++I) { 348 CapturePred(I->Dep, SU, I->isCtrl); 349 if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) { 350 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 351 assert(LiveRegDefs[I->Reg] == I->Dep && 352 "Physical register dependency violated?"); 353 --NumLiveRegs; 354 LiveRegDefs[I->Reg] = NULL; 355 LiveRegCycles[I->Reg] = 0; 356 } 357 } 358 359 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 360 I != E; ++I) { 361 if (I->Cost < 0) { 362 if (!LiveRegDefs[I->Reg]) { 363 LiveRegDefs[I->Reg] = SU; 364 ++NumLiveRegs; 365 } 366 if (I->Dep->Cycle < LiveRegCycles[I->Reg]) 367 LiveRegCycles[I->Reg] = I->Dep->Cycle; 368 } 369 } 370 371 SU->Cycle = 0; 372 SU->isScheduled = false; 373 SU->isAvailable = true; 374 AvailableQueue->push(SU); 375} 376 377/// IsReachable - Checks if SU is reachable from TargetSU. 378bool ScheduleDAGRRList::IsReachable(const SUnit *SU, const SUnit *TargetSU) { 379 // If insertion of the edge SU->TargetSU would create a cycle 380 // then there is a path from TargetSU to SU. 381 int UpperBound, LowerBound; 382 LowerBound = Node2Index[TargetSU->NodeNum]; 383 UpperBound = Node2Index[SU->NodeNum]; 384 bool HasLoop = false; 385 // Is Ord(TargetSU) < Ord(SU) ? 386 if (LowerBound < UpperBound) { 387 Visited.reset(); 388 // There may be a path from TargetSU to SU. Check for it. 389 DFS(TargetSU, UpperBound, HasLoop); 390 } 391 return HasLoop; 392} 393 394/// Allocate - assign the topological index to the node n. 395inline void ScheduleDAGRRList::Allocate(int n, int index) { 396 Node2Index[n] = index; 397 Index2Node[index] = n; 398} 399 400/// InitDAGTopologicalSorting - create the initial topological 401/// ordering from the DAG to be scheduled. 402 403/// The idea of the algorithm is taken from 404/// "Online algorithms for managing the topological order of 405/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly 406/// This is the MNR algorithm, which was first introduced by 407/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in 408/// "Maintaining a topological order under edge insertions". 409/// 410/// Short description of the algorithm: 411/// 412/// Topological ordering, ord, of a DAG maps each node to a topological 413/// index so that for all edges X->Y it is the case that ord(X) < ord(Y). 414/// 415/// This means that if there is a path from the node X to the node Z, 416/// then ord(X) < ord(Z). 417/// 418/// This property can be used to check for reachability of nodes: 419/// if Z is reachable from X, then an insertion of the edge Z->X would 420/// create a cycle. 421/// 422/// The algorithm first computes a topological ordering for the DAG by 423/// initializing the Index2Node and Node2Index arrays and then tries to keep 424/// the ordering up-to-date after edge insertions by reordering the DAG. 425/// 426/// On insertion of the edge X->Y, the algorithm first marks by calling DFS 427/// the nodes reachable from Y, and then shifts them using Shift to lie 428/// immediately after X in Index2Node. 429void ScheduleDAGRRList::InitDAGTopologicalSorting() { 430 unsigned DAGSize = SUnits.size(); 431 std::vector<SUnit*> WorkList; 432 WorkList.reserve(DAGSize); 433 434 Index2Node.resize(DAGSize); 435 Node2Index.resize(DAGSize); 436 437 // Initialize the data structures. 438 for (unsigned i = 0, e = DAGSize; i != e; ++i) { 439 SUnit *SU = &SUnits[i]; 440 int NodeNum = SU->NodeNum; 441 unsigned Degree = SU->Succs.size(); 442 // Temporarily use the Node2Index array as scratch space for degree counts. 443 Node2Index[NodeNum] = Degree; 444 445 // Is it a node without dependencies? 446 if (Degree == 0) { 447 assert(SU->Succs.empty() && "SUnit should have no successors"); 448 // Collect leaf nodes. 449 WorkList.push_back(SU); 450 } 451 } 452 453 int Id = DAGSize; 454 while (!WorkList.empty()) { 455 SUnit *SU = WorkList.back(); 456 WorkList.pop_back(); 457 Allocate(SU->NodeNum, --Id); 458 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 459 I != E; ++I) { 460 SUnit *SU = I->Dep; 461 if (!--Node2Index[SU->NodeNum]) 462 // If all dependencies of the node are processed already, 463 // then the node can be computed now. 464 WorkList.push_back(SU); 465 } 466 } 467 468 Visited.resize(DAGSize); 469 470#ifndef NDEBUG 471 // Check correctness of the ordering 472 for (unsigned i = 0, e = DAGSize; i != e; ++i) { 473 SUnit *SU = &SUnits[i]; 474 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 475 I != E; ++I) { 476 assert(Node2Index[SU->NodeNum] > Node2Index[I->Dep->NodeNum] && 477 "Wrong topological sorting"); 478 } 479 } 480#endif 481} 482 483/// AddPred - adds an edge from SUnit X to SUnit Y. 484/// Updates the topological ordering if required. 485bool ScheduleDAGRRList::AddPred(SUnit *Y, SUnit *X, bool isCtrl, 486 bool isArtificial, unsigned PhyReg, int Cost) { 487 int UpperBound, LowerBound; 488 LowerBound = Node2Index[Y->NodeNum]; 489 UpperBound = Node2Index[X->NodeNum]; 490 bool HasLoop = false; 491 // Is Ord(X) < Ord(Y) ? 492 if (LowerBound < UpperBound) { 493 // Update the topological order. 494 Visited.reset(); 495 DFS(Y, UpperBound, HasLoop); 496 assert(!HasLoop && "Inserted edge creates a loop!"); 497 // Recompute topological indexes. 498 Shift(Visited, LowerBound, UpperBound); 499 } 500 // Now really insert the edge. 501 return Y->addPred(X, isCtrl, isArtificial, PhyReg, Cost); 502} 503 504/// RemovePred - This removes the specified node N from the predecessors of 505/// the current node M. Updates the topological ordering if required. 506bool ScheduleDAGRRList::RemovePred(SUnit *M, SUnit *N, 507 bool isCtrl, bool isArtificial) { 508 // InitDAGTopologicalSorting(); 509 return M->removePred(N, isCtrl, isArtificial); 510} 511 512/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark 513/// all nodes affected by the edge insertion. These nodes will later get new 514/// topological indexes by means of the Shift method. 515void ScheduleDAGRRList::DFS(const SUnit *SU, int UpperBound, bool& HasLoop) { 516 std::vector<const SUnit*> WorkList; 517 WorkList.reserve(SUnits.size()); 518 519 WorkList.push_back(SU); 520 while (!WorkList.empty()) { 521 SU = WorkList.back(); 522 WorkList.pop_back(); 523 Visited.set(SU->NodeNum); 524 for (int I = SU->Succs.size()-1; I >= 0; --I) { 525 int s = SU->Succs[I].Dep->NodeNum; 526 if (Node2Index[s] == UpperBound) { 527 HasLoop = true; 528 return; 529 } 530 // Visit successors if not already and in affected region. 531 if (!Visited.test(s) && Node2Index[s] < UpperBound) { 532 WorkList.push_back(SU->Succs[I].Dep); 533 } 534 } 535 } 536} 537 538/// Shift - Renumber the nodes so that the topological ordering is 539/// preserved. 540void ScheduleDAGRRList::Shift(BitVector& Visited, int LowerBound, 541 int UpperBound) { 542 std::vector<int> L; 543 int shift = 0; 544 int i; 545 546 for (i = LowerBound; i <= UpperBound; ++i) { 547 // w is node at topological index i. 548 int w = Index2Node[i]; 549 if (Visited.test(w)) { 550 // Unmark. 551 Visited.reset(w); 552 L.push_back(w); 553 shift = shift + 1; 554 } else { 555 Allocate(w, i - shift); 556 } 557 } 558 559 for (unsigned j = 0; j < L.size(); ++j) { 560 Allocate(L[j], i - shift); 561 i = i + 1; 562 } 563} 564 565 566/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 567/// create a cycle. 568bool ScheduleDAGRRList::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 569 if (IsReachable(TargetSU, SU)) 570 return true; 571 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 572 I != E; ++I) 573 if (I->Cost < 0 && IsReachable(TargetSU, I->Dep)) 574 return true; 575 return false; 576} 577 578/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 579/// BTCycle in order to schedule a specific node. Returns the last unscheduled 580/// SUnit. Also returns if a successor is unscheduled in the process. 581void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 582 unsigned &CurCycle) { 583 SUnit *OldSU = NULL; 584 while (CurCycle > BtCycle) { 585 OldSU = Sequence.back(); 586 Sequence.pop_back(); 587 if (SU->isSucc(OldSU)) 588 // Don't try to remove SU from AvailableQueue. 589 SU->isAvailable = false; 590 UnscheduleNodeBottomUp(OldSU); 591 --CurCycle; 592 } 593 594 595 if (SU->isSucc(OldSU)) { 596 assert(false && "Something is wrong!"); 597 abort(); 598 } 599 600 ++NumBacktracks; 601} 602 603/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 604/// successors to the newly created node. 605SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 606 if (SU->getNode()->getFlaggedNode()) 607 return NULL; 608 609 SDNode *N = SU->getNode(); 610 if (!N) 611 return NULL; 612 613 SUnit *NewSU; 614 bool TryUnfold = false; 615 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 616 MVT VT = N->getValueType(i); 617 if (VT == MVT::Flag) 618 return NULL; 619 else if (VT == MVT::Other) 620 TryUnfold = true; 621 } 622 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 623 const SDValue &Op = N->getOperand(i); 624 MVT VT = Op.getNode()->getValueType(Op.getResNo()); 625 if (VT == MVT::Flag) 626 return NULL; 627 } 628 629 if (TryUnfold) { 630 SmallVector<SDNode*, 2> NewNodes; 631 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 632 return NULL; 633 634 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 635 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 636 637 N = NewNodes[1]; 638 SDNode *LoadNode = NewNodes[0]; 639 unsigned NumVals = N->getNumValues(); 640 unsigned OldNumVals = SU->getNode()->getNumValues(); 641 for (unsigned i = 0; i != NumVals; ++i) 642 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 643 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 644 SDValue(LoadNode, 1)); 645 646 // LoadNode may already exist. This can happen when there is another 647 // load from the same location and producing the same type of value 648 // but it has different alignment or volatileness. 649 bool isNewLoad = true; 650 SUnit *LoadSU; 651 if (LoadNode->getNodeId() != -1) { 652 LoadSU = &SUnits[LoadNode->getNodeId()]; 653 isNewLoad = false; 654 } else { 655 LoadSU = CreateNewSUnit(LoadNode); 656 LoadNode->setNodeId(LoadSU->NodeNum); 657 658 LoadSU->Depth = SU->Depth; 659 LoadSU->Height = SU->Height; 660 ComputeLatency(LoadSU); 661 } 662 663 SUnit *NewSU = CreateNewSUnit(N); 664 assert(N->getNodeId() == -1 && "Node already inserted!"); 665 N->setNodeId(NewSU->NodeNum); 666 667 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 668 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 669 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 670 NewSU->isTwoAddress = true; 671 break; 672 } 673 } 674 if (TID.isCommutable()) 675 NewSU->isCommutable = true; 676 // FIXME: Calculate height / depth and propagate the changes? 677 NewSU->Depth = SU->Depth; 678 NewSU->Height = SU->Height; 679 ComputeLatency(NewSU); 680 681 SUnit *ChainPred = NULL; 682 SmallVector<SDep, 4> ChainSuccs; 683 SmallVector<SDep, 4> LoadPreds; 684 SmallVector<SDep, 4> NodePreds; 685 SmallVector<SDep, 4> NodeSuccs; 686 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 687 I != E; ++I) { 688 if (I->isCtrl) 689 ChainPred = I->Dep; 690 else if (I->Dep->getNode() && I->Dep->getNode()->isOperandOf(LoadNode)) 691 LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); 692 else 693 NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); 694 } 695 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 696 I != E; ++I) { 697 if (I->isCtrl) 698 ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, 699 I->isCtrl, I->isArtificial)); 700 else 701 NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, 702 I->isCtrl, I->isArtificial)); 703 } 704 705 if (ChainPred) { 706 RemovePred(SU, ChainPred, true, false); 707 if (isNewLoad) 708 AddPred(LoadSU, ChainPred, true, false); 709 } 710 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 711 SDep *Pred = &LoadPreds[i]; 712 RemovePred(SU, Pred->Dep, Pred->isCtrl, Pred->isArtificial); 713 if (isNewLoad) { 714 AddPred(LoadSU, Pred->Dep, Pred->isCtrl, Pred->isArtificial, 715 Pred->Reg, Pred->Cost); 716 } 717 } 718 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 719 SDep *Pred = &NodePreds[i]; 720 RemovePred(SU, Pred->Dep, Pred->isCtrl, Pred->isArtificial); 721 AddPred(NewSU, Pred->Dep, Pred->isCtrl, Pred->isArtificial, 722 Pred->Reg, Pred->Cost); 723 } 724 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 725 SDep *Succ = &NodeSuccs[i]; 726 RemovePred(Succ->Dep, SU, Succ->isCtrl, Succ->isArtificial); 727 AddPred(Succ->Dep, NewSU, Succ->isCtrl, Succ->isArtificial, 728 Succ->Reg, Succ->Cost); 729 } 730 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 731 SDep *Succ = &ChainSuccs[i]; 732 RemovePred(Succ->Dep, SU, Succ->isCtrl, Succ->isArtificial); 733 if (isNewLoad) { 734 AddPred(Succ->Dep, LoadSU, Succ->isCtrl, Succ->isArtificial, 735 Succ->Reg, Succ->Cost); 736 } 737 } 738 if (isNewLoad) { 739 AddPred(NewSU, LoadSU, false, false); 740 } 741 742 if (isNewLoad) 743 AvailableQueue->addNode(LoadSU); 744 AvailableQueue->addNode(NewSU); 745 746 ++NumUnfolds; 747 748 if (NewSU->NumSuccsLeft == 0) { 749 NewSU->isAvailable = true; 750 return NewSU; 751 } 752 SU = NewSU; 753 } 754 755 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 756 NewSU = CreateClone(SU); 757 758 // New SUnit has the exact same predecessors. 759 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 760 I != E; ++I) 761 if (!I->isArtificial) { 762 AddPred(NewSU, I->Dep, I->isCtrl, false, I->Reg, I->Cost); 763 NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1); 764 } 765 766 // Only copy scheduled successors. Cut them from old node's successor 767 // list and move them over. 768 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 769 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 770 I != E; ++I) { 771 if (I->isArtificial) 772 continue; 773 if (I->Dep->isScheduled) { 774 NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1); 775 AddPred(I->Dep, NewSU, I->isCtrl, false, I->Reg, I->Cost); 776 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 777 } 778 } 779 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 780 SUnit *Succ = DelDeps[i].first; 781 bool isCtrl = DelDeps[i].second; 782 RemovePred(Succ, SU, isCtrl, false); 783 } 784 785 AvailableQueue->updateNode(SU); 786 AvailableQueue->addNode(NewSU); 787 788 ++NumDups; 789 return NewSU; 790} 791 792/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 793/// and move all scheduled successors of the given SUnit to the last copy. 794void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 795 const TargetRegisterClass *DestRC, 796 const TargetRegisterClass *SrcRC, 797 SmallVector<SUnit*, 2> &Copies) { 798 SUnit *CopyFromSU = CreateNewSUnit(NULL); 799 CopyFromSU->CopySrcRC = SrcRC; 800 CopyFromSU->CopyDstRC = DestRC; 801 CopyFromSU->Depth = SU->Depth; 802 CopyFromSU->Height = SU->Height; 803 804 SUnit *CopyToSU = CreateNewSUnit(NULL); 805 CopyToSU->CopySrcRC = DestRC; 806 CopyToSU->CopyDstRC = SrcRC; 807 808 // Only copy scheduled successors. Cut them from old node's successor 809 // list and move them over. 810 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 811 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 812 I != E; ++I) { 813 if (I->isArtificial) 814 continue; 815 if (I->Dep->isScheduled) { 816 CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1); 817 AddPred(I->Dep, CopyToSU, I->isCtrl, false, I->Reg, I->Cost); 818 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 819 } 820 } 821 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 822 SUnit *Succ = DelDeps[i].first; 823 bool isCtrl = DelDeps[i].second; 824 RemovePred(Succ, SU, isCtrl, false); 825 } 826 827 AddPred(CopyFromSU, SU, false, false, Reg, -1); 828 AddPred(CopyToSU, CopyFromSU, false, false, Reg, 1); 829 830 AvailableQueue->updateNode(SU); 831 AvailableQueue->addNode(CopyFromSU); 832 AvailableQueue->addNode(CopyToSU); 833 Copies.push_back(CopyFromSU); 834 Copies.push_back(CopyToSU); 835 836 ++NumCCCopies; 837} 838 839/// getPhysicalRegisterVT - Returns the ValueType of the physical register 840/// definition of the specified node. 841/// FIXME: Move to SelectionDAG? 842static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 843 const TargetInstrInfo *TII) { 844 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 845 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 846 unsigned NumRes = TID.getNumDefs(); 847 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 848 if (Reg == *ImpDef) 849 break; 850 ++NumRes; 851 } 852 return N->getValueType(NumRes); 853} 854 855/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 856/// scheduling of the given node to satisfy live physical register dependencies. 857/// If the specific node is the last one that's available to schedule, do 858/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 859bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 860 SmallVector<unsigned, 4> &LRegs){ 861 if (NumLiveRegs == 0) 862 return false; 863 864 SmallSet<unsigned, 4> RegAdded; 865 // If this node would clobber any "live" register, then it's not ready. 866 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 867 I != E; ++I) { 868 if (I->Cost < 0) { 869 unsigned Reg = I->Reg; 870 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->Dep) { 871 if (RegAdded.insert(Reg)) 872 LRegs.push_back(Reg); 873 } 874 for (const unsigned *Alias = TRI->getAliasSet(Reg); 875 *Alias; ++Alias) 876 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->Dep) { 877 if (RegAdded.insert(*Alias)) 878 LRegs.push_back(*Alias); 879 } 880 } 881 } 882 883 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 884 if (!Node->isMachineOpcode()) 885 continue; 886 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 887 if (!TID.ImplicitDefs) 888 continue; 889 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 890 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { 891 if (RegAdded.insert(*Reg)) 892 LRegs.push_back(*Reg); 893 } 894 for (const unsigned *Alias = TRI->getAliasSet(*Reg); 895 *Alias; ++Alias) 896 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 897 if (RegAdded.insert(*Alias)) 898 LRegs.push_back(*Alias); 899 } 900 } 901 } 902 return !LRegs.empty(); 903} 904 905 906/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 907/// schedulers. 908void ScheduleDAGRRList::ListScheduleBottomUp() { 909 unsigned CurCycle = 0; 910 // Add root to Available queue. 911 if (!SUnits.empty()) { 912 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 913 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 914 RootSU->isAvailable = true; 915 AvailableQueue->push(RootSU); 916 } 917 918 // While Available queue is not empty, grab the node with the highest 919 // priority. If it is not ready put it back. Schedule the node. 920 SmallVector<SUnit*, 4> NotReady; 921 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 922 Sequence.reserve(SUnits.size()); 923 while (!AvailableQueue->empty()) { 924 bool Delayed = false; 925 LRegsMap.clear(); 926 SUnit *CurSU = AvailableQueue->pop(); 927 while (CurSU) { 928 SmallVector<unsigned, 4> LRegs; 929 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 930 break; 931 Delayed = true; 932 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 933 934 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 935 NotReady.push_back(CurSU); 936 CurSU = AvailableQueue->pop(); 937 } 938 939 // All candidates are delayed due to live physical reg dependencies. 940 // Try backtracking, code duplication, or inserting cross class copies 941 // to resolve it. 942 if (Delayed && !CurSU) { 943 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 944 SUnit *TrySU = NotReady[i]; 945 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 946 947 // Try unscheduling up to the point where it's safe to schedule 948 // this node. 949 unsigned LiveCycle = CurCycle; 950 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 951 unsigned Reg = LRegs[j]; 952 unsigned LCycle = LiveRegCycles[Reg]; 953 LiveCycle = std::min(LiveCycle, LCycle); 954 } 955 SUnit *OldSU = Sequence[LiveCycle]; 956 if (!WillCreateCycle(TrySU, OldSU)) { 957 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 958 // Force the current node to be scheduled before the node that 959 // requires the physical reg dep. 960 if (OldSU->isAvailable) { 961 OldSU->isAvailable = false; 962 AvailableQueue->remove(OldSU); 963 } 964 AddPred(TrySU, OldSU, true, true); 965 // If one or more successors has been unscheduled, then the current 966 // node is no longer avaialable. Schedule a successor that's now 967 // available instead. 968 if (!TrySU->isAvailable) 969 CurSU = AvailableQueue->pop(); 970 else { 971 CurSU = TrySU; 972 TrySU->isPending = false; 973 NotReady.erase(NotReady.begin()+i); 974 } 975 break; 976 } 977 } 978 979 if (!CurSU) { 980 // Can't backtrack. Try duplicating the nodes that produces these 981 // "expensive to copy" values to break the dependency. In case even 982 // that doesn't work, insert cross class copies. 983 SUnit *TrySU = NotReady[0]; 984 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 985 assert(LRegs.size() == 1 && "Can't handle this yet!"); 986 unsigned Reg = LRegs[0]; 987 SUnit *LRDef = LiveRegDefs[Reg]; 988 SUnit *NewDef = CopyAndMoveSuccessors(LRDef); 989 if (!NewDef) { 990 // Issue expensive cross register class copies. 991 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 992 const TargetRegisterClass *RC = 993 TRI->getPhysicalRegisterRegClass(Reg, VT); 994 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 995 if (!DestRC) { 996 assert(false && "Don't know how to copy this physical register!"); 997 abort(); 998 } 999 SmallVector<SUnit*, 2> Copies; 1000 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 1001 DOUT << "Adding an edge from SU # " << TrySU->NodeNum 1002 << " to SU #" << Copies.front()->NodeNum << "\n"; 1003 AddPred(TrySU, Copies.front(), true, true); 1004 NewDef = Copies.back(); 1005 } 1006 1007 DOUT << "Adding an edge from SU # " << NewDef->NodeNum 1008 << " to SU #" << TrySU->NodeNum << "\n"; 1009 LiveRegDefs[Reg] = NewDef; 1010 AddPred(NewDef, TrySU, true, true); 1011 TrySU->isAvailable = false; 1012 CurSU = NewDef; 1013 } 1014 1015 if (!CurSU) { 1016 assert(false && "Unable to resolve live physical register dependencies!"); 1017 abort(); 1018 } 1019 } 1020 1021 // Add the nodes that aren't ready back onto the available list. 1022 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 1023 NotReady[i]->isPending = false; 1024 // May no longer be available due to backtracking. 1025 if (NotReady[i]->isAvailable) 1026 AvailableQueue->push(NotReady[i]); 1027 } 1028 NotReady.clear(); 1029 1030 if (CurSU) 1031 ScheduleNodeBottomUp(CurSU, CurCycle); 1032 ++CurCycle; 1033 } 1034 1035 // Reverse the order if it is bottom up. 1036 std::reverse(Sequence.begin(), Sequence.end()); 1037 1038#ifndef NDEBUG 1039 VerifySchedule(isBottomUp); 1040#endif 1041} 1042 1043//===----------------------------------------------------------------------===// 1044// Top-Down Scheduling 1045//===----------------------------------------------------------------------===// 1046 1047/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 1048/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 1049void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, SUnit *SuccSU, bool isChain) { 1050 --SuccSU->NumPredsLeft; 1051 1052#ifndef NDEBUG 1053 if (SuccSU->NumPredsLeft < 0) { 1054 cerr << "*** Scheduling failed! ***\n"; 1055 SuccSU->dump(this); 1056 cerr << " has been released too many times!\n"; 1057 assert(0); 1058 } 1059#endif 1060 1061 if (SuccSU->NumPredsLeft == 0) { 1062 SuccSU->isAvailable = true; 1063 AvailableQueue->push(SuccSU); 1064 } 1065} 1066 1067/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 1068/// count of its successors. If a successor pending count is zero, add it to 1069/// the Available queue. 1070void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 1071 DOUT << "*** Scheduling [" << CurCycle << "]: "; 1072 DEBUG(SU->dump(this)); 1073 1074 SU->Cycle = CurCycle; 1075 Sequence.push_back(SU); 1076 1077 // Top down: release successors 1078 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1079 I != E; ++I) 1080 ReleaseSucc(SU, I->Dep, I->isCtrl); 1081 1082 SU->isScheduled = true; 1083 AvailableQueue->ScheduledNode(SU); 1084} 1085 1086/// ListScheduleTopDown - The main loop of list scheduling for top-down 1087/// schedulers. 1088void ScheduleDAGRRList::ListScheduleTopDown() { 1089 unsigned CurCycle = 0; 1090 1091 // All leaves to Available queue. 1092 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 1093 // It is available if it has no predecessors. 1094 if (SUnits[i].Preds.empty()) { 1095 AvailableQueue->push(&SUnits[i]); 1096 SUnits[i].isAvailable = true; 1097 } 1098 } 1099 1100 // While Available queue is not empty, grab the node with the highest 1101 // priority. If it is not ready put it back. Schedule the node. 1102 Sequence.reserve(SUnits.size()); 1103 while (!AvailableQueue->empty()) { 1104 SUnit *CurSU = AvailableQueue->pop(); 1105 1106 if (CurSU) 1107 ScheduleNodeTopDown(CurSU, CurCycle); 1108 ++CurCycle; 1109 } 1110 1111#ifndef NDEBUG 1112 VerifySchedule(isBottomUp); 1113#endif 1114} 1115 1116 1117//===----------------------------------------------------------------------===// 1118// RegReductionPriorityQueue Implementation 1119//===----------------------------------------------------------------------===// 1120// 1121// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 1122// to reduce register pressure. 1123// 1124namespace { 1125 template<class SF> 1126 class RegReductionPriorityQueue; 1127 1128 /// Sorting functions for the Available queue. 1129 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 1130 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 1131 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 1132 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 1133 1134 bool operator()(const SUnit* left, const SUnit* right) const; 1135 }; 1136 1137 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 1138 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 1139 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 1140 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 1141 1142 bool operator()(const SUnit* left, const SUnit* right) const; 1143 }; 1144} // end anonymous namespace 1145 1146static inline bool isCopyFromLiveIn(const SUnit *SU) { 1147 SDNode *N = SU->getNode(); 1148 return N && N->getOpcode() == ISD::CopyFromReg && 1149 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 1150} 1151 1152/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1153/// Smaller number is the higher priority. 1154static unsigned 1155CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1156 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1157 if (SethiUllmanNumber != 0) 1158 return SethiUllmanNumber; 1159 1160 unsigned Extra = 0; 1161 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1162 I != E; ++I) { 1163 if (I->isCtrl) continue; // ignore chain preds 1164 SUnit *PredSU = I->Dep; 1165 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1166 if (PredSethiUllman > SethiUllmanNumber) { 1167 SethiUllmanNumber = PredSethiUllman; 1168 Extra = 0; 1169 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1170 ++Extra; 1171 } 1172 1173 SethiUllmanNumber += Extra; 1174 1175 if (SethiUllmanNumber == 0) 1176 SethiUllmanNumber = 1; 1177 1178 return SethiUllmanNumber; 1179} 1180 1181namespace { 1182 template<class SF> 1183 class VISIBILITY_HIDDEN RegReductionPriorityQueue 1184 : public SchedulingPriorityQueue { 1185 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 1186 unsigned currentQueueId; 1187 1188 protected: 1189 // SUnits - The SUnits for the current graph. 1190 std::vector<SUnit> *SUnits; 1191 1192 const TargetInstrInfo *TII; 1193 const TargetRegisterInfo *TRI; 1194 ScheduleDAGRRList *scheduleDAG; 1195 1196 // SethiUllmanNumbers - The SethiUllman number for each node. 1197 std::vector<unsigned> SethiUllmanNumbers; 1198 1199 public: 1200 RegReductionPriorityQueue(const TargetInstrInfo *tii, 1201 const TargetRegisterInfo *tri) : 1202 Queue(SF(this)), currentQueueId(0), 1203 TII(tii), TRI(tri), scheduleDAG(NULL) {} 1204 1205 void initNodes(std::vector<SUnit> &sunits) { 1206 SUnits = &sunits; 1207 // Add pseudo dependency edges for two-address nodes. 1208 AddPseudoTwoAddrDeps(); 1209 // Calculate node priorities. 1210 CalculateSethiUllmanNumbers(); 1211 } 1212 1213 void addNode(const SUnit *SU) { 1214 unsigned SUSize = SethiUllmanNumbers.size(); 1215 if (SUnits->size() > SUSize) 1216 SethiUllmanNumbers.resize(SUSize*2, 0); 1217 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1218 } 1219 1220 void updateNode(const SUnit *SU) { 1221 SethiUllmanNumbers[SU->NodeNum] = 0; 1222 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1223 } 1224 1225 void releaseState() { 1226 SUnits = 0; 1227 SethiUllmanNumbers.clear(); 1228 } 1229 1230 unsigned getNodePriority(const SUnit *SU) const { 1231 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1232 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1233 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1234 // CopyFromReg should be close to its def because it restricts 1235 // allocation choices. But if it is a livein then perhaps we want it 1236 // closer to its uses so it can be coalesced. 1237 return 0xffff; 1238 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1239 // CopyToReg should be close to its uses to facilitate coalescing and 1240 // avoid spilling. 1241 return 0; 1242 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1243 Opc == TargetInstrInfo::INSERT_SUBREG) 1244 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1245 // facilitate coalescing. 1246 return 0; 1247 else if (SU->NumSuccs == 0) 1248 // If SU does not have a use, i.e. it doesn't produce a value that would 1249 // be consumed (e.g. store), then it terminates a chain of computation. 1250 // Give it a large SethiUllman number so it will be scheduled right 1251 // before its predecessors that it doesn't lengthen their live ranges. 1252 return 0xffff; 1253 else if (SU->NumPreds == 0) 1254 // If SU does not have a def, schedule it close to its uses because it 1255 // does not lengthen any live ranges. 1256 return 0; 1257 else 1258 return SethiUllmanNumbers[SU->NodeNum]; 1259 } 1260 1261 unsigned size() const { return Queue.size(); } 1262 1263 bool empty() const { return Queue.empty(); } 1264 1265 void push(SUnit *U) { 1266 assert(!U->NodeQueueId && "Node in the queue already"); 1267 U->NodeQueueId = ++currentQueueId; 1268 Queue.push(U); 1269 } 1270 1271 void push_all(const std::vector<SUnit *> &Nodes) { 1272 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1273 push(Nodes[i]); 1274 } 1275 1276 SUnit *pop() { 1277 if (empty()) return NULL; 1278 SUnit *V = Queue.top(); 1279 Queue.pop(); 1280 V->NodeQueueId = 0; 1281 return V; 1282 } 1283 1284 void remove(SUnit *SU) { 1285 assert(!Queue.empty() && "Queue is empty!"); 1286 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1287 Queue.erase_one(SU); 1288 SU->NodeQueueId = 0; 1289 } 1290 1291 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1292 scheduleDAG = scheduleDag; 1293 } 1294 1295 protected: 1296 bool canClobber(const SUnit *SU, const SUnit *Op); 1297 void AddPseudoTwoAddrDeps(); 1298 void CalculateSethiUllmanNumbers(); 1299 }; 1300 1301 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1302 BURegReductionPriorityQueue; 1303 1304 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1305 TDRegReductionPriorityQueue; 1306} 1307 1308/// closestSucc - Returns the scheduled cycle of the successor which is 1309/// closet to the current cycle. 1310static unsigned closestSucc(const SUnit *SU) { 1311 unsigned MaxCycle = 0; 1312 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1313 I != E; ++I) { 1314 unsigned Cycle = I->Dep->Cycle; 1315 // If there are bunch of CopyToRegs stacked up, they should be considered 1316 // to be at the same position. 1317 if (I->Dep->getNode() && I->Dep->getNode()->getOpcode() == ISD::CopyToReg) 1318 Cycle = closestSucc(I->Dep)+1; 1319 if (Cycle > MaxCycle) 1320 MaxCycle = Cycle; 1321 } 1322 return MaxCycle; 1323} 1324 1325/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1326/// for scratch registers. Live-in operands and live-out results don't count 1327/// since they are "fixed". 1328static unsigned calcMaxScratches(const SUnit *SU) { 1329 unsigned Scratches = 0; 1330 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1331 I != E; ++I) { 1332 if (I->isCtrl) continue; // ignore chain preds 1333 if (!I->Dep->getNode() || I->Dep->getNode()->getOpcode() != ISD::CopyFromReg) 1334 Scratches++; 1335 } 1336 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1337 I != E; ++I) { 1338 if (I->isCtrl) continue; // ignore chain succs 1339 if (!I->Dep->getNode() || I->Dep->getNode()->getOpcode() != ISD::CopyToReg) 1340 Scratches += 10; 1341 } 1342 return Scratches; 1343} 1344 1345// Bottom up 1346bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1347 unsigned LPriority = SPQ->getNodePriority(left); 1348 unsigned RPriority = SPQ->getNodePriority(right); 1349 if (LPriority != RPriority) 1350 return LPriority > RPriority; 1351 1352 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1353 // e.g. 1354 // t1 = op t2, c1 1355 // t3 = op t4, c2 1356 // 1357 // and the following instructions are both ready. 1358 // t2 = op c3 1359 // t4 = op c4 1360 // 1361 // Then schedule t2 = op first. 1362 // i.e. 1363 // t4 = op c4 1364 // t2 = op c3 1365 // t1 = op t2, c1 1366 // t3 = op t4, c2 1367 // 1368 // This creates more short live intervals. 1369 unsigned LDist = closestSucc(left); 1370 unsigned RDist = closestSucc(right); 1371 if (LDist != RDist) 1372 return LDist < RDist; 1373 1374 // Intuitively, it's good to push down instructions whose results are 1375 // liveout so their long live ranges won't conflict with other values 1376 // which are needed inside the BB. Further prioritize liveout instructions 1377 // by the number of operands which are calculated within the BB. 1378 unsigned LScratch = calcMaxScratches(left); 1379 unsigned RScratch = calcMaxScratches(right); 1380 if (LScratch != RScratch) 1381 return LScratch > RScratch; 1382 1383 if (left->Height != right->Height) 1384 return left->Height > right->Height; 1385 1386 if (left->Depth != right->Depth) 1387 return left->Depth < right->Depth; 1388 1389 assert(left->NodeQueueId && right->NodeQueueId && 1390 "NodeQueueId cannot be zero"); 1391 return (left->NodeQueueId > right->NodeQueueId); 1392} 1393 1394template<class SF> 1395bool 1396RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1397 if (SU->isTwoAddress) { 1398 unsigned Opc = SU->getNode()->getMachineOpcode(); 1399 const TargetInstrDesc &TID = TII->get(Opc); 1400 unsigned NumRes = TID.getNumDefs(); 1401 unsigned NumOps = TID.getNumOperands() - NumRes; 1402 for (unsigned i = 0; i != NumOps; ++i) { 1403 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1404 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1405 if (DU->getNodeId() != -1 && 1406 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1407 return true; 1408 } 1409 } 1410 } 1411 return false; 1412} 1413 1414 1415/// hasCopyToRegUse - Return true if SU has a value successor that is a 1416/// CopyToReg node. 1417static bool hasCopyToRegUse(const SUnit *SU) { 1418 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1419 I != E; ++I) { 1420 if (I->isCtrl) continue; 1421 const SUnit *SuccSU = I->Dep; 1422 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1423 return true; 1424 } 1425 return false; 1426} 1427 1428/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1429/// physical register defs. 1430static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1431 const TargetInstrInfo *TII, 1432 const TargetRegisterInfo *TRI) { 1433 SDNode *N = SuccSU->getNode(); 1434 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1435 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1436 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1437 const unsigned *SUImpDefs = 1438 TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 1439 if (!SUImpDefs) 1440 return false; 1441 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1442 MVT VT = N->getValueType(i); 1443 if (VT == MVT::Flag || VT == MVT::Other) 1444 continue; 1445 if (!N->hasAnyUseOfValue(i)) 1446 continue; 1447 unsigned Reg = ImpDefs[i - NumDefs]; 1448 for (;*SUImpDefs; ++SUImpDefs) { 1449 unsigned SUReg = *SUImpDefs; 1450 if (TRI->regsOverlap(Reg, SUReg)) 1451 return true; 1452 } 1453 } 1454 return false; 1455} 1456 1457/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1458/// it as a def&use operand. Add a pseudo control edge from it to the other 1459/// node (if it won't create a cycle) so the two-address one will be scheduled 1460/// first (lower in the schedule). If both nodes are two-address, favor the 1461/// one that has a CopyToReg use (more likely to be a loop induction update). 1462/// If both are two-address, but one is commutable while the other is not 1463/// commutable, favor the one that's not commutable. 1464template<class SF> 1465void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1466 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1467 SUnit *SU = &(*SUnits)[i]; 1468 if (!SU->isTwoAddress) 1469 continue; 1470 1471 SDNode *Node = SU->getNode(); 1472 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1473 continue; 1474 1475 unsigned Opc = Node->getMachineOpcode(); 1476 const TargetInstrDesc &TID = TII->get(Opc); 1477 unsigned NumRes = TID.getNumDefs(); 1478 unsigned NumOps = TID.getNumOperands() - NumRes; 1479 for (unsigned j = 0; j != NumOps; ++j) { 1480 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1481 continue; 1482 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1483 if (DU->getNodeId() == -1) 1484 continue; 1485 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1486 if (!DUSU) continue; 1487 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1488 E = DUSU->Succs.end(); I != E; ++I) { 1489 if (I->isCtrl) continue; 1490 SUnit *SuccSU = I->Dep; 1491 if (SuccSU == SU) 1492 continue; 1493 // Be conservative. Ignore if nodes aren't at roughly the same 1494 // depth and height. 1495 if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) 1496 continue; 1497 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1498 continue; 1499 // Don't constrain nodes with physical register defs if the 1500 // predecessor can clobber them. 1501 if (SuccSU->hasPhysRegDefs) { 1502 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1503 continue; 1504 } 1505 // Don't constraint extract_subreg / insert_subreg these may be 1506 // coalesced away. We don't them close to their uses. 1507 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1508 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1509 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1510 continue; 1511 if ((!canClobber(SuccSU, DUSU) || 1512 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1513 (!SU->isCommutable && SuccSU->isCommutable)) && 1514 !scheduleDAG->IsReachable(SuccSU, SU)) { 1515 DOUT << "Adding an edge from SU # " << SU->NodeNum 1516 << " to SU #" << SuccSU->NodeNum << "\n"; 1517 scheduleDAG->AddPred(SU, SuccSU, true, true); 1518 } 1519 } 1520 } 1521 } 1522} 1523 1524/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1525/// scheduling units. 1526template<class SF> 1527void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1528 SethiUllmanNumbers.assign(SUnits->size(), 0); 1529 1530 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1531 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1532} 1533 1534/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1535/// predecessors of the successors of the SUnit SU. Stop when the provided 1536/// limit is exceeded. 1537static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1538 unsigned Limit) { 1539 unsigned Sum = 0; 1540 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1541 I != E; ++I) { 1542 const SUnit *SuccSU = I->Dep; 1543 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1544 EE = SuccSU->Preds.end(); II != EE; ++II) { 1545 SUnit *PredSU = II->Dep; 1546 if (!PredSU->isScheduled) 1547 if (++Sum > Limit) 1548 return Sum; 1549 } 1550 } 1551 return Sum; 1552} 1553 1554 1555// Top down 1556bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1557 unsigned LPriority = SPQ->getNodePriority(left); 1558 unsigned RPriority = SPQ->getNodePriority(right); 1559 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1560 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1561 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1562 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1563 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1564 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1565 1566 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1567 return false; 1568 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1569 return true; 1570 1571 if (LIsFloater) 1572 LBonus -= 2; 1573 if (RIsFloater) 1574 RBonus -= 2; 1575 if (left->NumSuccs == 1) 1576 LBonus += 2; 1577 if (right->NumSuccs == 1) 1578 RBonus += 2; 1579 1580 if (LPriority+LBonus != RPriority+RBonus) 1581 return LPriority+LBonus < RPriority+RBonus; 1582 1583 if (left->Depth != right->Depth) 1584 return left->Depth < right->Depth; 1585 1586 if (left->NumSuccsLeft != right->NumSuccsLeft) 1587 return left->NumSuccsLeft > right->NumSuccsLeft; 1588 1589 assert(left->NodeQueueId && right->NodeQueueId && 1590 "NodeQueueId cannot be zero"); 1591 return (left->NodeQueueId > right->NodeQueueId); 1592} 1593 1594//===----------------------------------------------------------------------===// 1595// Public Constructor Functions 1596//===----------------------------------------------------------------------===// 1597 1598llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1599 SelectionDAG *DAG, 1600 const TargetMachine *TM, 1601 MachineBasicBlock *BB, 1602 bool) { 1603 const TargetInstrInfo *TII = TM->getInstrInfo(); 1604 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1605 1606 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1607 1608 ScheduleDAGRRList *SD = 1609 new ScheduleDAGRRList(DAG, BB, *TM, true, PQ); 1610 PQ->setScheduleDAG(SD); 1611 return SD; 1612} 1613 1614llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1615 SelectionDAG *DAG, 1616 const TargetMachine *TM, 1617 MachineBasicBlock *BB, 1618 bool) { 1619 const TargetInstrInfo *TII = TM->getInstrInfo(); 1620 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1621 1622 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1623 1624 ScheduleDAGRRList *SD = new ScheduleDAGRRList(DAG, BB, *TM, false, PQ); 1625 PQ->setScheduleDAG(SD); 1626 return SD; 1627} 1628