ScheduleDAGRRList.cpp revision 1da5cacca96a3488451cb9627ec3407d861db35a
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SelectionDAGISel.h" 22#include "llvm/Target/TargetRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/ErrorHandling.h" 28#include "llvm/ADT/PriorityQueue.h" 29#include "llvm/ADT/SmallSet.h" 30#include "llvm/ADT/Statistic.h" 31#include "llvm/ADT/STLExtras.h" 32#include "llvm/Support/raw_ostream.h" 33#include <climits> 34using namespace llvm; 35 36STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 37STATISTIC(NumUnfolds, "Number of nodes unfolded"); 38STATISTIC(NumDups, "Number of duplicated nodes"); 39STATISTIC(NumPRCopies, "Number of physical register copies"); 40 41static RegisterScheduler 42 burrListDAGScheduler("list-burr", 43 "Bottom-up register reduction list scheduling", 44 createBURRListDAGScheduler); 45static RegisterScheduler 46 tdrListrDAGScheduler("list-tdrr", 47 "Top-down register reduction list scheduling", 48 createTDRRListDAGScheduler); 49 50namespace { 51//===----------------------------------------------------------------------===// 52/// ScheduleDAGRRList - The actual register reduction list scheduler 53/// implementation. This supports both top-down and bottom-up scheduling. 54/// 55class ScheduleDAGRRList : public ScheduleDAGSDNodes { 56private: 57 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 58 /// it is top-down. 59 bool isBottomUp; 60 61 /// AvailableQueue - The priority queue to use for the available SUnits. 62 SchedulingPriorityQueue *AvailableQueue; 63 64 /// LiveRegDefs - A set of physical registers and their definition 65 /// that are "live". These nodes must be scheduled before any other nodes that 66 /// modifies the registers can be scheduled. 67 unsigned NumLiveRegs; 68 std::vector<SUnit*> LiveRegDefs; 69 std::vector<unsigned> LiveRegCycles; 70 71 /// Topo - A topological ordering for SUnits which permits fast IsReachable 72 /// and similar queries. 73 ScheduleDAGTopologicalSort Topo; 74 75public: 76 ScheduleDAGRRList(MachineFunction &mf, 77 bool isbottomup, 78 SchedulingPriorityQueue *availqueue) 79 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), 80 AvailableQueue(availqueue), Topo(SUnits) { 81 } 82 83 ~ScheduleDAGRRList() { 84 delete AvailableQueue; 85 } 86 87 void Schedule(); 88 89 /// IsReachable - Checks if SU is reachable from TargetSU. 90 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 91 return Topo.IsReachable(SU, TargetSU); 92 } 93 94 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 95 /// create a cycle. 96 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 97 return Topo.WillCreateCycle(SU, TargetSU); 98 } 99 100 /// AddPred - adds a predecessor edge to SUnit SU. 101 /// This returns true if this is a new predecessor. 102 /// Updates the topological ordering if required. 103 void AddPred(SUnit *SU, const SDep &D) { 104 Topo.AddPred(SU, D.getSUnit()); 105 SU->addPred(D); 106 } 107 108 /// RemovePred - removes a predecessor edge from SUnit SU. 109 /// This returns true if an edge was removed. 110 /// Updates the topological ordering if required. 111 void RemovePred(SUnit *SU, const SDep &D) { 112 Topo.RemovePred(SU, D.getSUnit()); 113 SU->removePred(D); 114 } 115 116private: 117 void ReleasePred(SUnit *SU, const SDep *PredEdge); 118 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 119 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 120 void ReleaseSuccessors(SUnit *SU); 121 void CapturePred(SDep *PredEdge); 122 void ScheduleNodeBottomUp(SUnit*, unsigned); 123 void ScheduleNodeTopDown(SUnit*, unsigned); 124 void UnscheduleNodeBottomUp(SUnit*); 125 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 126 SUnit *CopyAndMoveSuccessors(SUnit*); 127 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 128 const TargetRegisterClass*, 129 const TargetRegisterClass*, 130 SmallVector<SUnit*, 2>&); 131 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 132 void ListScheduleTopDown(); 133 void ListScheduleBottomUp(); 134 135 136 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 137 /// Updates the topological ordering if required. 138 SUnit *CreateNewSUnit(SDNode *N) { 139 unsigned NumSUnits = SUnits.size(); 140 SUnit *NewNode = NewSUnit(N); 141 // Update the topological ordering. 142 if (NewNode->NodeNum >= NumSUnits) 143 Topo.InitDAGTopologicalSorting(); 144 return NewNode; 145 } 146 147 /// CreateClone - Creates a new SUnit from an existing one. 148 /// Updates the topological ordering if required. 149 SUnit *CreateClone(SUnit *N) { 150 unsigned NumSUnits = SUnits.size(); 151 SUnit *NewNode = Clone(N); 152 // Update the topological ordering. 153 if (NewNode->NodeNum >= NumSUnits) 154 Topo.InitDAGTopologicalSorting(); 155 return NewNode; 156 } 157 158 /// ForceUnitLatencies - Return true, since register-pressure-reducing 159 /// scheduling doesn't need actual latency information. 160 bool ForceUnitLatencies() const { return true; } 161}; 162} // end anonymous namespace 163 164 165/// Schedule - Schedule the DAG using list scheduling. 166void ScheduleDAGRRList::Schedule() { 167 DEBUG(dbgs() << "********** List Scheduling **********\n"); 168 169 NumLiveRegs = 0; 170 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 171 LiveRegCycles.resize(TRI->getNumRegs(), 0); 172 173 // Build the scheduling graph. 174 BuildSchedGraph(NULL); 175 176 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 177 SUnits[su].dumpAll(this)); 178 Topo.InitDAGTopologicalSorting(); 179 180 AvailableQueue->initNodes(SUnits); 181 182 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 183 if (isBottomUp) 184 ListScheduleBottomUp(); 185 else 186 ListScheduleTopDown(); 187 188 AvailableQueue->releaseState(); 189} 190 191//===----------------------------------------------------------------------===// 192// Bottom-Up Scheduling 193//===----------------------------------------------------------------------===// 194 195/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 196/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 197void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 198 SUnit *PredSU = PredEdge->getSUnit(); 199 200#ifndef NDEBUG 201 if (PredSU->NumSuccsLeft == 0) { 202 dbgs() << "*** Scheduling failed! ***\n"; 203 PredSU->dump(this); 204 dbgs() << " has been released too many times!\n"; 205 llvm_unreachable(0); 206 } 207#endif 208 --PredSU->NumSuccsLeft; 209 210 // If all the node's successors are scheduled, this node is ready 211 // to be scheduled. Ignore the special EntrySU node. 212 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 213 PredSU->isAvailable = true; 214 AvailableQueue->push(PredSU); 215 } 216} 217 218void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 219 // Bottom up: release predecessors 220 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 221 I != E; ++I) { 222 ReleasePred(SU, &*I); 223 if (I->isAssignedRegDep()) { 224 // This is a physical register dependency and it's impossible or 225 // expensive to copy the register. Make sure nothing that can 226 // clobber the register is scheduled between the predecessor and 227 // this node. 228 if (!LiveRegDefs[I->getReg()]) { 229 ++NumLiveRegs; 230 LiveRegDefs[I->getReg()] = I->getSUnit(); 231 LiveRegCycles[I->getReg()] = CurCycle; 232 } 233 } 234 } 235} 236 237/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 238/// count of its predecessors. If a predecessor pending count is zero, add it to 239/// the Available queue. 240void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 241 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 242 DEBUG(SU->dump(this)); 243 244 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 245 SU->setHeightToAtLeast(CurCycle); 246 Sequence.push_back(SU); 247 248 ReleasePredecessors(SU, CurCycle); 249 250 // Release all the implicit physical register defs that are live. 251 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 252 I != E; ++I) { 253 if (I->isAssignedRegDep()) { 254 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 255 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 256 assert(LiveRegDefs[I->getReg()] == SU && 257 "Physical register dependency violated?"); 258 --NumLiveRegs; 259 LiveRegDefs[I->getReg()] = NULL; 260 LiveRegCycles[I->getReg()] = 0; 261 } 262 } 263 } 264 265 SU->isScheduled = true; 266 AvailableQueue->ScheduledNode(SU); 267} 268 269/// CapturePred - This does the opposite of ReleasePred. Since SU is being 270/// unscheduled, incrcease the succ left count of its predecessors. Remove 271/// them from AvailableQueue if necessary. 272void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 273 SUnit *PredSU = PredEdge->getSUnit(); 274 if (PredSU->isAvailable) { 275 PredSU->isAvailable = false; 276 if (!PredSU->isPending) 277 AvailableQueue->remove(PredSU); 278 } 279 280 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 281 ++PredSU->NumSuccsLeft; 282} 283 284/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 285/// its predecessor states to reflect the change. 286void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 287 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 288 DEBUG(SU->dump(this)); 289 290 AvailableQueue->UnscheduledNode(SU); 291 292 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 293 I != E; ++I) { 294 CapturePred(&*I); 295 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 296 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 297 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 298 "Physical register dependency violated?"); 299 --NumLiveRegs; 300 LiveRegDefs[I->getReg()] = NULL; 301 LiveRegCycles[I->getReg()] = 0; 302 } 303 } 304 305 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 306 I != E; ++I) { 307 if (I->isAssignedRegDep()) { 308 if (!LiveRegDefs[I->getReg()]) { 309 LiveRegDefs[I->getReg()] = SU; 310 ++NumLiveRegs; 311 } 312 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 313 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 314 } 315 } 316 317 SU->setHeightDirty(); 318 SU->isScheduled = false; 319 SU->isAvailable = true; 320 AvailableQueue->push(SU); 321} 322 323/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 324/// BTCycle in order to schedule a specific node. 325void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 326 unsigned &CurCycle) { 327 SUnit *OldSU = NULL; 328 while (CurCycle > BtCycle) { 329 OldSU = Sequence.back(); 330 Sequence.pop_back(); 331 if (SU->isSucc(OldSU)) 332 // Don't try to remove SU from AvailableQueue. 333 SU->isAvailable = false; 334 UnscheduleNodeBottomUp(OldSU); 335 --CurCycle; 336 } 337 338 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 339 340 ++NumBacktracks; 341} 342 343/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 344/// successors to the newly created node. 345SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 346 if (SU->getNode()->getFlaggedNode()) 347 return NULL; 348 349 SDNode *N = SU->getNode(); 350 if (!N) 351 return NULL; 352 353 SUnit *NewSU; 354 bool TryUnfold = false; 355 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 356 EVT VT = N->getValueType(i); 357 if (VT == MVT::Flag) 358 return NULL; 359 else if (VT == MVT::Other) 360 TryUnfold = true; 361 } 362 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 363 const SDValue &Op = N->getOperand(i); 364 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 365 if (VT == MVT::Flag) 366 return NULL; 367 } 368 369 if (TryUnfold) { 370 SmallVector<SDNode*, 2> NewNodes; 371 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 372 return NULL; 373 374 DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); 375 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 376 377 N = NewNodes[1]; 378 SDNode *LoadNode = NewNodes[0]; 379 unsigned NumVals = N->getNumValues(); 380 unsigned OldNumVals = SU->getNode()->getNumValues(); 381 for (unsigned i = 0; i != NumVals; ++i) 382 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 383 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 384 SDValue(LoadNode, 1)); 385 386 // LoadNode may already exist. This can happen when there is another 387 // load from the same location and producing the same type of value 388 // but it has different alignment or volatileness. 389 bool isNewLoad = true; 390 SUnit *LoadSU; 391 if (LoadNode->getNodeId() != -1) { 392 LoadSU = &SUnits[LoadNode->getNodeId()]; 393 isNewLoad = false; 394 } else { 395 LoadSU = CreateNewSUnit(LoadNode); 396 LoadNode->setNodeId(LoadSU->NodeNum); 397 ComputeLatency(LoadSU); 398 } 399 400 SUnit *NewSU = CreateNewSUnit(N); 401 assert(N->getNodeId() == -1 && "Node already inserted!"); 402 N->setNodeId(NewSU->NodeNum); 403 404 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 405 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 406 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 407 NewSU->isTwoAddress = true; 408 break; 409 } 410 } 411 if (TID.isCommutable()) 412 NewSU->isCommutable = true; 413 ComputeLatency(NewSU); 414 415 // Record all the edges to and from the old SU, by category. 416 SmallVector<SDep, 4> ChainPreds; 417 SmallVector<SDep, 4> ChainSuccs; 418 SmallVector<SDep, 4> LoadPreds; 419 SmallVector<SDep, 4> NodePreds; 420 SmallVector<SDep, 4> NodeSuccs; 421 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 422 I != E; ++I) { 423 if (I->isCtrl()) 424 ChainPreds.push_back(*I); 425 else if (I->getSUnit()->getNode() && 426 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 427 LoadPreds.push_back(*I); 428 else 429 NodePreds.push_back(*I); 430 } 431 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 432 I != E; ++I) { 433 if (I->isCtrl()) 434 ChainSuccs.push_back(*I); 435 else 436 NodeSuccs.push_back(*I); 437 } 438 439 // Now assign edges to the newly-created nodes. 440 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 441 const SDep &Pred = ChainPreds[i]; 442 RemovePred(SU, Pred); 443 if (isNewLoad) 444 AddPred(LoadSU, Pred); 445 } 446 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 447 const SDep &Pred = LoadPreds[i]; 448 RemovePred(SU, Pred); 449 if (isNewLoad) 450 AddPred(LoadSU, Pred); 451 } 452 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 453 const SDep &Pred = NodePreds[i]; 454 RemovePred(SU, Pred); 455 AddPred(NewSU, Pred); 456 } 457 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 458 SDep D = NodeSuccs[i]; 459 SUnit *SuccDep = D.getSUnit(); 460 D.setSUnit(SU); 461 RemovePred(SuccDep, D); 462 D.setSUnit(NewSU); 463 AddPred(SuccDep, D); 464 } 465 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 466 SDep D = ChainSuccs[i]; 467 SUnit *SuccDep = D.getSUnit(); 468 D.setSUnit(SU); 469 RemovePred(SuccDep, D); 470 if (isNewLoad) { 471 D.setSUnit(LoadSU); 472 AddPred(SuccDep, D); 473 } 474 } 475 476 // Add a data dependency to reflect that NewSU reads the value defined 477 // by LoadSU. 478 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 479 480 if (isNewLoad) 481 AvailableQueue->addNode(LoadSU); 482 AvailableQueue->addNode(NewSU); 483 484 ++NumUnfolds; 485 486 if (NewSU->NumSuccsLeft == 0) { 487 NewSU->isAvailable = true; 488 return NewSU; 489 } 490 SU = NewSU; 491 } 492 493 DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); 494 NewSU = CreateClone(SU); 495 496 // New SUnit has the exact same predecessors. 497 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 498 I != E; ++I) 499 if (!I->isArtificial()) 500 AddPred(NewSU, *I); 501 502 // Only copy scheduled successors. Cut them from old node's successor 503 // list and move them over. 504 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 505 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 506 I != E; ++I) { 507 if (I->isArtificial()) 508 continue; 509 SUnit *SuccSU = I->getSUnit(); 510 if (SuccSU->isScheduled) { 511 SDep D = *I; 512 D.setSUnit(NewSU); 513 AddPred(SuccSU, D); 514 D.setSUnit(SU); 515 DelDeps.push_back(std::make_pair(SuccSU, D)); 516 } 517 } 518 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 519 RemovePred(DelDeps[i].first, DelDeps[i].second); 520 521 AvailableQueue->updateNode(SU); 522 AvailableQueue->addNode(NewSU); 523 524 ++NumDups; 525 return NewSU; 526} 527 528/// InsertCopiesAndMoveSuccs - Insert register copies and move all 529/// scheduled successors of the given SUnit to the last copy. 530void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 531 const TargetRegisterClass *DestRC, 532 const TargetRegisterClass *SrcRC, 533 SmallVector<SUnit*, 2> &Copies) { 534 SUnit *CopyFromSU = CreateNewSUnit(NULL); 535 CopyFromSU->CopySrcRC = SrcRC; 536 CopyFromSU->CopyDstRC = DestRC; 537 538 SUnit *CopyToSU = CreateNewSUnit(NULL); 539 CopyToSU->CopySrcRC = DestRC; 540 CopyToSU->CopyDstRC = SrcRC; 541 542 // Only copy scheduled successors. Cut them from old node's successor 543 // list and move them over. 544 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 545 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 546 I != E; ++I) { 547 if (I->isArtificial()) 548 continue; 549 SUnit *SuccSU = I->getSUnit(); 550 if (SuccSU->isScheduled) { 551 SDep D = *I; 552 D.setSUnit(CopyToSU); 553 AddPred(SuccSU, D); 554 DelDeps.push_back(std::make_pair(SuccSU, *I)); 555 } 556 } 557 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 558 RemovePred(DelDeps[i].first, DelDeps[i].second); 559 560 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 561 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 562 563 AvailableQueue->updateNode(SU); 564 AvailableQueue->addNode(CopyFromSU); 565 AvailableQueue->addNode(CopyToSU); 566 Copies.push_back(CopyFromSU); 567 Copies.push_back(CopyToSU); 568 569 ++NumPRCopies; 570} 571 572/// getPhysicalRegisterVT - Returns the ValueType of the physical register 573/// definition of the specified node. 574/// FIXME: Move to SelectionDAG? 575static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 576 const TargetInstrInfo *TII) { 577 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 578 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 579 unsigned NumRes = TID.getNumDefs(); 580 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 581 if (Reg == *ImpDef) 582 break; 583 ++NumRes; 584 } 585 return N->getValueType(NumRes); 586} 587 588/// CheckForLiveRegDef - Return true and update live register vector if the 589/// specified register def of the specified SUnit clobbers any "live" registers. 590static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 591 std::vector<SUnit*> &LiveRegDefs, 592 SmallSet<unsigned, 4> &RegAdded, 593 SmallVector<unsigned, 4> &LRegs, 594 const TargetRegisterInfo *TRI) { 595 bool Added = false; 596 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 597 if (RegAdded.insert(Reg)) { 598 LRegs.push_back(Reg); 599 Added = true; 600 } 601 } 602 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 603 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 604 if (RegAdded.insert(*Alias)) { 605 LRegs.push_back(*Alias); 606 Added = true; 607 } 608 } 609 return Added; 610} 611 612/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 613/// scheduling of the given node to satisfy live physical register dependencies. 614/// If the specific node is the last one that's available to schedule, do 615/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 616bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 617 SmallVector<unsigned, 4> &LRegs){ 618 if (NumLiveRegs == 0) 619 return false; 620 621 SmallSet<unsigned, 4> RegAdded; 622 // If this node would clobber any "live" register, then it's not ready. 623 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 624 I != E; ++I) { 625 if (I->isAssignedRegDep()) 626 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 627 RegAdded, LRegs, TRI); 628 } 629 630 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 631 if (Node->getOpcode() == ISD::INLINEASM) { 632 // Inline asm can clobber physical defs. 633 unsigned NumOps = Node->getNumOperands(); 634 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) 635 --NumOps; // Ignore the flag operand. 636 637 for (unsigned i = 2; i != NumOps;) { 638 unsigned Flags = 639 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 640 unsigned NumVals = (Flags & 0xffff) >> 3; 641 642 ++i; // Skip the ID value. 643 if ((Flags & 7) == 2 || (Flags & 7) == 6) { 644 // Check for def of register or earlyclobber register. 645 for (; NumVals; --NumVals, ++i) { 646 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 647 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 648 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 649 } 650 } else 651 i += NumVals; 652 } 653 continue; 654 } 655 656 if (!Node->isMachineOpcode()) 657 continue; 658 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 659 if (!TID.ImplicitDefs) 660 continue; 661 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 662 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 663 } 664 return !LRegs.empty(); 665} 666 667 668/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 669/// schedulers. 670void ScheduleDAGRRList::ListScheduleBottomUp() { 671 unsigned CurCycle = 0; 672 673 // Release any predecessors of the special Exit node. 674 ReleasePredecessors(&ExitSU, CurCycle); 675 676 // Add root to Available queue. 677 if (!SUnits.empty()) { 678 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 679 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 680 RootSU->isAvailable = true; 681 AvailableQueue->push(RootSU); 682 } 683 684 // While Available queue is not empty, grab the node with the highest 685 // priority. If it is not ready put it back. Schedule the node. 686 SmallVector<SUnit*, 4> NotReady; 687 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 688 Sequence.reserve(SUnits.size()); 689 while (!AvailableQueue->empty()) { 690 bool Delayed = false; 691 LRegsMap.clear(); 692 SUnit *CurSU = AvailableQueue->pop(); 693 while (CurSU) { 694 SmallVector<unsigned, 4> LRegs; 695 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 696 break; 697 Delayed = true; 698 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 699 700 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 701 NotReady.push_back(CurSU); 702 CurSU = AvailableQueue->pop(); 703 } 704 705 // All candidates are delayed due to live physical reg dependencies. 706 // Try backtracking, code duplication, or inserting cross class copies 707 // to resolve it. 708 if (Delayed && !CurSU) { 709 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 710 SUnit *TrySU = NotReady[i]; 711 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 712 713 // Try unscheduling up to the point where it's safe to schedule 714 // this node. 715 unsigned LiveCycle = CurCycle; 716 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 717 unsigned Reg = LRegs[j]; 718 unsigned LCycle = LiveRegCycles[Reg]; 719 LiveCycle = std::min(LiveCycle, LCycle); 720 } 721 SUnit *OldSU = Sequence[LiveCycle]; 722 if (!WillCreateCycle(TrySU, OldSU)) { 723 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 724 // Force the current node to be scheduled before the node that 725 // requires the physical reg dep. 726 if (OldSU->isAvailable) { 727 OldSU->isAvailable = false; 728 AvailableQueue->remove(OldSU); 729 } 730 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 731 /*Reg=*/0, /*isNormalMemory=*/false, 732 /*isMustAlias=*/false, /*isArtificial=*/true)); 733 // If one or more successors has been unscheduled, then the current 734 // node is no longer avaialable. Schedule a successor that's now 735 // available instead. 736 if (!TrySU->isAvailable) 737 CurSU = AvailableQueue->pop(); 738 else { 739 CurSU = TrySU; 740 TrySU->isPending = false; 741 NotReady.erase(NotReady.begin()+i); 742 } 743 break; 744 } 745 } 746 747 if (!CurSU) { 748 // Can't backtrack. If it's too expensive to copy the value, then try 749 // duplicate the nodes that produces these "too expensive to copy" 750 // values to break the dependency. In case even that doesn't work, 751 // insert cross class copies. 752 // If it's not too expensive, i.e. cost != -1, issue copies. 753 SUnit *TrySU = NotReady[0]; 754 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 755 assert(LRegs.size() == 1 && "Can't handle this yet!"); 756 unsigned Reg = LRegs[0]; 757 SUnit *LRDef = LiveRegDefs[Reg]; 758 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 759 const TargetRegisterClass *RC = 760 TRI->getPhysicalRegisterRegClass(Reg, VT); 761 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 762 763 // If cross copy register class is null, then it must be possible copy 764 // the value directly. Do not try duplicate the def. 765 SUnit *NewDef = 0; 766 if (DestRC) 767 NewDef = CopyAndMoveSuccessors(LRDef); 768 else 769 DestRC = RC; 770 if (!NewDef) { 771 // Issue copies, these can be expensive cross register class copies. 772 SmallVector<SUnit*, 2> Copies; 773 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 774 DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum 775 << " to SU #" << Copies.front()->NodeNum << "\n"); 776 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 777 /*Reg=*/0, /*isNormalMemory=*/false, 778 /*isMustAlias=*/false, 779 /*isArtificial=*/true)); 780 NewDef = Copies.back(); 781 } 782 783 DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum 784 << " to SU #" << TrySU->NodeNum << "\n"); 785 LiveRegDefs[Reg] = NewDef; 786 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 787 /*Reg=*/0, /*isNormalMemory=*/false, 788 /*isMustAlias=*/false, 789 /*isArtificial=*/true)); 790 TrySU->isAvailable = false; 791 CurSU = NewDef; 792 } 793 794 assert(CurSU && "Unable to resolve live physical register dependencies!"); 795 } 796 797 // Add the nodes that aren't ready back onto the available list. 798 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 799 NotReady[i]->isPending = false; 800 // May no longer be available due to backtracking. 801 if (NotReady[i]->isAvailable) 802 AvailableQueue->push(NotReady[i]); 803 } 804 NotReady.clear(); 805 806 if (CurSU) 807 ScheduleNodeBottomUp(CurSU, CurCycle); 808 ++CurCycle; 809 } 810 811 // Reverse the order if it is bottom up. 812 std::reverse(Sequence.begin(), Sequence.end()); 813 814#ifndef NDEBUG 815 VerifySchedule(isBottomUp); 816#endif 817} 818 819//===----------------------------------------------------------------------===// 820// Top-Down Scheduling 821//===----------------------------------------------------------------------===// 822 823/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 824/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 825void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 826 SUnit *SuccSU = SuccEdge->getSUnit(); 827 828#ifndef NDEBUG 829 if (SuccSU->NumPredsLeft == 0) { 830 dbgs() << "*** Scheduling failed! ***\n"; 831 SuccSU->dump(this); 832 dbgs() << " has been released too many times!\n"; 833 llvm_unreachable(0); 834 } 835#endif 836 --SuccSU->NumPredsLeft; 837 838 // If all the node's predecessors are scheduled, this node is ready 839 // to be scheduled. Ignore the special ExitSU node. 840 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 841 SuccSU->isAvailable = true; 842 AvailableQueue->push(SuccSU); 843 } 844} 845 846void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 847 // Top down: release successors 848 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 849 I != E; ++I) { 850 assert(!I->isAssignedRegDep() && 851 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 852 853 ReleaseSucc(SU, &*I); 854 } 855} 856 857/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 858/// count of its successors. If a successor pending count is zero, add it to 859/// the Available queue. 860void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 861 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 862 DEBUG(SU->dump(this)); 863 864 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 865 SU->setDepthToAtLeast(CurCycle); 866 Sequence.push_back(SU); 867 868 ReleaseSuccessors(SU); 869 SU->isScheduled = true; 870 AvailableQueue->ScheduledNode(SU); 871} 872 873/// ListScheduleTopDown - The main loop of list scheduling for top-down 874/// schedulers. 875void ScheduleDAGRRList::ListScheduleTopDown() { 876 unsigned CurCycle = 0; 877 878 // Release any successors of the special Entry node. 879 ReleaseSuccessors(&EntrySU); 880 881 // All leaves to Available queue. 882 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 883 // It is available if it has no predecessors. 884 if (SUnits[i].Preds.empty()) { 885 AvailableQueue->push(&SUnits[i]); 886 SUnits[i].isAvailable = true; 887 } 888 } 889 890 // While Available queue is not empty, grab the node with the highest 891 // priority. If it is not ready put it back. Schedule the node. 892 Sequence.reserve(SUnits.size()); 893 while (!AvailableQueue->empty()) { 894 SUnit *CurSU = AvailableQueue->pop(); 895 896 if (CurSU) 897 ScheduleNodeTopDown(CurSU, CurCycle); 898 ++CurCycle; 899 } 900 901#ifndef NDEBUG 902 VerifySchedule(isBottomUp); 903#endif 904} 905 906 907//===----------------------------------------------------------------------===// 908// RegReductionPriorityQueue Implementation 909//===----------------------------------------------------------------------===// 910// 911// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 912// to reduce register pressure. 913// 914namespace { 915 template<class SF> 916 class RegReductionPriorityQueue; 917 918 /// Sorting functions for the Available queue. 919 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 920 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 921 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 922 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 923 924 bool operator()(const SUnit* left, const SUnit* right) const; 925 }; 926 927 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 928 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 929 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 930 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 931 932 bool operator()(const SUnit* left, const SUnit* right) const; 933 }; 934} // end anonymous namespace 935 936/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 937/// Smaller number is the higher priority. 938static unsigned 939CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 940 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 941 if (SethiUllmanNumber != 0) 942 return SethiUllmanNumber; 943 944 unsigned Extra = 0; 945 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 946 I != E; ++I) { 947 if (I->isCtrl()) continue; // ignore chain preds 948 SUnit *PredSU = I->getSUnit(); 949 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 950 if (PredSethiUllman > SethiUllmanNumber) { 951 SethiUllmanNumber = PredSethiUllman; 952 Extra = 0; 953 } else if (PredSethiUllman == SethiUllmanNumber) 954 ++Extra; 955 } 956 957 SethiUllmanNumber += Extra; 958 959 if (SethiUllmanNumber == 0) 960 SethiUllmanNumber = 1; 961 962 return SethiUllmanNumber; 963} 964 965namespace { 966 template<class SF> 967 class RegReductionPriorityQueue : public SchedulingPriorityQueue { 968 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 969 unsigned currentQueueId; 970 971 protected: 972 // SUnits - The SUnits for the current graph. 973 std::vector<SUnit> *SUnits; 974 975 const TargetInstrInfo *TII; 976 const TargetRegisterInfo *TRI; 977 ScheduleDAGRRList *scheduleDAG; 978 979 // SethiUllmanNumbers - The SethiUllman number for each node. 980 std::vector<unsigned> SethiUllmanNumbers; 981 982 public: 983 RegReductionPriorityQueue(const TargetInstrInfo *tii, 984 const TargetRegisterInfo *tri) : 985 Queue(SF(this)), currentQueueId(0), 986 TII(tii), TRI(tri), scheduleDAG(NULL) {} 987 988 void initNodes(std::vector<SUnit> &sunits) { 989 SUnits = &sunits; 990 // Add pseudo dependency edges for two-address nodes. 991 AddPseudoTwoAddrDeps(); 992 // Reroute edges to nodes with multiple uses. 993 PrescheduleNodesWithMultipleUses(); 994 // Calculate node priorities. 995 CalculateSethiUllmanNumbers(); 996 } 997 998 void addNode(const SUnit *SU) { 999 unsigned SUSize = SethiUllmanNumbers.size(); 1000 if (SUnits->size() > SUSize) 1001 SethiUllmanNumbers.resize(SUSize*2, 0); 1002 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1003 } 1004 1005 void updateNode(const SUnit *SU) { 1006 SethiUllmanNumbers[SU->NodeNum] = 0; 1007 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1008 } 1009 1010 void releaseState() { 1011 SUnits = 0; 1012 SethiUllmanNumbers.clear(); 1013 } 1014 1015 unsigned getNodePriority(const SUnit *SU) const { 1016 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1017 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1018 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1019 // CopyToReg should be close to its uses to facilitate coalescing and 1020 // avoid spilling. 1021 return 0; 1022 if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1023 Opc == TargetInstrInfo::SUBREG_TO_REG || 1024 Opc == TargetInstrInfo::INSERT_SUBREG) 1025 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1026 // close to their uses to facilitate coalescing. 1027 return 0; 1028 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1029 // If SU does not have a register use, i.e. it doesn't produce a value 1030 // that would be consumed (e.g. store), then it terminates a chain of 1031 // computation. Give it a large SethiUllman number so it will be 1032 // scheduled right before its predecessors that it doesn't lengthen 1033 // their live ranges. 1034 return 0xffff; 1035 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1036 // If SU does not have a register def, schedule it close to its uses 1037 // because it does not lengthen any live ranges. 1038 return 0; 1039 return SethiUllmanNumbers[SU->NodeNum]; 1040 } 1041 1042 unsigned getNodeOrdering(const SUnit *SU) const { 1043 return scheduleDAG->DAG->GetOrdering(SU->getNode()); 1044 } 1045 1046 unsigned size() const { return Queue.size(); } 1047 1048 bool empty() const { return Queue.empty(); } 1049 1050 void push(SUnit *U) { 1051 assert(!U->NodeQueueId && "Node in the queue already"); 1052 U->NodeQueueId = ++currentQueueId; 1053 Queue.push(U); 1054 } 1055 1056 void push_all(const std::vector<SUnit *> &Nodes) { 1057 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1058 push(Nodes[i]); 1059 } 1060 1061 SUnit *pop() { 1062 if (empty()) return NULL; 1063 SUnit *V = Queue.top(); 1064 Queue.pop(); 1065 V->NodeQueueId = 0; 1066 return V; 1067 } 1068 1069 void remove(SUnit *SU) { 1070 assert(!Queue.empty() && "Queue is empty!"); 1071 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1072 Queue.erase_one(SU); 1073 SU->NodeQueueId = 0; 1074 } 1075 1076 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1077 scheduleDAG = scheduleDag; 1078 } 1079 1080 protected: 1081 bool canClobber(const SUnit *SU, const SUnit *Op); 1082 void AddPseudoTwoAddrDeps(); 1083 void PrescheduleNodesWithMultipleUses(); 1084 void CalculateSethiUllmanNumbers(); 1085 }; 1086 1087 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1088 BURegReductionPriorityQueue; 1089 1090 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1091 TDRegReductionPriorityQueue; 1092} 1093 1094/// closestSucc - Returns the scheduled cycle of the successor which is 1095/// closest to the current cycle. 1096static unsigned closestSucc(const SUnit *SU) { 1097 unsigned MaxHeight = 0; 1098 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1099 I != E; ++I) { 1100 if (I->isCtrl()) continue; // ignore chain succs 1101 unsigned Height = I->getSUnit()->getHeight(); 1102 // If there are bunch of CopyToRegs stacked up, they should be considered 1103 // to be at the same position. 1104 if (I->getSUnit()->getNode() && 1105 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1106 Height = closestSucc(I->getSUnit())+1; 1107 if (Height > MaxHeight) 1108 MaxHeight = Height; 1109 } 1110 return MaxHeight; 1111} 1112 1113/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1114/// for scratch registers, i.e. number of data dependencies. 1115static unsigned calcMaxScratches(const SUnit *SU) { 1116 unsigned Scratches = 0; 1117 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1118 I != E; ++I) { 1119 if (I->isCtrl()) continue; // ignore chain preds 1120 Scratches++; 1121 } 1122 return Scratches; 1123} 1124 1125// Bottom up 1126bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1127 unsigned LOrder = SPQ->getNodeOrdering(left); 1128 unsigned ROrder = SPQ->getNodeOrdering(right); 1129 1130 // Prefer an ordering where the lower the non-zero order number, the higher 1131 // the preference. 1132 if ((LOrder || ROrder) && LOrder != ROrder) 1133 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 1134 1135 unsigned LPriority = SPQ->getNodePriority(left); 1136 unsigned RPriority = SPQ->getNodePriority(right); 1137 if (LPriority != RPriority) 1138 return LPriority > RPriority; 1139 1140 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1141 // e.g. 1142 // t1 = op t2, c1 1143 // t3 = op t4, c2 1144 // 1145 // and the following instructions are both ready. 1146 // t2 = op c3 1147 // t4 = op c4 1148 // 1149 // Then schedule t2 = op first. 1150 // i.e. 1151 // t4 = op c4 1152 // t2 = op c3 1153 // t1 = op t2, c1 1154 // t3 = op t4, c2 1155 // 1156 // This creates more short live intervals. 1157 unsigned LDist = closestSucc(left); 1158 unsigned RDist = closestSucc(right); 1159 if (LDist != RDist) 1160 return LDist < RDist; 1161 1162 // How many registers becomes live when the node is scheduled. 1163 unsigned LScratch = calcMaxScratches(left); 1164 unsigned RScratch = calcMaxScratches(right); 1165 if (LScratch != RScratch) 1166 return LScratch > RScratch; 1167 1168 if (left->getHeight() != right->getHeight()) 1169 return left->getHeight() > right->getHeight(); 1170 1171 if (left->getDepth() != right->getDepth()) 1172 return left->getDepth() < right->getDepth(); 1173 1174 assert(left->NodeQueueId && right->NodeQueueId && 1175 "NodeQueueId cannot be zero"); 1176 return (left->NodeQueueId > right->NodeQueueId); 1177} 1178 1179template<class SF> 1180bool 1181RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1182 if (SU->isTwoAddress) { 1183 unsigned Opc = SU->getNode()->getMachineOpcode(); 1184 const TargetInstrDesc &TID = TII->get(Opc); 1185 unsigned NumRes = TID.getNumDefs(); 1186 unsigned NumOps = TID.getNumOperands() - NumRes; 1187 for (unsigned i = 0; i != NumOps; ++i) { 1188 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1189 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1190 if (DU->getNodeId() != -1 && 1191 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1192 return true; 1193 } 1194 } 1195 } 1196 return false; 1197} 1198 1199 1200/// hasCopyToRegUse - Return true if SU has a value successor that is a 1201/// CopyToReg node. 1202static bool hasCopyToRegUse(const SUnit *SU) { 1203 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1204 I != E; ++I) { 1205 if (I->isCtrl()) continue; 1206 const SUnit *SuccSU = I->getSUnit(); 1207 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1208 return true; 1209 } 1210 return false; 1211} 1212 1213/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1214/// physical register defs. 1215static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1216 const TargetInstrInfo *TII, 1217 const TargetRegisterInfo *TRI) { 1218 SDNode *N = SuccSU->getNode(); 1219 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1220 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1221 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1222 for (const SDNode *SUNode = SU->getNode(); SUNode; 1223 SUNode = SUNode->getFlaggedNode()) { 1224 if (!SUNode->isMachineOpcode()) 1225 continue; 1226 const unsigned *SUImpDefs = 1227 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1228 if (!SUImpDefs) 1229 return false; 1230 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1231 EVT VT = N->getValueType(i); 1232 if (VT == MVT::Flag || VT == MVT::Other) 1233 continue; 1234 if (!N->hasAnyUseOfValue(i)) 1235 continue; 1236 unsigned Reg = ImpDefs[i - NumDefs]; 1237 for (;*SUImpDefs; ++SUImpDefs) { 1238 unsigned SUReg = *SUImpDefs; 1239 if (TRI->regsOverlap(Reg, SUReg)) 1240 return true; 1241 } 1242 } 1243 } 1244 return false; 1245} 1246 1247/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1248/// are not handled well by the general register pressure reduction 1249/// heuristics. When presented with code like this: 1250/// 1251/// N 1252/// / | 1253/// / | 1254/// U store 1255/// | 1256/// ... 1257/// 1258/// the heuristics tend to push the store up, but since the 1259/// operand of the store has another use (U), this would increase 1260/// the length of that other use (the U->N edge). 1261/// 1262/// This function transforms code like the above to route U's 1263/// dependence through the store when possible, like this: 1264/// 1265/// N 1266/// || 1267/// || 1268/// store 1269/// | 1270/// U 1271/// | 1272/// ... 1273/// 1274/// This results in the store being scheduled immediately 1275/// after N, which shortens the U->N live range, reducing 1276/// register pressure. 1277/// 1278template<class SF> 1279void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1280 // Visit all the nodes in topological order, working top-down. 1281 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1282 SUnit *SU = &(*SUnits)[i]; 1283 // For now, only look at nodes with no data successors, such as stores. 1284 // These are especially important, due to the heuristics in 1285 // getNodePriority for nodes with no data successors. 1286 if (SU->NumSuccs != 0) 1287 continue; 1288 // For now, only look at nodes with exactly one data predecessor. 1289 if (SU->NumPreds != 1) 1290 continue; 1291 // Avoid prescheduling copies to virtual registers, which don't behave 1292 // like other nodes from the perspective of scheduling heuristics. 1293 if (SDNode *N = SU->getNode()) 1294 if (N->getOpcode() == ISD::CopyToReg && 1295 TargetRegisterInfo::isVirtualRegister 1296 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1297 continue; 1298 1299 // Locate the single data predecessor. 1300 SUnit *PredSU = 0; 1301 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1302 EE = SU->Preds.end(); II != EE; ++II) 1303 if (!II->isCtrl()) { 1304 PredSU = II->getSUnit(); 1305 break; 1306 } 1307 assert(PredSU); 1308 1309 // Don't rewrite edges that carry physregs, because that requires additional 1310 // support infrastructure. 1311 if (PredSU->hasPhysRegDefs) 1312 continue; 1313 // Short-circuit the case where SU is PredSU's only data successor. 1314 if (PredSU->NumSuccs == 1) 1315 continue; 1316 // Avoid prescheduling to copies from virtual registers, which don't behave 1317 // like other nodes from the perspective of scheduling // heuristics. 1318 if (SDNode *N = SU->getNode()) 1319 if (N->getOpcode() == ISD::CopyFromReg && 1320 TargetRegisterInfo::isVirtualRegister 1321 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1322 continue; 1323 1324 // Perform checks on the successors of PredSU. 1325 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1326 EE = PredSU->Succs.end(); II != EE; ++II) { 1327 SUnit *PredSuccSU = II->getSUnit(); 1328 if (PredSuccSU == SU) continue; 1329 // If PredSU has another successor with no data successors, for 1330 // now don't attempt to choose either over the other. 1331 if (PredSuccSU->NumSuccs == 0) 1332 goto outer_loop_continue; 1333 // Don't break physical register dependencies. 1334 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1335 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1336 goto outer_loop_continue; 1337 // Don't introduce graph cycles. 1338 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1339 goto outer_loop_continue; 1340 } 1341 1342 // Ok, the transformation is safe and the heuristics suggest it is 1343 // profitable. Update the graph. 1344 DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum 1345 << " next to PredSU # " << PredSU->NodeNum 1346 << " to guide scheduling in the presence of multiple uses\n"); 1347 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1348 SDep Edge = PredSU->Succs[i]; 1349 assert(!Edge.isAssignedRegDep()); 1350 SUnit *SuccSU = Edge.getSUnit(); 1351 if (SuccSU != SU) { 1352 Edge.setSUnit(PredSU); 1353 scheduleDAG->RemovePred(SuccSU, Edge); 1354 scheduleDAG->AddPred(SU, Edge); 1355 Edge.setSUnit(SU); 1356 scheduleDAG->AddPred(SuccSU, Edge); 1357 --i; 1358 } 1359 } 1360 outer_loop_continue:; 1361 } 1362} 1363 1364/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1365/// it as a def&use operand. Add a pseudo control edge from it to the other 1366/// node (if it won't create a cycle) so the two-address one will be scheduled 1367/// first (lower in the schedule). If both nodes are two-address, favor the 1368/// one that has a CopyToReg use (more likely to be a loop induction update). 1369/// If both are two-address, but one is commutable while the other is not 1370/// commutable, favor the one that's not commutable. 1371template<class SF> 1372void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1373 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1374 SUnit *SU = &(*SUnits)[i]; 1375 if (!SU->isTwoAddress) 1376 continue; 1377 1378 SDNode *Node = SU->getNode(); 1379 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1380 continue; 1381 1382 unsigned Opc = Node->getMachineOpcode(); 1383 const TargetInstrDesc &TID = TII->get(Opc); 1384 unsigned NumRes = TID.getNumDefs(); 1385 unsigned NumOps = TID.getNumOperands() - NumRes; 1386 for (unsigned j = 0; j != NumOps; ++j) { 1387 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1388 continue; 1389 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1390 if (DU->getNodeId() == -1) 1391 continue; 1392 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1393 if (!DUSU) continue; 1394 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1395 E = DUSU->Succs.end(); I != E; ++I) { 1396 if (I->isCtrl()) continue; 1397 SUnit *SuccSU = I->getSUnit(); 1398 if (SuccSU == SU) 1399 continue; 1400 // Be conservative. Ignore if nodes aren't at roughly the same 1401 // depth and height. 1402 if (SuccSU->getHeight() < SU->getHeight() && 1403 (SU->getHeight() - SuccSU->getHeight()) > 1) 1404 continue; 1405 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 1406 // constrains whatever is using the copy, instead of the copy 1407 // itself. In the case that the copy is coalesced, this 1408 // preserves the intent of the pseudo two-address heurietics. 1409 while (SuccSU->Succs.size() == 1 && 1410 SuccSU->getNode()->isMachineOpcode() && 1411 SuccSU->getNode()->getMachineOpcode() == 1412 TargetInstrInfo::COPY_TO_REGCLASS) 1413 SuccSU = SuccSU->Succs.front().getSUnit(); 1414 // Don't constrain non-instruction nodes. 1415 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1416 continue; 1417 // Don't constrain nodes with physical register defs if the 1418 // predecessor can clobber them. 1419 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1420 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1421 continue; 1422 } 1423 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1424 // these may be coalesced away. We want them close to their uses. 1425 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1426 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1427 SuccOpc == TargetInstrInfo::INSERT_SUBREG || 1428 SuccOpc == TargetInstrInfo::SUBREG_TO_REG) 1429 continue; 1430 if ((!canClobber(SuccSU, DUSU) || 1431 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1432 (!SU->isCommutable && SuccSU->isCommutable)) && 1433 !scheduleDAG->IsReachable(SuccSU, SU)) { 1434 DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # " 1435 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 1436 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1437 /*Reg=*/0, /*isNormalMemory=*/false, 1438 /*isMustAlias=*/false, 1439 /*isArtificial=*/true)); 1440 } 1441 } 1442 } 1443 } 1444} 1445 1446/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1447/// scheduling units. 1448template<class SF> 1449void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1450 SethiUllmanNumbers.assign(SUnits->size(), 0); 1451 1452 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1453 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1454} 1455 1456/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1457/// predecessors of the successors of the SUnit SU. Stop when the provided 1458/// limit is exceeded. 1459static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1460 unsigned Limit) { 1461 unsigned Sum = 0; 1462 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1463 I != E; ++I) { 1464 const SUnit *SuccSU = I->getSUnit(); 1465 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1466 EE = SuccSU->Preds.end(); II != EE; ++II) { 1467 SUnit *PredSU = II->getSUnit(); 1468 if (!PredSU->isScheduled) 1469 if (++Sum > Limit) 1470 return Sum; 1471 } 1472 } 1473 return Sum; 1474} 1475 1476 1477// Top down 1478bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1479 unsigned LPriority = SPQ->getNodePriority(left); 1480 unsigned RPriority = SPQ->getNodePriority(right); 1481 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1482 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1483 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1484 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1485 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1486 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1487 1488 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1489 return false; 1490 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1491 return true; 1492 1493 if (LIsFloater) 1494 LBonus -= 2; 1495 if (RIsFloater) 1496 RBonus -= 2; 1497 if (left->NumSuccs == 1) 1498 LBonus += 2; 1499 if (right->NumSuccs == 1) 1500 RBonus += 2; 1501 1502 if (LPriority+LBonus != RPriority+RBonus) 1503 return LPriority+LBonus < RPriority+RBonus; 1504 1505 if (left->getDepth() != right->getDepth()) 1506 return left->getDepth() < right->getDepth(); 1507 1508 if (left->NumSuccsLeft != right->NumSuccsLeft) 1509 return left->NumSuccsLeft > right->NumSuccsLeft; 1510 1511 assert(left->NodeQueueId && right->NodeQueueId && 1512 "NodeQueueId cannot be zero"); 1513 return (left->NodeQueueId > right->NodeQueueId); 1514} 1515 1516//===----------------------------------------------------------------------===// 1517// Public Constructor Functions 1518//===----------------------------------------------------------------------===// 1519 1520llvm::ScheduleDAGSDNodes * 1521llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1522 const TargetMachine &TM = IS->TM; 1523 const TargetInstrInfo *TII = TM.getInstrInfo(); 1524 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1525 1526 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1527 1528 ScheduleDAGRRList *SD = 1529 new ScheduleDAGRRList(*IS->MF, true, PQ); 1530 PQ->setScheduleDAG(SD); 1531 return SD; 1532} 1533 1534llvm::ScheduleDAGSDNodes * 1535llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1536 const TargetMachine &TM = IS->TM; 1537 const TargetInstrInfo *TII = TM.getInstrInfo(); 1538 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1539 1540 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1541 1542 ScheduleDAGRRList *SD = 1543 new ScheduleDAGRRList(*IS->MF, false, PQ); 1544 PQ->setScheduleDAG(SD); 1545 return SD; 1546} 1547