ScheduleDAGRRList.cpp revision 47ac0f0c7c39289f5970688154e385be22b7f293
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SelectionDAGISel.h" 22#include "llvm/Target/TargetRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/ADT/PriorityQueue.h" 29#include "llvm/ADT/SmallSet.h" 30#include "llvm/ADT/Statistic.h" 31#include "llvm/ADT/STLExtras.h" 32#include <climits> 33using namespace llvm; 34 35STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 36STATISTIC(NumUnfolds, "Number of nodes unfolded"); 37STATISTIC(NumDups, "Number of duplicated nodes"); 38STATISTIC(NumPRCopies, "Number of physical register copies"); 39 40static RegisterScheduler 41 burrListDAGScheduler("list-burr", 42 "Bottom-up register reduction list scheduling", 43 createBURRListDAGScheduler); 44static RegisterScheduler 45 tdrListrDAGScheduler("list-tdrr", 46 "Top-down register reduction list scheduling", 47 createTDRRListDAGScheduler); 48 49namespace { 50//===----------------------------------------------------------------------===// 51/// ScheduleDAGRRList - The actual register reduction list scheduler 52/// implementation. This supports both top-down and bottom-up scheduling. 53/// 54class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 55private: 56 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 57 /// it is top-down. 58 bool isBottomUp; 59 60 /// AvailableQueue - The priority queue to use for the available SUnits. 61 SchedulingPriorityQueue *AvailableQueue; 62 63 /// LiveRegDefs - A set of physical registers and their definition 64 /// that are "live". These nodes must be scheduled before any other nodes that 65 /// modifies the registers can be scheduled. 66 unsigned NumLiveRegs; 67 std::vector<SUnit*> LiveRegDefs; 68 std::vector<unsigned> LiveRegCycles; 69 70 /// Topo - A topological ordering for SUnits which permits fast IsReachable 71 /// and similar queries. 72 ScheduleDAGTopologicalSort Topo; 73 74public: 75 ScheduleDAGRRList(MachineFunction &mf, 76 bool isbottomup, 77 SchedulingPriorityQueue *availqueue) 78 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), 79 AvailableQueue(availqueue), Topo(SUnits) { 80 } 81 82 ~ScheduleDAGRRList() { 83 delete AvailableQueue; 84 } 85 86 void Schedule(); 87 88 /// IsReachable - Checks if SU is reachable from TargetSU. 89 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 90 return Topo.IsReachable(SU, TargetSU); 91 } 92 93 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 94 /// create a cycle. 95 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 96 return Topo.WillCreateCycle(SU, TargetSU); 97 } 98 99 /// AddPred - adds a predecessor edge to SUnit SU. 100 /// This returns true if this is a new predecessor. 101 /// Updates the topological ordering if required. 102 void AddPred(SUnit *SU, const SDep &D) { 103 Topo.AddPred(SU, D.getSUnit()); 104 SU->addPred(D); 105 } 106 107 /// RemovePred - removes a predecessor edge from SUnit SU. 108 /// This returns true if an edge was removed. 109 /// Updates the topological ordering if required. 110 void RemovePred(SUnit *SU, const SDep &D) { 111 Topo.RemovePred(SU, D.getSUnit()); 112 SU->removePred(D); 113 } 114 115private: 116 void ReleasePred(SUnit *SU, const SDep *PredEdge); 117 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 118 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 119 void ReleaseSuccessors(SUnit *SU); 120 void CapturePred(SDep *PredEdge); 121 void ScheduleNodeBottomUp(SUnit*, unsigned); 122 void ScheduleNodeTopDown(SUnit*, unsigned); 123 void UnscheduleNodeBottomUp(SUnit*); 124 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 125 SUnit *CopyAndMoveSuccessors(SUnit*); 126 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 127 const TargetRegisterClass*, 128 const TargetRegisterClass*, 129 SmallVector<SUnit*, 2>&); 130 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 131 void ListScheduleTopDown(); 132 void ListScheduleBottomUp(); 133 134 135 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 136 /// Updates the topological ordering if required. 137 SUnit *CreateNewSUnit(SDNode *N) { 138 unsigned NumSUnits = SUnits.size(); 139 SUnit *NewNode = NewSUnit(N); 140 // Update the topological ordering. 141 if (NewNode->NodeNum >= NumSUnits) 142 Topo.InitDAGTopologicalSorting(); 143 return NewNode; 144 } 145 146 /// CreateClone - Creates a new SUnit from an existing one. 147 /// Updates the topological ordering if required. 148 SUnit *CreateClone(SUnit *N) { 149 unsigned NumSUnits = SUnits.size(); 150 SUnit *NewNode = Clone(N); 151 // Update the topological ordering. 152 if (NewNode->NodeNum >= NumSUnits) 153 Topo.InitDAGTopologicalSorting(); 154 return NewNode; 155 } 156 157 /// ForceUnitLatencies - Return true, since register-pressure-reducing 158 /// scheduling doesn't need actual latency information. 159 bool ForceUnitLatencies() const { return true; } 160}; 161} // end anonymous namespace 162 163 164/// Schedule - Schedule the DAG using list scheduling. 165void ScheduleDAGRRList::Schedule() { 166 DOUT << "********** List Scheduling **********\n"; 167 168 NumLiveRegs = 0; 169 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 170 LiveRegCycles.resize(TRI->getNumRegs(), 0); 171 172 // Build the scheduling graph. 173 BuildSchedGraph(); 174 175 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 176 SUnits[su].dumpAll(this)); 177 Topo.InitDAGTopologicalSorting(); 178 179 AvailableQueue->initNodes(SUnits); 180 181 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 182 if (isBottomUp) 183 ListScheduleBottomUp(); 184 else 185 ListScheduleTopDown(); 186 187 AvailableQueue->releaseState(); 188} 189 190//===----------------------------------------------------------------------===// 191// Bottom-Up Scheduling 192//===----------------------------------------------------------------------===// 193 194/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 195/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 196void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 197 SUnit *PredSU = PredEdge->getSUnit(); 198 --PredSU->NumSuccsLeft; 199 200#ifndef NDEBUG 201 if (PredSU->NumSuccsLeft < 0) { 202 cerr << "*** Scheduling failed! ***\n"; 203 PredSU->dump(this); 204 cerr << " has been released too many times!\n"; 205 assert(0); 206 } 207#endif 208 209 // If all the node's successors are scheduled, this node is ready 210 // to be scheduled. Ignore the special EntrySU node. 211 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 212 PredSU->isAvailable = true; 213 AvailableQueue->push(PredSU); 214 } 215} 216 217void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 218 // Bottom up: release predecessors 219 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 220 I != E; ++I) { 221 ReleasePred(SU, &*I); 222 if (I->isAssignedRegDep()) { 223 // This is a physical register dependency and it's impossible or 224 // expensive to copy the register. Make sure nothing that can 225 // clobber the register is scheduled between the predecessor and 226 // this node. 227 if (!LiveRegDefs[I->getReg()]) { 228 ++NumLiveRegs; 229 LiveRegDefs[I->getReg()] = I->getSUnit(); 230 LiveRegCycles[I->getReg()] = CurCycle; 231 } 232 } 233 } 234} 235 236/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 237/// count of its predecessors. If a predecessor pending count is zero, add it to 238/// the Available queue. 239void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 240 DOUT << "*** Scheduling [" << CurCycle << "]: "; 241 DEBUG(SU->dump(this)); 242 243 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 244 SU->setHeightToAtLeast(CurCycle); 245 Sequence.push_back(SU); 246 247 ReleasePredecessors(SU, CurCycle); 248 249 // Release all the implicit physical register defs that are live. 250 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 251 I != E; ++I) { 252 if (I->isAssignedRegDep()) { 253 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 254 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 255 assert(LiveRegDefs[I->getReg()] == SU && 256 "Physical register dependency violated?"); 257 --NumLiveRegs; 258 LiveRegDefs[I->getReg()] = NULL; 259 LiveRegCycles[I->getReg()] = 0; 260 } 261 } 262 } 263 264 SU->isScheduled = true; 265 AvailableQueue->ScheduledNode(SU); 266} 267 268/// CapturePred - This does the opposite of ReleasePred. Since SU is being 269/// unscheduled, incrcease the succ left count of its predecessors. Remove 270/// them from AvailableQueue if necessary. 271void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 272 SUnit *PredSU = PredEdge->getSUnit(); 273 if (PredSU->isAvailable) { 274 PredSU->isAvailable = false; 275 if (!PredSU->isPending) 276 AvailableQueue->remove(PredSU); 277 } 278 279 ++PredSU->NumSuccsLeft; 280} 281 282/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 283/// its predecessor states to reflect the change. 284void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 285 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; 286 DEBUG(SU->dump(this)); 287 288 AvailableQueue->UnscheduledNode(SU); 289 290 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 291 I != E; ++I) { 292 CapturePred(&*I); 293 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 294 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 295 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 296 "Physical register dependency violated?"); 297 --NumLiveRegs; 298 LiveRegDefs[I->getReg()] = NULL; 299 LiveRegCycles[I->getReg()] = 0; 300 } 301 } 302 303 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 304 I != E; ++I) { 305 if (I->isAssignedRegDep()) { 306 if (!LiveRegDefs[I->getReg()]) { 307 LiveRegDefs[I->getReg()] = SU; 308 ++NumLiveRegs; 309 } 310 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 311 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 312 } 313 } 314 315 SU->setHeightDirty(); 316 SU->isScheduled = false; 317 SU->isAvailable = true; 318 AvailableQueue->push(SU); 319} 320 321/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 322/// BTCycle in order to schedule a specific node. 323void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 324 unsigned &CurCycle) { 325 SUnit *OldSU = NULL; 326 while (CurCycle > BtCycle) { 327 OldSU = Sequence.back(); 328 Sequence.pop_back(); 329 if (SU->isSucc(OldSU)) 330 // Don't try to remove SU from AvailableQueue. 331 SU->isAvailable = false; 332 UnscheduleNodeBottomUp(OldSU); 333 --CurCycle; 334 } 335 336 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 337 338 ++NumBacktracks; 339} 340 341/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 342/// successors to the newly created node. 343SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 344 if (SU->getNode()->getFlaggedNode()) 345 return NULL; 346 347 SDNode *N = SU->getNode(); 348 if (!N) 349 return NULL; 350 351 SUnit *NewSU; 352 bool TryUnfold = false; 353 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 354 MVT VT = N->getValueType(i); 355 if (VT == MVT::Flag) 356 return NULL; 357 else if (VT == MVT::Other) 358 TryUnfold = true; 359 } 360 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 361 const SDValue &Op = N->getOperand(i); 362 MVT VT = Op.getNode()->getValueType(Op.getResNo()); 363 if (VT == MVT::Flag) 364 return NULL; 365 } 366 367 if (TryUnfold) { 368 SmallVector<SDNode*, 2> NewNodes; 369 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 370 return NULL; 371 372 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 373 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 374 375 N = NewNodes[1]; 376 SDNode *LoadNode = NewNodes[0]; 377 unsigned NumVals = N->getNumValues(); 378 unsigned OldNumVals = SU->getNode()->getNumValues(); 379 for (unsigned i = 0; i != NumVals; ++i) 380 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 381 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 382 SDValue(LoadNode, 1)); 383 384 // LoadNode may already exist. This can happen when there is another 385 // load from the same location and producing the same type of value 386 // but it has different alignment or volatileness. 387 bool isNewLoad = true; 388 SUnit *LoadSU; 389 if (LoadNode->getNodeId() != -1) { 390 LoadSU = &SUnits[LoadNode->getNodeId()]; 391 isNewLoad = false; 392 } else { 393 LoadSU = CreateNewSUnit(LoadNode); 394 LoadNode->setNodeId(LoadSU->NodeNum); 395 ComputeLatency(LoadSU); 396 } 397 398 SUnit *NewSU = CreateNewSUnit(N); 399 assert(N->getNodeId() == -1 && "Node already inserted!"); 400 N->setNodeId(NewSU->NodeNum); 401 402 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 403 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 404 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 405 NewSU->isTwoAddress = true; 406 break; 407 } 408 } 409 if (TID.isCommutable()) 410 NewSU->isCommutable = true; 411 ComputeLatency(NewSU); 412 413 SDep ChainPred; 414 SmallVector<SDep, 4> ChainSuccs; 415 SmallVector<SDep, 4> LoadPreds; 416 SmallVector<SDep, 4> NodePreds; 417 SmallVector<SDep, 4> NodeSuccs; 418 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 419 I != E; ++I) { 420 if (I->isCtrl()) 421 ChainPred = *I; 422 else if (I->getSUnit()->getNode() && 423 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 424 LoadPreds.push_back(*I); 425 else 426 NodePreds.push_back(*I); 427 } 428 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 429 I != E; ++I) { 430 if (I->isCtrl()) 431 ChainSuccs.push_back(*I); 432 else 433 NodeSuccs.push_back(*I); 434 } 435 436 if (ChainPred.getSUnit()) { 437 RemovePred(SU, ChainPred); 438 if (isNewLoad) 439 AddPred(LoadSU, ChainPred); 440 } 441 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 442 const SDep &Pred = LoadPreds[i]; 443 RemovePred(SU, Pred); 444 if (isNewLoad) { 445 AddPred(LoadSU, Pred); 446 } 447 } 448 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 449 const SDep &Pred = NodePreds[i]; 450 RemovePred(SU, Pred); 451 AddPred(NewSU, Pred); 452 } 453 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 454 SDep D = NodeSuccs[i]; 455 SUnit *SuccDep = D.getSUnit(); 456 D.setSUnit(SU); 457 RemovePred(SuccDep, D); 458 D.setSUnit(NewSU); 459 AddPred(SuccDep, D); 460 } 461 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 462 SDep D = ChainSuccs[i]; 463 SUnit *SuccDep = D.getSUnit(); 464 D.setSUnit(SU); 465 RemovePred(SuccDep, D); 466 if (isNewLoad) { 467 D.setSUnit(LoadSU); 468 AddPred(SuccDep, D); 469 } 470 } 471 if (isNewLoad) { 472 AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); 473 } 474 475 if (isNewLoad) 476 AvailableQueue->addNode(LoadSU); 477 AvailableQueue->addNode(NewSU); 478 479 ++NumUnfolds; 480 481 if (NewSU->NumSuccsLeft == 0) { 482 NewSU->isAvailable = true; 483 return NewSU; 484 } 485 SU = NewSU; 486 } 487 488 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 489 NewSU = CreateClone(SU); 490 491 // New SUnit has the exact same predecessors. 492 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 493 I != E; ++I) 494 if (!I->isArtificial()) 495 AddPred(NewSU, *I); 496 497 // Only copy scheduled successors. Cut them from old node's successor 498 // list and move them over. 499 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 500 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 501 I != E; ++I) { 502 if (I->isArtificial()) 503 continue; 504 SUnit *SuccSU = I->getSUnit(); 505 if (SuccSU->isScheduled) { 506 SDep D = *I; 507 D.setSUnit(NewSU); 508 AddPred(SuccSU, D); 509 D.setSUnit(SU); 510 DelDeps.push_back(std::make_pair(SuccSU, D)); 511 } 512 } 513 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 514 RemovePred(DelDeps[i].first, DelDeps[i].second); 515 516 AvailableQueue->updateNode(SU); 517 AvailableQueue->addNode(NewSU); 518 519 ++NumDups; 520 return NewSU; 521} 522 523/// InsertCopiesAndMoveSuccs - Insert register copies and move all 524/// scheduled successors of the given SUnit to the last copy. 525void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 526 const TargetRegisterClass *DestRC, 527 const TargetRegisterClass *SrcRC, 528 SmallVector<SUnit*, 2> &Copies) { 529 SUnit *CopyFromSU = CreateNewSUnit(NULL); 530 CopyFromSU->CopySrcRC = SrcRC; 531 CopyFromSU->CopyDstRC = DestRC; 532 533 SUnit *CopyToSU = CreateNewSUnit(NULL); 534 CopyToSU->CopySrcRC = DestRC; 535 CopyToSU->CopyDstRC = SrcRC; 536 537 // Only copy scheduled successors. Cut them from old node's successor 538 // list and move them over. 539 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 540 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 541 I != E; ++I) { 542 if (I->isArtificial()) 543 continue; 544 SUnit *SuccSU = I->getSUnit(); 545 if (SuccSU->isScheduled) { 546 SDep D = *I; 547 D.setSUnit(CopyToSU); 548 AddPred(SuccSU, D); 549 DelDeps.push_back(std::make_pair(SuccSU, *I)); 550 } 551 } 552 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 553 RemovePred(DelDeps[i].first, DelDeps[i].second); 554 555 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 556 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 557 558 AvailableQueue->updateNode(SU); 559 AvailableQueue->addNode(CopyFromSU); 560 AvailableQueue->addNode(CopyToSU); 561 Copies.push_back(CopyFromSU); 562 Copies.push_back(CopyToSU); 563 564 ++NumPRCopies; 565} 566 567/// getPhysicalRegisterVT - Returns the ValueType of the physical register 568/// definition of the specified node. 569/// FIXME: Move to SelectionDAG? 570static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 571 const TargetInstrInfo *TII) { 572 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 573 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 574 unsigned NumRes = TID.getNumDefs(); 575 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 576 if (Reg == *ImpDef) 577 break; 578 ++NumRes; 579 } 580 return N->getValueType(NumRes); 581} 582 583/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 584/// scheduling of the given node to satisfy live physical register dependencies. 585/// If the specific node is the last one that's available to schedule, do 586/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 587bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 588 SmallVector<unsigned, 4> &LRegs){ 589 if (NumLiveRegs == 0) 590 return false; 591 592 SmallSet<unsigned, 4> RegAdded; 593 // If this node would clobber any "live" register, then it's not ready. 594 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 595 I != E; ++I) { 596 if (I->isAssignedRegDep()) { 597 unsigned Reg = I->getReg(); 598 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { 599 if (RegAdded.insert(Reg)) 600 LRegs.push_back(Reg); 601 } 602 for (const unsigned *Alias = TRI->getAliasSet(Reg); 603 *Alias; ++Alias) 604 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { 605 if (RegAdded.insert(*Alias)) 606 LRegs.push_back(*Alias); 607 } 608 } 609 } 610 611 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 612 if (!Node->isMachineOpcode()) 613 continue; 614 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 615 if (!TID.ImplicitDefs) 616 continue; 617 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 618 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { 619 if (RegAdded.insert(*Reg)) 620 LRegs.push_back(*Reg); 621 } 622 for (const unsigned *Alias = TRI->getAliasSet(*Reg); 623 *Alias; ++Alias) 624 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 625 if (RegAdded.insert(*Alias)) 626 LRegs.push_back(*Alias); 627 } 628 } 629 } 630 return !LRegs.empty(); 631} 632 633 634/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 635/// schedulers. 636void ScheduleDAGRRList::ListScheduleBottomUp() { 637 unsigned CurCycle = 0; 638 639 // Release any predecessors of the special Exit node. 640 ReleasePredecessors(&ExitSU, CurCycle); 641 642 // Add root to Available queue. 643 if (!SUnits.empty()) { 644 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 645 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 646 RootSU->isAvailable = true; 647 AvailableQueue->push(RootSU); 648 } 649 650 // While Available queue is not empty, grab the node with the highest 651 // priority. If it is not ready put it back. Schedule the node. 652 SmallVector<SUnit*, 4> NotReady; 653 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 654 Sequence.reserve(SUnits.size()); 655 while (!AvailableQueue->empty()) { 656 bool Delayed = false; 657 LRegsMap.clear(); 658 SUnit *CurSU = AvailableQueue->pop(); 659 while (CurSU) { 660 SmallVector<unsigned, 4> LRegs; 661 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 662 break; 663 Delayed = true; 664 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 665 666 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 667 NotReady.push_back(CurSU); 668 CurSU = AvailableQueue->pop(); 669 } 670 671 // All candidates are delayed due to live physical reg dependencies. 672 // Try backtracking, code duplication, or inserting cross class copies 673 // to resolve it. 674 if (Delayed && !CurSU) { 675 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 676 SUnit *TrySU = NotReady[i]; 677 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 678 679 // Try unscheduling up to the point where it's safe to schedule 680 // this node. 681 unsigned LiveCycle = CurCycle; 682 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 683 unsigned Reg = LRegs[j]; 684 unsigned LCycle = LiveRegCycles[Reg]; 685 LiveCycle = std::min(LiveCycle, LCycle); 686 } 687 SUnit *OldSU = Sequence[LiveCycle]; 688 if (!WillCreateCycle(TrySU, OldSU)) { 689 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 690 // Force the current node to be scheduled before the node that 691 // requires the physical reg dep. 692 if (OldSU->isAvailable) { 693 OldSU->isAvailable = false; 694 AvailableQueue->remove(OldSU); 695 } 696 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 697 /*Reg=*/0, /*isNormalMemory=*/false, 698 /*isMustAlias=*/false, /*isArtificial=*/true)); 699 // If one or more successors has been unscheduled, then the current 700 // node is no longer avaialable. Schedule a successor that's now 701 // available instead. 702 if (!TrySU->isAvailable) 703 CurSU = AvailableQueue->pop(); 704 else { 705 CurSU = TrySU; 706 TrySU->isPending = false; 707 NotReady.erase(NotReady.begin()+i); 708 } 709 break; 710 } 711 } 712 713 if (!CurSU) { 714 // Can't backtrack. If it's too expensive to copy the value, then try 715 // duplicate the nodes that produces these "too expensive to copy" 716 // values to break the dependency. In case even that doesn't work, 717 // insert cross class copies. 718 // If it's not too expensive, i.e. cost != -1, issue copies. 719 SUnit *TrySU = NotReady[0]; 720 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 721 assert(LRegs.size() == 1 && "Can't handle this yet!"); 722 unsigned Reg = LRegs[0]; 723 SUnit *LRDef = LiveRegDefs[Reg]; 724 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 725 const TargetRegisterClass *RC = 726 TRI->getPhysicalRegisterRegClass(Reg, VT); 727 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 728 729 // If cross copy register class is null, then it must be possible copy 730 // the value directly. Do not try duplicate the def. 731 SUnit *NewDef = 0; 732 if (DestRC) 733 NewDef = CopyAndMoveSuccessors(LRDef); 734 else 735 DestRC = RC; 736 if (!NewDef) { 737 // Issue copies, these can be expensive cross register class copies. 738 SmallVector<SUnit*, 2> Copies; 739 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 740 DOUT << "Adding an edge from SU #" << TrySU->NodeNum 741 << " to SU #" << Copies.front()->NodeNum << "\n"; 742 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 743 /*Reg=*/0, /*isNormalMemory=*/false, 744 /*isMustAlias=*/false, 745 /*isArtificial=*/true)); 746 NewDef = Copies.back(); 747 } 748 749 DOUT << "Adding an edge from SU #" << NewDef->NodeNum 750 << " to SU #" << TrySU->NodeNum << "\n"; 751 LiveRegDefs[Reg] = NewDef; 752 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 753 /*Reg=*/0, /*isNormalMemory=*/false, 754 /*isMustAlias=*/false, 755 /*isArtificial=*/true)); 756 TrySU->isAvailable = false; 757 CurSU = NewDef; 758 } 759 760 assert(CurSU && "Unable to resolve live physical register dependencies!"); 761 } 762 763 // Add the nodes that aren't ready back onto the available list. 764 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 765 NotReady[i]->isPending = false; 766 // May no longer be available due to backtracking. 767 if (NotReady[i]->isAvailable) 768 AvailableQueue->push(NotReady[i]); 769 } 770 NotReady.clear(); 771 772 if (CurSU) 773 ScheduleNodeBottomUp(CurSU, CurCycle); 774 ++CurCycle; 775 } 776 777 // Reverse the order if it is bottom up. 778 std::reverse(Sequence.begin(), Sequence.end()); 779 780#ifndef NDEBUG 781 VerifySchedule(isBottomUp); 782#endif 783} 784 785//===----------------------------------------------------------------------===// 786// Top-Down Scheduling 787//===----------------------------------------------------------------------===// 788 789/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 790/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 791void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 792 SUnit *SuccSU = SuccEdge->getSUnit(); 793 --SuccSU->NumPredsLeft; 794 795#ifndef NDEBUG 796 if (SuccSU->NumPredsLeft < 0) { 797 cerr << "*** Scheduling failed! ***\n"; 798 SuccSU->dump(this); 799 cerr << " has been released too many times!\n"; 800 assert(0); 801 } 802#endif 803 804 // If all the node's predecessors are scheduled, this node is ready 805 // to be scheduled. Ignore the special ExitSU node. 806 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 807 SuccSU->isAvailable = true; 808 AvailableQueue->push(SuccSU); 809 } 810} 811 812void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 813 // Top down: release successors 814 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 815 I != E; ++I) { 816 assert(!I->isAssignedRegDep() && 817 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 818 819 ReleaseSucc(SU, &*I); 820 } 821} 822 823/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 824/// count of its successors. If a successor pending count is zero, add it to 825/// the Available queue. 826void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 827 DOUT << "*** Scheduling [" << CurCycle << "]: "; 828 DEBUG(SU->dump(this)); 829 830 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 831 SU->setDepthToAtLeast(CurCycle); 832 Sequence.push_back(SU); 833 834 ReleaseSuccessors(SU); 835 SU->isScheduled = true; 836 AvailableQueue->ScheduledNode(SU); 837} 838 839/// ListScheduleTopDown - The main loop of list scheduling for top-down 840/// schedulers. 841void ScheduleDAGRRList::ListScheduleTopDown() { 842 unsigned CurCycle = 0; 843 844 // Release any successors of the special Entry node. 845 ReleaseSuccessors(&EntrySU); 846 847 // All leaves to Available queue. 848 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 849 // It is available if it has no predecessors. 850 if (SUnits[i].Preds.empty()) { 851 AvailableQueue->push(&SUnits[i]); 852 SUnits[i].isAvailable = true; 853 } 854 } 855 856 // While Available queue is not empty, grab the node with the highest 857 // priority. If it is not ready put it back. Schedule the node. 858 Sequence.reserve(SUnits.size()); 859 while (!AvailableQueue->empty()) { 860 SUnit *CurSU = AvailableQueue->pop(); 861 862 if (CurSU) 863 ScheduleNodeTopDown(CurSU, CurCycle); 864 ++CurCycle; 865 } 866 867#ifndef NDEBUG 868 VerifySchedule(isBottomUp); 869#endif 870} 871 872 873//===----------------------------------------------------------------------===// 874// RegReductionPriorityQueue Implementation 875//===----------------------------------------------------------------------===// 876// 877// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 878// to reduce register pressure. 879// 880namespace { 881 template<class SF> 882 class RegReductionPriorityQueue; 883 884 /// Sorting functions for the Available queue. 885 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 886 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 887 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 888 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 889 890 bool operator()(const SUnit* left, const SUnit* right) const; 891 }; 892 893 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 894 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 895 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 896 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 897 898 bool operator()(const SUnit* left, const SUnit* right) const; 899 }; 900} // end anonymous namespace 901 902static inline bool isCopyFromLiveIn(const SUnit *SU) { 903 SDNode *N = SU->getNode(); 904 return N && N->getOpcode() == ISD::CopyFromReg && 905 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 906} 907 908/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 909/// Smaller number is the higher priority. 910static unsigned 911CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 912 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 913 if (SethiUllmanNumber != 0) 914 return SethiUllmanNumber; 915 916 unsigned Extra = 0; 917 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 918 I != E; ++I) { 919 if (I->isCtrl()) continue; // ignore chain preds 920 SUnit *PredSU = I->getSUnit(); 921 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 922 if (PredSethiUllman > SethiUllmanNumber) { 923 SethiUllmanNumber = PredSethiUllman; 924 Extra = 0; 925 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl()) 926 ++Extra; 927 } 928 929 SethiUllmanNumber += Extra; 930 931 if (SethiUllmanNumber == 0) 932 SethiUllmanNumber = 1; 933 934 return SethiUllmanNumber; 935} 936 937namespace { 938 template<class SF> 939 class VISIBILITY_HIDDEN RegReductionPriorityQueue 940 : public SchedulingPriorityQueue { 941 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 942 unsigned currentQueueId; 943 944 protected: 945 // SUnits - The SUnits for the current graph. 946 std::vector<SUnit> *SUnits; 947 948 const TargetInstrInfo *TII; 949 const TargetRegisterInfo *TRI; 950 ScheduleDAGRRList *scheduleDAG; 951 952 // SethiUllmanNumbers - The SethiUllman number for each node. 953 std::vector<unsigned> SethiUllmanNumbers; 954 955 public: 956 RegReductionPriorityQueue(const TargetInstrInfo *tii, 957 const TargetRegisterInfo *tri) : 958 Queue(SF(this)), currentQueueId(0), 959 TII(tii), TRI(tri), scheduleDAG(NULL) {} 960 961 void initNodes(std::vector<SUnit> &sunits) { 962 SUnits = &sunits; 963 // Add pseudo dependency edges for two-address nodes. 964 AddPseudoTwoAddrDeps(); 965 // Calculate node priorities. 966 CalculateSethiUllmanNumbers(); 967 } 968 969 void addNode(const SUnit *SU) { 970 unsigned SUSize = SethiUllmanNumbers.size(); 971 if (SUnits->size() > SUSize) 972 SethiUllmanNumbers.resize(SUSize*2, 0); 973 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 974 } 975 976 void updateNode(const SUnit *SU) { 977 SethiUllmanNumbers[SU->NodeNum] = 0; 978 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 979 } 980 981 void releaseState() { 982 SUnits = 0; 983 SethiUllmanNumbers.clear(); 984 } 985 986 unsigned getNodePriority(const SUnit *SU) const { 987 assert(SU->NodeNum < SethiUllmanNumbers.size()); 988 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 989 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 990 // CopyFromReg should be close to its def because it restricts 991 // allocation choices. But if it is a livein then perhaps we want it 992 // closer to its uses so it can be coalesced. 993 return 0xffff; 994 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 995 // CopyToReg should be close to its uses to facilitate coalescing and 996 // avoid spilling. 997 return 0; 998 if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 999 Opc == TargetInstrInfo::INSERT_SUBREG) 1000 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1001 // facilitate coalescing. 1002 return 0; 1003 if (SU->NumSuccs == 0) 1004 // If SU does not have a use, i.e. it doesn't produce a value that would 1005 // be consumed (e.g. store), then it terminates a chain of computation. 1006 // Give it a large SethiUllman number so it will be scheduled right 1007 // before its predecessors that it doesn't lengthen their live ranges. 1008 return 0xffff; 1009 if (SU->NumPreds == 0) 1010 // If SU does not have a def, schedule it close to its uses because it 1011 // does not lengthen any live ranges. 1012 return 0; 1013 return SethiUllmanNumbers[SU->NodeNum]; 1014 } 1015 1016 unsigned size() const { return Queue.size(); } 1017 1018 bool empty() const { return Queue.empty(); } 1019 1020 void push(SUnit *U) { 1021 assert(!U->NodeQueueId && "Node in the queue already"); 1022 U->NodeQueueId = ++currentQueueId; 1023 Queue.push(U); 1024 } 1025 1026 void push_all(const std::vector<SUnit *> &Nodes) { 1027 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1028 push(Nodes[i]); 1029 } 1030 1031 SUnit *pop() { 1032 if (empty()) return NULL; 1033 SUnit *V = Queue.top(); 1034 Queue.pop(); 1035 V->NodeQueueId = 0; 1036 return V; 1037 } 1038 1039 void remove(SUnit *SU) { 1040 assert(!Queue.empty() && "Queue is empty!"); 1041 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1042 Queue.erase_one(SU); 1043 SU->NodeQueueId = 0; 1044 } 1045 1046 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1047 scheduleDAG = scheduleDag; 1048 } 1049 1050 protected: 1051 bool canClobber(const SUnit *SU, const SUnit *Op); 1052 void AddPseudoTwoAddrDeps(); 1053 void CalculateSethiUllmanNumbers(); 1054 }; 1055 1056 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1057 BURegReductionPriorityQueue; 1058 1059 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1060 TDRegReductionPriorityQueue; 1061} 1062 1063/// closestSucc - Returns the scheduled cycle of the successor which is 1064/// closet to the current cycle. 1065static unsigned closestSucc(const SUnit *SU) { 1066 unsigned MaxHeight = 0; 1067 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1068 I != E; ++I) { 1069 if (I->isCtrl()) continue; // ignore chain succs 1070 unsigned Height = I->getSUnit()->getHeight(); 1071 // If there are bunch of CopyToRegs stacked up, they should be considered 1072 // to be at the same position. 1073 if (I->getSUnit()->getNode() && 1074 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1075 Height = closestSucc(I->getSUnit())+1; 1076 if (Height > MaxHeight) 1077 MaxHeight = Height; 1078 } 1079 return MaxHeight; 1080} 1081 1082/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1083/// for scratch registers. Live-in operands and live-out results don't count 1084/// since they are "fixed". 1085static unsigned calcMaxScratches(const SUnit *SU) { 1086 unsigned Scratches = 0; 1087 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1088 I != E; ++I) { 1089 if (I->isCtrl()) continue; // ignore chain preds 1090 if (!I->getSUnit()->getNode() || 1091 I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg) 1092 Scratches++; 1093 } 1094 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1095 I != E; ++I) { 1096 if (I->isCtrl()) continue; // ignore chain succs 1097 if (!I->getSUnit()->getNode() || 1098 I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg) 1099 Scratches += 10; 1100 } 1101 return Scratches; 1102} 1103 1104// Bottom up 1105bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1106 unsigned LPriority = SPQ->getNodePriority(left); 1107 unsigned RPriority = SPQ->getNodePriority(right); 1108 if (LPriority != RPriority) 1109 return LPriority > RPriority; 1110 1111 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1112 // e.g. 1113 // t1 = op t2, c1 1114 // t3 = op t4, c2 1115 // 1116 // and the following instructions are both ready. 1117 // t2 = op c3 1118 // t4 = op c4 1119 // 1120 // Then schedule t2 = op first. 1121 // i.e. 1122 // t4 = op c4 1123 // t2 = op c3 1124 // t1 = op t2, c1 1125 // t3 = op t4, c2 1126 // 1127 // This creates more short live intervals. 1128 unsigned LDist = closestSucc(left); 1129 unsigned RDist = closestSucc(right); 1130 if (LDist != RDist) 1131 return LDist < RDist; 1132 1133 // Intuitively, it's good to push down instructions whose results are 1134 // liveout so their long live ranges won't conflict with other values 1135 // which are needed inside the BB. Further prioritize liveout instructions 1136 // by the number of operands which are calculated within the BB. 1137 unsigned LScratch = calcMaxScratches(left); 1138 unsigned RScratch = calcMaxScratches(right); 1139 if (LScratch != RScratch) 1140 return LScratch > RScratch; 1141 1142 if (left->getHeight() != right->getHeight()) 1143 return left->getHeight() > right->getHeight(); 1144 1145 if (left->getDepth() != right->getDepth()) 1146 return left->getDepth() < right->getDepth(); 1147 1148 assert(left->NodeQueueId && right->NodeQueueId && 1149 "NodeQueueId cannot be zero"); 1150 return (left->NodeQueueId > right->NodeQueueId); 1151} 1152 1153template<class SF> 1154bool 1155RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1156 if (SU->isTwoAddress) { 1157 unsigned Opc = SU->getNode()->getMachineOpcode(); 1158 const TargetInstrDesc &TID = TII->get(Opc); 1159 unsigned NumRes = TID.getNumDefs(); 1160 unsigned NumOps = TID.getNumOperands() - NumRes; 1161 for (unsigned i = 0; i != NumOps; ++i) { 1162 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1163 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1164 if (DU->getNodeId() != -1 && 1165 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1166 return true; 1167 } 1168 } 1169 } 1170 return false; 1171} 1172 1173 1174/// hasCopyToRegUse - Return true if SU has a value successor that is a 1175/// CopyToReg node. 1176static bool hasCopyToRegUse(const SUnit *SU) { 1177 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1178 I != E; ++I) { 1179 if (I->isCtrl()) continue; 1180 const SUnit *SuccSU = I->getSUnit(); 1181 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1182 return true; 1183 } 1184 return false; 1185} 1186 1187/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1188/// physical register defs. 1189static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1190 const TargetInstrInfo *TII, 1191 const TargetRegisterInfo *TRI) { 1192 SDNode *N = SuccSU->getNode(); 1193 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1194 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1195 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1196 const unsigned *SUImpDefs = 1197 TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 1198 if (!SUImpDefs) 1199 return false; 1200 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1201 MVT VT = N->getValueType(i); 1202 if (VT == MVT::Flag || VT == MVT::Other) 1203 continue; 1204 if (!N->hasAnyUseOfValue(i)) 1205 continue; 1206 unsigned Reg = ImpDefs[i - NumDefs]; 1207 for (;*SUImpDefs; ++SUImpDefs) { 1208 unsigned SUReg = *SUImpDefs; 1209 if (TRI->regsOverlap(Reg, SUReg)) 1210 return true; 1211 } 1212 } 1213 return false; 1214} 1215 1216/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1217/// it as a def&use operand. Add a pseudo control edge from it to the other 1218/// node (if it won't create a cycle) so the two-address one will be scheduled 1219/// first (lower in the schedule). If both nodes are two-address, favor the 1220/// one that has a CopyToReg use (more likely to be a loop induction update). 1221/// If both are two-address, but one is commutable while the other is not 1222/// commutable, favor the one that's not commutable. 1223template<class SF> 1224void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1225 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1226 SUnit *SU = &(*SUnits)[i]; 1227 if (!SU->isTwoAddress) 1228 continue; 1229 1230 SDNode *Node = SU->getNode(); 1231 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1232 continue; 1233 1234 unsigned Opc = Node->getMachineOpcode(); 1235 const TargetInstrDesc &TID = TII->get(Opc); 1236 unsigned NumRes = TID.getNumDefs(); 1237 unsigned NumOps = TID.getNumOperands() - NumRes; 1238 for (unsigned j = 0; j != NumOps; ++j) { 1239 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1240 continue; 1241 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1242 if (DU->getNodeId() == -1) 1243 continue; 1244 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1245 if (!DUSU) continue; 1246 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1247 E = DUSU->Succs.end(); I != E; ++I) { 1248 if (I->isCtrl()) continue; 1249 SUnit *SuccSU = I->getSUnit(); 1250 if (SuccSU == SU) 1251 continue; 1252 // Be conservative. Ignore if nodes aren't at roughly the same 1253 // depth and height. 1254 if (SuccSU->getHeight() < SU->getHeight() && 1255 (SU->getHeight() - SuccSU->getHeight()) > 1) 1256 continue; 1257 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1258 continue; 1259 // Don't constrain nodes with physical register defs if the 1260 // predecessor can clobber them. 1261 if (SuccSU->hasPhysRegDefs) { 1262 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1263 continue; 1264 } 1265 // Don't constraint extract_subreg / insert_subreg these may be 1266 // coalesced away. We don't them close to their uses. 1267 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1268 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1269 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1270 continue; 1271 if ((!canClobber(SuccSU, DUSU) || 1272 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1273 (!SU->isCommutable && SuccSU->isCommutable)) && 1274 !scheduleDAG->IsReachable(SuccSU, SU)) { 1275 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum 1276 << " to SU #" << SuccSU->NodeNum << "\n"; 1277 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1278 /*Reg=*/0, /*isNormalMemory=*/false, 1279 /*isMustAlias=*/false, 1280 /*isArtificial=*/true)); 1281 } 1282 } 1283 } 1284 } 1285} 1286 1287/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1288/// scheduling units. 1289template<class SF> 1290void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1291 SethiUllmanNumbers.assign(SUnits->size(), 0); 1292 1293 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1294 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1295} 1296 1297/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1298/// predecessors of the successors of the SUnit SU. Stop when the provided 1299/// limit is exceeded. 1300static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1301 unsigned Limit) { 1302 unsigned Sum = 0; 1303 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1304 I != E; ++I) { 1305 const SUnit *SuccSU = I->getSUnit(); 1306 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1307 EE = SuccSU->Preds.end(); II != EE; ++II) { 1308 SUnit *PredSU = II->getSUnit(); 1309 if (!PredSU->isScheduled) 1310 if (++Sum > Limit) 1311 return Sum; 1312 } 1313 } 1314 return Sum; 1315} 1316 1317 1318// Top down 1319bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1320 unsigned LPriority = SPQ->getNodePriority(left); 1321 unsigned RPriority = SPQ->getNodePriority(right); 1322 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1323 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1324 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1325 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1326 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1327 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1328 1329 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1330 return false; 1331 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1332 return true; 1333 1334 if (LIsFloater) 1335 LBonus -= 2; 1336 if (RIsFloater) 1337 RBonus -= 2; 1338 if (left->NumSuccs == 1) 1339 LBonus += 2; 1340 if (right->NumSuccs == 1) 1341 RBonus += 2; 1342 1343 if (LPriority+LBonus != RPriority+RBonus) 1344 return LPriority+LBonus < RPriority+RBonus; 1345 1346 if (left->getDepth() != right->getDepth()) 1347 return left->getDepth() < right->getDepth(); 1348 1349 if (left->NumSuccsLeft != right->NumSuccsLeft) 1350 return left->NumSuccsLeft > right->NumSuccsLeft; 1351 1352 assert(left->NodeQueueId && right->NodeQueueId && 1353 "NodeQueueId cannot be zero"); 1354 return (left->NodeQueueId > right->NodeQueueId); 1355} 1356 1357//===----------------------------------------------------------------------===// 1358// Public Constructor Functions 1359//===----------------------------------------------------------------------===// 1360 1361llvm::ScheduleDAGSDNodes * 1362llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, bool) { 1363 const TargetMachine &TM = IS->TM; 1364 const TargetInstrInfo *TII = TM.getInstrInfo(); 1365 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1366 1367 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1368 1369 ScheduleDAGRRList *SD = 1370 new ScheduleDAGRRList(*IS->MF, true, PQ); 1371 PQ->setScheduleDAG(SD); 1372 return SD; 1373} 1374 1375llvm::ScheduleDAGSDNodes * 1376llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, bool) { 1377 const TargetMachine &TM = IS->TM; 1378 const TargetInstrInfo *TII = TM.getInstrInfo(); 1379 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1380 1381 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1382 1383 ScheduleDAGRRList *SD = 1384 new ScheduleDAGRRList(*IS->MF, false, PQ); 1385 PQ->setScheduleDAG(SD); 1386 return SD; 1387} 1388