ScheduleDAGRRList.cpp revision ac9de036dc0e4065ee2c03419f11515a34ce505e
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SelectionDAGISel.h" 22#include "llvm/Target/TargetRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/Support/ErrorHandling.h" 29#include "llvm/ADT/PriorityQueue.h" 30#include "llvm/ADT/SmallSet.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/ADT/STLExtras.h" 33#include <climits> 34using namespace llvm; 35 36STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 37STATISTIC(NumUnfolds, "Number of nodes unfolded"); 38STATISTIC(NumDups, "Number of duplicated nodes"); 39STATISTIC(NumPRCopies, "Number of physical register copies"); 40 41static RegisterScheduler 42 burrListDAGScheduler("list-burr", 43 "Bottom-up register reduction list scheduling", 44 createBURRListDAGScheduler); 45static RegisterScheduler 46 tdrListrDAGScheduler("list-tdrr", 47 "Top-down register reduction list scheduling", 48 createTDRRListDAGScheduler); 49 50namespace { 51//===----------------------------------------------------------------------===// 52/// ScheduleDAGRRList - The actual register reduction list scheduler 53/// implementation. This supports both top-down and bottom-up scheduling. 54/// 55class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 56private: 57 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 58 /// it is top-down. 59 bool isBottomUp; 60 61 /// AvailableQueue - The priority queue to use for the available SUnits. 62 SchedulingPriorityQueue *AvailableQueue; 63 64 /// LiveRegDefs - A set of physical registers and their definition 65 /// that are "live". These nodes must be scheduled before any other nodes that 66 /// modifies the registers can be scheduled. 67 unsigned NumLiveRegs; 68 std::vector<SUnit*> LiveRegDefs; 69 std::vector<unsigned> LiveRegCycles; 70 71 /// Topo - A topological ordering for SUnits which permits fast IsReachable 72 /// and similar queries. 73 ScheduleDAGTopologicalSort Topo; 74 75public: 76 ScheduleDAGRRList(MachineFunction &mf, 77 bool isbottomup, 78 SchedulingPriorityQueue *availqueue) 79 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), 80 AvailableQueue(availqueue), Topo(SUnits) { 81 } 82 83 ~ScheduleDAGRRList() { 84 delete AvailableQueue; 85 } 86 87 void Schedule(); 88 89 /// IsReachable - Checks if SU is reachable from TargetSU. 90 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 91 return Topo.IsReachable(SU, TargetSU); 92 } 93 94 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 95 /// create a cycle. 96 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 97 return Topo.WillCreateCycle(SU, TargetSU); 98 } 99 100 /// AddPred - adds a predecessor edge to SUnit SU. 101 /// This returns true if this is a new predecessor. 102 /// Updates the topological ordering if required. 103 void AddPred(SUnit *SU, const SDep &D) { 104 Topo.AddPred(SU, D.getSUnit()); 105 SU->addPred(D); 106 } 107 108 /// RemovePred - removes a predecessor edge from SUnit SU. 109 /// This returns true if an edge was removed. 110 /// Updates the topological ordering if required. 111 void RemovePred(SUnit *SU, const SDep &D) { 112 Topo.RemovePred(SU, D.getSUnit()); 113 SU->removePred(D); 114 } 115 116private: 117 void ReleasePred(SUnit *SU, const SDep *PredEdge); 118 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 119 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 120 void ReleaseSuccessors(SUnit *SU); 121 void CapturePred(SDep *PredEdge); 122 void ScheduleNodeBottomUp(SUnit*, unsigned); 123 void ScheduleNodeTopDown(SUnit*, unsigned); 124 void UnscheduleNodeBottomUp(SUnit*); 125 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 126 SUnit *CopyAndMoveSuccessors(SUnit*); 127 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 128 const TargetRegisterClass*, 129 const TargetRegisterClass*, 130 SmallVector<SUnit*, 2>&); 131 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 132 void ListScheduleTopDown(); 133 void ListScheduleBottomUp(); 134 135 136 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 137 /// Updates the topological ordering if required. 138 SUnit *CreateNewSUnit(SDNode *N) { 139 unsigned NumSUnits = SUnits.size(); 140 SUnit *NewNode = NewSUnit(N); 141 // Update the topological ordering. 142 if (NewNode->NodeNum >= NumSUnits) 143 Topo.InitDAGTopologicalSorting(); 144 return NewNode; 145 } 146 147 /// CreateClone - Creates a new SUnit from an existing one. 148 /// Updates the topological ordering if required. 149 SUnit *CreateClone(SUnit *N) { 150 unsigned NumSUnits = SUnits.size(); 151 SUnit *NewNode = Clone(N); 152 // Update the topological ordering. 153 if (NewNode->NodeNum >= NumSUnits) 154 Topo.InitDAGTopologicalSorting(); 155 return NewNode; 156 } 157 158 /// ForceUnitLatencies - Return true, since register-pressure-reducing 159 /// scheduling doesn't need actual latency information. 160 bool ForceUnitLatencies() const { return true; } 161}; 162} // end anonymous namespace 163 164 165/// Schedule - Schedule the DAG using list scheduling. 166void ScheduleDAGRRList::Schedule() { 167 DOUT << "********** List Scheduling **********\n"; 168 169 NumLiveRegs = 0; 170 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 171 LiveRegCycles.resize(TRI->getNumRegs(), 0); 172 173 // Build the scheduling graph. 174 BuildSchedGraph(); 175 176 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 177 SUnits[su].dumpAll(this)); 178 Topo.InitDAGTopologicalSorting(); 179 180 AvailableQueue->initNodes(SUnits); 181 182 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 183 if (isBottomUp) 184 ListScheduleBottomUp(); 185 else 186 ListScheduleTopDown(); 187 188 AvailableQueue->releaseState(); 189} 190 191//===----------------------------------------------------------------------===// 192// Bottom-Up Scheduling 193//===----------------------------------------------------------------------===// 194 195/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 196/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 197void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 198 SUnit *PredSU = PredEdge->getSUnit(); 199 --PredSU->NumSuccsLeft; 200 201#ifndef NDEBUG 202 if (PredSU->NumSuccsLeft < 0) { 203 cerr << "*** Scheduling failed! ***\n"; 204 PredSU->dump(this); 205 cerr << " has been released too many times!\n"; 206 llvm_unreachable(0); 207 } 208#endif 209 210 // If all the node's successors are scheduled, this node is ready 211 // to be scheduled. Ignore the special EntrySU node. 212 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 213 PredSU->isAvailable = true; 214 AvailableQueue->push(PredSU); 215 } 216} 217 218void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 219 // Bottom up: release predecessors 220 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 221 I != E; ++I) { 222 ReleasePred(SU, &*I); 223 if (I->isAssignedRegDep()) { 224 // This is a physical register dependency and it's impossible or 225 // expensive to copy the register. Make sure nothing that can 226 // clobber the register is scheduled between the predecessor and 227 // this node. 228 if (!LiveRegDefs[I->getReg()]) { 229 ++NumLiveRegs; 230 LiveRegDefs[I->getReg()] = I->getSUnit(); 231 LiveRegCycles[I->getReg()] = CurCycle; 232 } 233 } 234 } 235} 236 237/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 238/// count of its predecessors. If a predecessor pending count is zero, add it to 239/// the Available queue. 240void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 241 DOUT << "*** Scheduling [" << CurCycle << "]: "; 242 DEBUG(SU->dump(this)); 243 244 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 245 SU->setHeightToAtLeast(CurCycle); 246 Sequence.push_back(SU); 247 248 ReleasePredecessors(SU, CurCycle); 249 250 // Release all the implicit physical register defs that are live. 251 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 252 I != E; ++I) { 253 if (I->isAssignedRegDep()) { 254 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 255 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 256 assert(LiveRegDefs[I->getReg()] == SU && 257 "Physical register dependency violated?"); 258 --NumLiveRegs; 259 LiveRegDefs[I->getReg()] = NULL; 260 LiveRegCycles[I->getReg()] = 0; 261 } 262 } 263 } 264 265 SU->isScheduled = true; 266 AvailableQueue->ScheduledNode(SU); 267} 268 269/// CapturePred - This does the opposite of ReleasePred. Since SU is being 270/// unscheduled, incrcease the succ left count of its predecessors. Remove 271/// them from AvailableQueue if necessary. 272void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 273 SUnit *PredSU = PredEdge->getSUnit(); 274 if (PredSU->isAvailable) { 275 PredSU->isAvailable = false; 276 if (!PredSU->isPending) 277 AvailableQueue->remove(PredSU); 278 } 279 280 ++PredSU->NumSuccsLeft; 281} 282 283/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 284/// its predecessor states to reflect the change. 285void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 286 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; 287 DEBUG(SU->dump(this)); 288 289 AvailableQueue->UnscheduledNode(SU); 290 291 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 292 I != E; ++I) { 293 CapturePred(&*I); 294 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 295 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 296 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 297 "Physical register dependency violated?"); 298 --NumLiveRegs; 299 LiveRegDefs[I->getReg()] = NULL; 300 LiveRegCycles[I->getReg()] = 0; 301 } 302 } 303 304 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 305 I != E; ++I) { 306 if (I->isAssignedRegDep()) { 307 if (!LiveRegDefs[I->getReg()]) { 308 LiveRegDefs[I->getReg()] = SU; 309 ++NumLiveRegs; 310 } 311 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 312 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 313 } 314 } 315 316 SU->setHeightDirty(); 317 SU->isScheduled = false; 318 SU->isAvailable = true; 319 AvailableQueue->push(SU); 320} 321 322/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 323/// BTCycle in order to schedule a specific node. 324void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 325 unsigned &CurCycle) { 326 SUnit *OldSU = NULL; 327 while (CurCycle > BtCycle) { 328 OldSU = Sequence.back(); 329 Sequence.pop_back(); 330 if (SU->isSucc(OldSU)) 331 // Don't try to remove SU from AvailableQueue. 332 SU->isAvailable = false; 333 UnscheduleNodeBottomUp(OldSU); 334 --CurCycle; 335 } 336 337 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 338 339 ++NumBacktracks; 340} 341 342/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 343/// successors to the newly created node. 344SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 345 if (SU->getNode()->getFlaggedNode()) 346 return NULL; 347 348 SDNode *N = SU->getNode(); 349 if (!N) 350 return NULL; 351 352 SUnit *NewSU; 353 bool TryUnfold = false; 354 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 355 EVT VT = N->getValueType(i); 356 if (VT == EVT::Flag) 357 return NULL; 358 else if (VT == EVT::Other) 359 TryUnfold = true; 360 } 361 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 362 const SDValue &Op = N->getOperand(i); 363 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 364 if (VT == EVT::Flag) 365 return NULL; 366 } 367 368 if (TryUnfold) { 369 SmallVector<SDNode*, 2> NewNodes; 370 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 371 return NULL; 372 373 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 374 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 375 376 N = NewNodes[1]; 377 SDNode *LoadNode = NewNodes[0]; 378 unsigned NumVals = N->getNumValues(); 379 unsigned OldNumVals = SU->getNode()->getNumValues(); 380 for (unsigned i = 0; i != NumVals; ++i) 381 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 382 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 383 SDValue(LoadNode, 1)); 384 385 // LoadNode may already exist. This can happen when there is another 386 // load from the same location and producing the same type of value 387 // but it has different alignment or volatileness. 388 bool isNewLoad = true; 389 SUnit *LoadSU; 390 if (LoadNode->getNodeId() != -1) { 391 LoadSU = &SUnits[LoadNode->getNodeId()]; 392 isNewLoad = false; 393 } else { 394 LoadSU = CreateNewSUnit(LoadNode); 395 LoadNode->setNodeId(LoadSU->NodeNum); 396 ComputeLatency(LoadSU); 397 } 398 399 SUnit *NewSU = CreateNewSUnit(N); 400 assert(N->getNodeId() == -1 && "Node already inserted!"); 401 N->setNodeId(NewSU->NodeNum); 402 403 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 404 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 405 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 406 NewSU->isTwoAddress = true; 407 break; 408 } 409 } 410 if (TID.isCommutable()) 411 NewSU->isCommutable = true; 412 ComputeLatency(NewSU); 413 414 // Record all the edges to and from the old SU, by category. 415 SmallVector<SDep, 4> ChainPreds; 416 SmallVector<SDep, 4> ChainSuccs; 417 SmallVector<SDep, 4> LoadPreds; 418 SmallVector<SDep, 4> NodePreds; 419 SmallVector<SDep, 4> NodeSuccs; 420 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 421 I != E; ++I) { 422 if (I->isCtrl()) 423 ChainPreds.push_back(*I); 424 else if (I->getSUnit()->getNode() && 425 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 426 LoadPreds.push_back(*I); 427 else 428 NodePreds.push_back(*I); 429 } 430 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 431 I != E; ++I) { 432 if (I->isCtrl()) 433 ChainSuccs.push_back(*I); 434 else 435 NodeSuccs.push_back(*I); 436 } 437 438 // Now assign edges to the newly-created nodes. 439 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 440 const SDep &Pred = ChainPreds[i]; 441 RemovePred(SU, Pred); 442 if (isNewLoad) 443 AddPred(LoadSU, Pred); 444 } 445 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 446 const SDep &Pred = LoadPreds[i]; 447 RemovePred(SU, Pred); 448 if (isNewLoad) 449 AddPred(LoadSU, Pred); 450 } 451 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 452 const SDep &Pred = NodePreds[i]; 453 RemovePred(SU, Pred); 454 AddPred(NewSU, Pred); 455 } 456 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 457 SDep D = NodeSuccs[i]; 458 SUnit *SuccDep = D.getSUnit(); 459 D.setSUnit(SU); 460 RemovePred(SuccDep, D); 461 D.setSUnit(NewSU); 462 AddPred(SuccDep, D); 463 } 464 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 465 SDep D = ChainSuccs[i]; 466 SUnit *SuccDep = D.getSUnit(); 467 D.setSUnit(SU); 468 RemovePred(SuccDep, D); 469 if (isNewLoad) { 470 D.setSUnit(LoadSU); 471 AddPred(SuccDep, D); 472 } 473 } 474 475 // Add a data dependency to reflect that NewSU reads the value defined 476 // by LoadSU. 477 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 478 479 if (isNewLoad) 480 AvailableQueue->addNode(LoadSU); 481 AvailableQueue->addNode(NewSU); 482 483 ++NumUnfolds; 484 485 if (NewSU->NumSuccsLeft == 0) { 486 NewSU->isAvailable = true; 487 return NewSU; 488 } 489 SU = NewSU; 490 } 491 492 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 493 NewSU = CreateClone(SU); 494 495 // New SUnit has the exact same predecessors. 496 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 497 I != E; ++I) 498 if (!I->isArtificial()) 499 AddPred(NewSU, *I); 500 501 // Only copy scheduled successors. Cut them from old node's successor 502 // list and move them over. 503 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 504 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 505 I != E; ++I) { 506 if (I->isArtificial()) 507 continue; 508 SUnit *SuccSU = I->getSUnit(); 509 if (SuccSU->isScheduled) { 510 SDep D = *I; 511 D.setSUnit(NewSU); 512 AddPred(SuccSU, D); 513 D.setSUnit(SU); 514 DelDeps.push_back(std::make_pair(SuccSU, D)); 515 } 516 } 517 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 518 RemovePred(DelDeps[i].first, DelDeps[i].second); 519 520 AvailableQueue->updateNode(SU); 521 AvailableQueue->addNode(NewSU); 522 523 ++NumDups; 524 return NewSU; 525} 526 527/// InsertCopiesAndMoveSuccs - Insert register copies and move all 528/// scheduled successors of the given SUnit to the last copy. 529void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 530 const TargetRegisterClass *DestRC, 531 const TargetRegisterClass *SrcRC, 532 SmallVector<SUnit*, 2> &Copies) { 533 SUnit *CopyFromSU = CreateNewSUnit(NULL); 534 CopyFromSU->CopySrcRC = SrcRC; 535 CopyFromSU->CopyDstRC = DestRC; 536 537 SUnit *CopyToSU = CreateNewSUnit(NULL); 538 CopyToSU->CopySrcRC = DestRC; 539 CopyToSU->CopyDstRC = SrcRC; 540 541 // Only copy scheduled successors. Cut them from old node's successor 542 // list and move them over. 543 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 544 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 545 I != E; ++I) { 546 if (I->isArtificial()) 547 continue; 548 SUnit *SuccSU = I->getSUnit(); 549 if (SuccSU->isScheduled) { 550 SDep D = *I; 551 D.setSUnit(CopyToSU); 552 AddPred(SuccSU, D); 553 DelDeps.push_back(std::make_pair(SuccSU, *I)); 554 } 555 } 556 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 557 RemovePred(DelDeps[i].first, DelDeps[i].second); 558 559 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 560 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 561 562 AvailableQueue->updateNode(SU); 563 AvailableQueue->addNode(CopyFromSU); 564 AvailableQueue->addNode(CopyToSU); 565 Copies.push_back(CopyFromSU); 566 Copies.push_back(CopyToSU); 567 568 ++NumPRCopies; 569} 570 571/// getPhysicalRegisterVT - Returns the ValueType of the physical register 572/// definition of the specified node. 573/// FIXME: Move to SelectionDAG? 574static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 575 const TargetInstrInfo *TII) { 576 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 577 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 578 unsigned NumRes = TID.getNumDefs(); 579 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 580 if (Reg == *ImpDef) 581 break; 582 ++NumRes; 583 } 584 return N->getValueType(NumRes); 585} 586 587/// CheckForLiveRegDef - Return true and update live register vector if the 588/// specified register def of the specified SUnit clobbers any "live" registers. 589static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 590 std::vector<SUnit*> &LiveRegDefs, 591 SmallSet<unsigned, 4> &RegAdded, 592 SmallVector<unsigned, 4> &LRegs, 593 const TargetRegisterInfo *TRI) { 594 bool Added = false; 595 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 596 if (RegAdded.insert(Reg)) { 597 LRegs.push_back(Reg); 598 Added = true; 599 } 600 } 601 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 602 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 603 if (RegAdded.insert(*Alias)) { 604 LRegs.push_back(*Alias); 605 Added = true; 606 } 607 } 608 return Added; 609} 610 611/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 612/// scheduling of the given node to satisfy live physical register dependencies. 613/// If the specific node is the last one that's available to schedule, do 614/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 615bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 616 SmallVector<unsigned, 4> &LRegs){ 617 if (NumLiveRegs == 0) 618 return false; 619 620 SmallSet<unsigned, 4> RegAdded; 621 // If this node would clobber any "live" register, then it's not ready. 622 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 623 I != E; ++I) { 624 if (I->isAssignedRegDep()) 625 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 626 RegAdded, LRegs, TRI); 627 } 628 629 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 630 if (Node->getOpcode() == ISD::INLINEASM) { 631 // Inline asm can clobber physical defs. 632 unsigned NumOps = Node->getNumOperands(); 633 if (Node->getOperand(NumOps-1).getValueType() == EVT::Flag) 634 --NumOps; // Ignore the flag operand. 635 636 for (unsigned i = 2; i != NumOps;) { 637 unsigned Flags = 638 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 639 unsigned NumVals = (Flags & 0xffff) >> 3; 640 641 ++i; // Skip the ID value. 642 if ((Flags & 7) == 2 || (Flags & 7) == 6) { 643 // Check for def of register or earlyclobber register. 644 for (; NumVals; --NumVals, ++i) { 645 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 646 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 647 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 648 } 649 } else 650 i += NumVals; 651 } 652 continue; 653 } 654 655 if (!Node->isMachineOpcode()) 656 continue; 657 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 658 if (!TID.ImplicitDefs) 659 continue; 660 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 661 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 662 } 663 return !LRegs.empty(); 664} 665 666 667/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 668/// schedulers. 669void ScheduleDAGRRList::ListScheduleBottomUp() { 670 unsigned CurCycle = 0; 671 672 // Release any predecessors of the special Exit node. 673 ReleasePredecessors(&ExitSU, CurCycle); 674 675 // Add root to Available queue. 676 if (!SUnits.empty()) { 677 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 678 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 679 RootSU->isAvailable = true; 680 AvailableQueue->push(RootSU); 681 } 682 683 // While Available queue is not empty, grab the node with the highest 684 // priority. If it is not ready put it back. Schedule the node. 685 SmallVector<SUnit*, 4> NotReady; 686 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 687 Sequence.reserve(SUnits.size()); 688 while (!AvailableQueue->empty()) { 689 bool Delayed = false; 690 LRegsMap.clear(); 691 SUnit *CurSU = AvailableQueue->pop(); 692 while (CurSU) { 693 SmallVector<unsigned, 4> LRegs; 694 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 695 break; 696 Delayed = true; 697 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 698 699 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 700 NotReady.push_back(CurSU); 701 CurSU = AvailableQueue->pop(); 702 } 703 704 // All candidates are delayed due to live physical reg dependencies. 705 // Try backtracking, code duplication, or inserting cross class copies 706 // to resolve it. 707 if (Delayed && !CurSU) { 708 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 709 SUnit *TrySU = NotReady[i]; 710 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 711 712 // Try unscheduling up to the point where it's safe to schedule 713 // this node. 714 unsigned LiveCycle = CurCycle; 715 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 716 unsigned Reg = LRegs[j]; 717 unsigned LCycle = LiveRegCycles[Reg]; 718 LiveCycle = std::min(LiveCycle, LCycle); 719 } 720 SUnit *OldSU = Sequence[LiveCycle]; 721 if (!WillCreateCycle(TrySU, OldSU)) { 722 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 723 // Force the current node to be scheduled before the node that 724 // requires the physical reg dep. 725 if (OldSU->isAvailable) { 726 OldSU->isAvailable = false; 727 AvailableQueue->remove(OldSU); 728 } 729 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 730 /*Reg=*/0, /*isNormalMemory=*/false, 731 /*isMustAlias=*/false, /*isArtificial=*/true)); 732 // If one or more successors has been unscheduled, then the current 733 // node is no longer avaialable. Schedule a successor that's now 734 // available instead. 735 if (!TrySU->isAvailable) 736 CurSU = AvailableQueue->pop(); 737 else { 738 CurSU = TrySU; 739 TrySU->isPending = false; 740 NotReady.erase(NotReady.begin()+i); 741 } 742 break; 743 } 744 } 745 746 if (!CurSU) { 747 // Can't backtrack. If it's too expensive to copy the value, then try 748 // duplicate the nodes that produces these "too expensive to copy" 749 // values to break the dependency. In case even that doesn't work, 750 // insert cross class copies. 751 // If it's not too expensive, i.e. cost != -1, issue copies. 752 SUnit *TrySU = NotReady[0]; 753 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 754 assert(LRegs.size() == 1 && "Can't handle this yet!"); 755 unsigned Reg = LRegs[0]; 756 SUnit *LRDef = LiveRegDefs[Reg]; 757 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 758 const TargetRegisterClass *RC = 759 TRI->getPhysicalRegisterRegClass(Reg, VT); 760 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 761 762 // If cross copy register class is null, then it must be possible copy 763 // the value directly. Do not try duplicate the def. 764 SUnit *NewDef = 0; 765 if (DestRC) 766 NewDef = CopyAndMoveSuccessors(LRDef); 767 else 768 DestRC = RC; 769 if (!NewDef) { 770 // Issue copies, these can be expensive cross register class copies. 771 SmallVector<SUnit*, 2> Copies; 772 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 773 DOUT << "Adding an edge from SU #" << TrySU->NodeNum 774 << " to SU #" << Copies.front()->NodeNum << "\n"; 775 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 776 /*Reg=*/0, /*isNormalMemory=*/false, 777 /*isMustAlias=*/false, 778 /*isArtificial=*/true)); 779 NewDef = Copies.back(); 780 } 781 782 DOUT << "Adding an edge from SU #" << NewDef->NodeNum 783 << " to SU #" << TrySU->NodeNum << "\n"; 784 LiveRegDefs[Reg] = NewDef; 785 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 786 /*Reg=*/0, /*isNormalMemory=*/false, 787 /*isMustAlias=*/false, 788 /*isArtificial=*/true)); 789 TrySU->isAvailable = false; 790 CurSU = NewDef; 791 } 792 793 assert(CurSU && "Unable to resolve live physical register dependencies!"); 794 } 795 796 // Add the nodes that aren't ready back onto the available list. 797 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 798 NotReady[i]->isPending = false; 799 // May no longer be available due to backtracking. 800 if (NotReady[i]->isAvailable) 801 AvailableQueue->push(NotReady[i]); 802 } 803 NotReady.clear(); 804 805 if (CurSU) 806 ScheduleNodeBottomUp(CurSU, CurCycle); 807 ++CurCycle; 808 } 809 810 // Reverse the order if it is bottom up. 811 std::reverse(Sequence.begin(), Sequence.end()); 812 813#ifndef NDEBUG 814 VerifySchedule(isBottomUp); 815#endif 816} 817 818//===----------------------------------------------------------------------===// 819// Top-Down Scheduling 820//===----------------------------------------------------------------------===// 821 822/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 823/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 824void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 825 SUnit *SuccSU = SuccEdge->getSUnit(); 826 --SuccSU->NumPredsLeft; 827 828#ifndef NDEBUG 829 if (SuccSU->NumPredsLeft < 0) { 830 cerr << "*** Scheduling failed! ***\n"; 831 SuccSU->dump(this); 832 cerr << " has been released too many times!\n"; 833 llvm_unreachable(0); 834 } 835#endif 836 837 // If all the node's predecessors are scheduled, this node is ready 838 // to be scheduled. Ignore the special ExitSU node. 839 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 840 SuccSU->isAvailable = true; 841 AvailableQueue->push(SuccSU); 842 } 843} 844 845void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 846 // Top down: release successors 847 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 848 I != E; ++I) { 849 assert(!I->isAssignedRegDep() && 850 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 851 852 ReleaseSucc(SU, &*I); 853 } 854} 855 856/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 857/// count of its successors. If a successor pending count is zero, add it to 858/// the Available queue. 859void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 860 DOUT << "*** Scheduling [" << CurCycle << "]: "; 861 DEBUG(SU->dump(this)); 862 863 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 864 SU->setDepthToAtLeast(CurCycle); 865 Sequence.push_back(SU); 866 867 ReleaseSuccessors(SU); 868 SU->isScheduled = true; 869 AvailableQueue->ScheduledNode(SU); 870} 871 872/// ListScheduleTopDown - The main loop of list scheduling for top-down 873/// schedulers. 874void ScheduleDAGRRList::ListScheduleTopDown() { 875 unsigned CurCycle = 0; 876 877 // Release any successors of the special Entry node. 878 ReleaseSuccessors(&EntrySU); 879 880 // All leaves to Available queue. 881 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 882 // It is available if it has no predecessors. 883 if (SUnits[i].Preds.empty()) { 884 AvailableQueue->push(&SUnits[i]); 885 SUnits[i].isAvailable = true; 886 } 887 } 888 889 // While Available queue is not empty, grab the node with the highest 890 // priority. If it is not ready put it back. Schedule the node. 891 Sequence.reserve(SUnits.size()); 892 while (!AvailableQueue->empty()) { 893 SUnit *CurSU = AvailableQueue->pop(); 894 895 if (CurSU) 896 ScheduleNodeTopDown(CurSU, CurCycle); 897 ++CurCycle; 898 } 899 900#ifndef NDEBUG 901 VerifySchedule(isBottomUp); 902#endif 903} 904 905 906//===----------------------------------------------------------------------===// 907// RegReductionPriorityQueue Implementation 908//===----------------------------------------------------------------------===// 909// 910// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 911// to reduce register pressure. 912// 913namespace { 914 template<class SF> 915 class RegReductionPriorityQueue; 916 917 /// Sorting functions for the Available queue. 918 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 919 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 920 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 921 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 922 923 bool operator()(const SUnit* left, const SUnit* right) const; 924 }; 925 926 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 927 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 928 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 929 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 930 931 bool operator()(const SUnit* left, const SUnit* right) const; 932 }; 933} // end anonymous namespace 934 935/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 936/// Smaller number is the higher priority. 937static unsigned 938CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 939 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 940 if (SethiUllmanNumber != 0) 941 return SethiUllmanNumber; 942 943 unsigned Extra = 0; 944 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 945 I != E; ++I) { 946 if (I->isCtrl()) continue; // ignore chain preds 947 SUnit *PredSU = I->getSUnit(); 948 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 949 if (PredSethiUllman > SethiUllmanNumber) { 950 SethiUllmanNumber = PredSethiUllman; 951 Extra = 0; 952 } else if (PredSethiUllman == SethiUllmanNumber) 953 ++Extra; 954 } 955 956 SethiUllmanNumber += Extra; 957 958 if (SethiUllmanNumber == 0) 959 SethiUllmanNumber = 1; 960 961 return SethiUllmanNumber; 962} 963 964namespace { 965 template<class SF> 966 class VISIBILITY_HIDDEN RegReductionPriorityQueue 967 : public SchedulingPriorityQueue { 968 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 969 unsigned currentQueueId; 970 971 protected: 972 // SUnits - The SUnits for the current graph. 973 std::vector<SUnit> *SUnits; 974 975 const TargetInstrInfo *TII; 976 const TargetRegisterInfo *TRI; 977 ScheduleDAGRRList *scheduleDAG; 978 979 // SethiUllmanNumbers - The SethiUllman number for each node. 980 std::vector<unsigned> SethiUllmanNumbers; 981 982 public: 983 RegReductionPriorityQueue(const TargetInstrInfo *tii, 984 const TargetRegisterInfo *tri) : 985 Queue(SF(this)), currentQueueId(0), 986 TII(tii), TRI(tri), scheduleDAG(NULL) {} 987 988 void initNodes(std::vector<SUnit> &sunits) { 989 SUnits = &sunits; 990 // Add pseudo dependency edges for two-address nodes. 991 AddPseudoTwoAddrDeps(); 992 // Reroute edges to nodes with multiple uses. 993 PrescheduleNodesWithMultipleUses(); 994 // Calculate node priorities. 995 CalculateSethiUllmanNumbers(); 996 } 997 998 void addNode(const SUnit *SU) { 999 unsigned SUSize = SethiUllmanNumbers.size(); 1000 if (SUnits->size() > SUSize) 1001 SethiUllmanNumbers.resize(SUSize*2, 0); 1002 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1003 } 1004 1005 void updateNode(const SUnit *SU) { 1006 SethiUllmanNumbers[SU->NodeNum] = 0; 1007 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1008 } 1009 1010 void releaseState() { 1011 SUnits = 0; 1012 SethiUllmanNumbers.clear(); 1013 } 1014 1015 unsigned getNodePriority(const SUnit *SU) const { 1016 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1017 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1018 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1019 // CopyToReg should be close to its uses to facilitate coalescing and 1020 // avoid spilling. 1021 return 0; 1022 if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1023 Opc == TargetInstrInfo::SUBREG_TO_REG || 1024 Opc == TargetInstrInfo::INSERT_SUBREG) 1025 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1026 // close to their uses to facilitate coalescing. 1027 return 0; 1028 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1029 // If SU does not have a register use, i.e. it doesn't produce a value 1030 // that would be consumed (e.g. store), then it terminates a chain of 1031 // computation. Give it a large SethiUllman number so it will be 1032 // scheduled right before its predecessors that it doesn't lengthen 1033 // their live ranges. 1034 return 0xffff; 1035 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1036 // If SU does not have a register def, schedule it close to its uses 1037 // because it does not lengthen any live ranges. 1038 return 0; 1039 return SethiUllmanNumbers[SU->NodeNum]; 1040 } 1041 1042 unsigned size() const { return Queue.size(); } 1043 1044 bool empty() const { return Queue.empty(); } 1045 1046 void push(SUnit *U) { 1047 assert(!U->NodeQueueId && "Node in the queue already"); 1048 U->NodeQueueId = ++currentQueueId; 1049 Queue.push(U); 1050 } 1051 1052 void push_all(const std::vector<SUnit *> &Nodes) { 1053 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1054 push(Nodes[i]); 1055 } 1056 1057 SUnit *pop() { 1058 if (empty()) return NULL; 1059 SUnit *V = Queue.top(); 1060 Queue.pop(); 1061 V->NodeQueueId = 0; 1062 return V; 1063 } 1064 1065 void remove(SUnit *SU) { 1066 assert(!Queue.empty() && "Queue is empty!"); 1067 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1068 Queue.erase_one(SU); 1069 SU->NodeQueueId = 0; 1070 } 1071 1072 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1073 scheduleDAG = scheduleDag; 1074 } 1075 1076 protected: 1077 bool canClobber(const SUnit *SU, const SUnit *Op); 1078 void AddPseudoTwoAddrDeps(); 1079 void PrescheduleNodesWithMultipleUses(); 1080 void CalculateSethiUllmanNumbers(); 1081 }; 1082 1083 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1084 BURegReductionPriorityQueue; 1085 1086 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1087 TDRegReductionPriorityQueue; 1088} 1089 1090/// closestSucc - Returns the scheduled cycle of the successor which is 1091/// closest to the current cycle. 1092static unsigned closestSucc(const SUnit *SU) { 1093 unsigned MaxHeight = 0; 1094 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1095 I != E; ++I) { 1096 if (I->isCtrl()) continue; // ignore chain succs 1097 unsigned Height = I->getSUnit()->getHeight(); 1098 // If there are bunch of CopyToRegs stacked up, they should be considered 1099 // to be at the same position. 1100 if (I->getSUnit()->getNode() && 1101 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1102 Height = closestSucc(I->getSUnit())+1; 1103 if (Height > MaxHeight) 1104 MaxHeight = Height; 1105 } 1106 return MaxHeight; 1107} 1108 1109/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1110/// for scratch registers, i.e. number of data dependencies. 1111static unsigned calcMaxScratches(const SUnit *SU) { 1112 unsigned Scratches = 0; 1113 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1114 I != E; ++I) { 1115 if (I->isCtrl()) continue; // ignore chain preds 1116 Scratches++; 1117 } 1118 return Scratches; 1119} 1120 1121// Bottom up 1122bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1123 unsigned LPriority = SPQ->getNodePriority(left); 1124 unsigned RPriority = SPQ->getNodePriority(right); 1125 if (LPriority != RPriority) 1126 return LPriority > RPriority; 1127 1128 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1129 // e.g. 1130 // t1 = op t2, c1 1131 // t3 = op t4, c2 1132 // 1133 // and the following instructions are both ready. 1134 // t2 = op c3 1135 // t4 = op c4 1136 // 1137 // Then schedule t2 = op first. 1138 // i.e. 1139 // t4 = op c4 1140 // t2 = op c3 1141 // t1 = op t2, c1 1142 // t3 = op t4, c2 1143 // 1144 // This creates more short live intervals. 1145 unsigned LDist = closestSucc(left); 1146 unsigned RDist = closestSucc(right); 1147 if (LDist != RDist) 1148 return LDist < RDist; 1149 1150 // How many registers becomes live when the node is scheduled. 1151 unsigned LScratch = calcMaxScratches(left); 1152 unsigned RScratch = calcMaxScratches(right); 1153 if (LScratch != RScratch) 1154 return LScratch > RScratch; 1155 1156 if (left->getHeight() != right->getHeight()) 1157 return left->getHeight() > right->getHeight(); 1158 1159 if (left->getDepth() != right->getDepth()) 1160 return left->getDepth() < right->getDepth(); 1161 1162 assert(left->NodeQueueId && right->NodeQueueId && 1163 "NodeQueueId cannot be zero"); 1164 return (left->NodeQueueId > right->NodeQueueId); 1165} 1166 1167template<class SF> 1168bool 1169RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1170 if (SU->isTwoAddress) { 1171 unsigned Opc = SU->getNode()->getMachineOpcode(); 1172 const TargetInstrDesc &TID = TII->get(Opc); 1173 unsigned NumRes = TID.getNumDefs(); 1174 unsigned NumOps = TID.getNumOperands() - NumRes; 1175 for (unsigned i = 0; i != NumOps; ++i) { 1176 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1177 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1178 if (DU->getNodeId() != -1 && 1179 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1180 return true; 1181 } 1182 } 1183 } 1184 return false; 1185} 1186 1187 1188/// hasCopyToRegUse - Return true if SU has a value successor that is a 1189/// CopyToReg node. 1190static bool hasCopyToRegUse(const SUnit *SU) { 1191 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1192 I != E; ++I) { 1193 if (I->isCtrl()) continue; 1194 const SUnit *SuccSU = I->getSUnit(); 1195 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1196 return true; 1197 } 1198 return false; 1199} 1200 1201/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1202/// physical register defs. 1203static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1204 const TargetInstrInfo *TII, 1205 const TargetRegisterInfo *TRI) { 1206 SDNode *N = SuccSU->getNode(); 1207 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1208 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1209 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1210 for (const SDNode *SUNode = SU->getNode(); SUNode; 1211 SUNode = SUNode->getFlaggedNode()) { 1212 if (!SUNode->isMachineOpcode()) 1213 continue; 1214 const unsigned *SUImpDefs = 1215 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1216 if (!SUImpDefs) 1217 return false; 1218 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1219 EVT VT = N->getValueType(i); 1220 if (VT == EVT::Flag || VT == EVT::Other) 1221 continue; 1222 if (!N->hasAnyUseOfValue(i)) 1223 continue; 1224 unsigned Reg = ImpDefs[i - NumDefs]; 1225 for (;*SUImpDefs; ++SUImpDefs) { 1226 unsigned SUReg = *SUImpDefs; 1227 if (TRI->regsOverlap(Reg, SUReg)) 1228 return true; 1229 } 1230 } 1231 } 1232 return false; 1233} 1234 1235/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1236/// are not handled well by the general register pressure reduction 1237/// heuristics. When presented with code like this: 1238/// 1239/// N 1240/// / | 1241/// / | 1242/// U store 1243/// | 1244/// ... 1245/// 1246/// the heuristics tend to push the store up, but since the 1247/// operand of the store has another use (U), this would increase 1248/// the length of that other use (the U->N edge). 1249/// 1250/// This function transforms code like the above to route U's 1251/// dependence through the store when possible, like this: 1252/// 1253/// N 1254/// || 1255/// || 1256/// store 1257/// | 1258/// U 1259/// | 1260/// ... 1261/// 1262/// This results in the store being scheduled immediately 1263/// after N, which shortens the U->N live range, reducing 1264/// register pressure. 1265/// 1266template<class SF> 1267void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1268 // Visit all the nodes in topological order, working top-down. 1269 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1270 SUnit *SU = &(*SUnits)[i]; 1271 // For now, only look at nodes with no data successors, such as stores. 1272 // These are especially important, due to the heuristics in 1273 // getNodePriority for nodes with no data successors. 1274 if (SU->NumSuccs != 0) 1275 continue; 1276 // For now, only look at nodes with exactly one data predecessor. 1277 if (SU->NumPreds != 1) 1278 continue; 1279 // Avoid prescheduling copies to virtual registers, which don't behave 1280 // like other nodes from the perspective of scheduling heuristics. 1281 if (SDNode *N = SU->getNode()) 1282 if (N->getOpcode() == ISD::CopyToReg && 1283 TargetRegisterInfo::isVirtualRegister 1284 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1285 continue; 1286 1287 // Locate the single data predecessor. 1288 SUnit *PredSU = 0; 1289 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1290 EE = SU->Preds.end(); II != EE; ++II) 1291 if (!II->isCtrl()) { 1292 PredSU = II->getSUnit(); 1293 break; 1294 } 1295 assert(PredSU); 1296 1297 // Don't rewrite edges that carry physregs, because that requires additional 1298 // support infrastructure. 1299 if (PredSU->hasPhysRegDefs) 1300 continue; 1301 // Short-circuit the case where SU is PredSU's only data successor. 1302 if (PredSU->NumSuccs == 1) 1303 continue; 1304 // Avoid prescheduling to copies from virtual registers, which don't behave 1305 // like other nodes from the perspective of scheduling // heuristics. 1306 if (SDNode *N = SU->getNode()) 1307 if (N->getOpcode() == ISD::CopyFromReg && 1308 TargetRegisterInfo::isVirtualRegister 1309 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1310 continue; 1311 1312 // Perform checks on the successors of PredSU. 1313 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1314 EE = PredSU->Succs.end(); II != EE; ++II) { 1315 SUnit *PredSuccSU = II->getSUnit(); 1316 if (PredSuccSU == SU) continue; 1317 // If PredSU has another successor with no data successors, for 1318 // now don't attempt to choose either over the other. 1319 if (PredSuccSU->NumSuccs == 0) 1320 goto outer_loop_continue; 1321 // Don't break physical register dependencies. 1322 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1323 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1324 goto outer_loop_continue; 1325 // Don't introduce graph cycles. 1326 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1327 goto outer_loop_continue; 1328 } 1329 1330 // Ok, the transformation is safe and the heuristics suggest it is 1331 // profitable. Update the graph. 1332 DOUT << "Prescheduling SU # " << SU->NodeNum 1333 << " next to PredSU # " << PredSU->NodeNum 1334 << " to guide scheduling in the presence of multiple uses\n"; 1335 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1336 SDep Edge = PredSU->Succs[i]; 1337 assert(!Edge.isAssignedRegDep()); 1338 SUnit *SuccSU = Edge.getSUnit(); 1339 if (SuccSU != SU) { 1340 Edge.setSUnit(PredSU); 1341 scheduleDAG->RemovePred(SuccSU, Edge); 1342 scheduleDAG->AddPred(SU, Edge); 1343 Edge.setSUnit(SU); 1344 scheduleDAG->AddPred(SuccSU, Edge); 1345 --i; 1346 } 1347 } 1348 outer_loop_continue:; 1349 } 1350} 1351 1352/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1353/// it as a def&use operand. Add a pseudo control edge from it to the other 1354/// node (if it won't create a cycle) so the two-address one will be scheduled 1355/// first (lower in the schedule). If both nodes are two-address, favor the 1356/// one that has a CopyToReg use (more likely to be a loop induction update). 1357/// If both are two-address, but one is commutable while the other is not 1358/// commutable, favor the one that's not commutable. 1359template<class SF> 1360void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1361 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1362 SUnit *SU = &(*SUnits)[i]; 1363 if (!SU->isTwoAddress) 1364 continue; 1365 1366 SDNode *Node = SU->getNode(); 1367 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1368 continue; 1369 1370 unsigned Opc = Node->getMachineOpcode(); 1371 const TargetInstrDesc &TID = TII->get(Opc); 1372 unsigned NumRes = TID.getNumDefs(); 1373 unsigned NumOps = TID.getNumOperands() - NumRes; 1374 for (unsigned j = 0; j != NumOps; ++j) { 1375 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1376 continue; 1377 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1378 if (DU->getNodeId() == -1) 1379 continue; 1380 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1381 if (!DUSU) continue; 1382 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1383 E = DUSU->Succs.end(); I != E; ++I) { 1384 if (I->isCtrl()) continue; 1385 SUnit *SuccSU = I->getSUnit(); 1386 if (SuccSU == SU) 1387 continue; 1388 // Be conservative. Ignore if nodes aren't at roughly the same 1389 // depth and height. 1390 if (SuccSU->getHeight() < SU->getHeight() && 1391 (SU->getHeight() - SuccSU->getHeight()) > 1) 1392 continue; 1393 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 1394 // constrains whatever is using the copy, instead of the copy 1395 // itself. In the case that the copy is coalesced, this 1396 // preserves the intent of the pseudo two-address heurietics. 1397 while (SuccSU->Succs.size() == 1 && 1398 SuccSU->getNode()->isMachineOpcode() && 1399 SuccSU->getNode()->getMachineOpcode() == 1400 TargetInstrInfo::COPY_TO_REGCLASS) 1401 SuccSU = SuccSU->Succs.front().getSUnit(); 1402 // Don't constrain non-instruction nodes. 1403 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1404 continue; 1405 // Don't constrain nodes with physical register defs if the 1406 // predecessor can clobber them. 1407 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1408 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1409 continue; 1410 } 1411 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1412 // these may be coalesced away. We want them close to their uses. 1413 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1414 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1415 SuccOpc == TargetInstrInfo::INSERT_SUBREG || 1416 SuccOpc == TargetInstrInfo::SUBREG_TO_REG) 1417 continue; 1418 if ((!canClobber(SuccSU, DUSU) || 1419 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1420 (!SU->isCommutable && SuccSU->isCommutable)) && 1421 !scheduleDAG->IsReachable(SuccSU, SU)) { 1422 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum 1423 << " to SU #" << SuccSU->NodeNum << "\n"; 1424 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1425 /*Reg=*/0, /*isNormalMemory=*/false, 1426 /*isMustAlias=*/false, 1427 /*isArtificial=*/true)); 1428 } 1429 } 1430 } 1431 } 1432} 1433 1434/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1435/// scheduling units. 1436template<class SF> 1437void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1438 SethiUllmanNumbers.assign(SUnits->size(), 0); 1439 1440 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1441 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1442} 1443 1444/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1445/// predecessors of the successors of the SUnit SU. Stop when the provided 1446/// limit is exceeded. 1447static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1448 unsigned Limit) { 1449 unsigned Sum = 0; 1450 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1451 I != E; ++I) { 1452 const SUnit *SuccSU = I->getSUnit(); 1453 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1454 EE = SuccSU->Preds.end(); II != EE; ++II) { 1455 SUnit *PredSU = II->getSUnit(); 1456 if (!PredSU->isScheduled) 1457 if (++Sum > Limit) 1458 return Sum; 1459 } 1460 } 1461 return Sum; 1462} 1463 1464 1465// Top down 1466bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1467 unsigned LPriority = SPQ->getNodePriority(left); 1468 unsigned RPriority = SPQ->getNodePriority(right); 1469 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1470 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1471 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1472 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1473 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1474 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1475 1476 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1477 return false; 1478 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1479 return true; 1480 1481 if (LIsFloater) 1482 LBonus -= 2; 1483 if (RIsFloater) 1484 RBonus -= 2; 1485 if (left->NumSuccs == 1) 1486 LBonus += 2; 1487 if (right->NumSuccs == 1) 1488 RBonus += 2; 1489 1490 if (LPriority+LBonus != RPriority+RBonus) 1491 return LPriority+LBonus < RPriority+RBonus; 1492 1493 if (left->getDepth() != right->getDepth()) 1494 return left->getDepth() < right->getDepth(); 1495 1496 if (left->NumSuccsLeft != right->NumSuccsLeft) 1497 return left->NumSuccsLeft > right->NumSuccsLeft; 1498 1499 assert(left->NodeQueueId && right->NodeQueueId && 1500 "NodeQueueId cannot be zero"); 1501 return (left->NodeQueueId > right->NodeQueueId); 1502} 1503 1504//===----------------------------------------------------------------------===// 1505// Public Constructor Functions 1506//===----------------------------------------------------------------------===// 1507 1508llvm::ScheduleDAGSDNodes * 1509llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1510 const TargetMachine &TM = IS->TM; 1511 const TargetInstrInfo *TII = TM.getInstrInfo(); 1512 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1513 1514 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1515 1516 ScheduleDAGRRList *SD = 1517 new ScheduleDAGRRList(*IS->MF, true, PQ); 1518 PQ->setScheduleDAG(SD); 1519 return SD; 1520} 1521 1522llvm::ScheduleDAGSDNodes * 1523llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1524 const TargetMachine &TM = IS->TM; 1525 const TargetInstrInfo *TII = TM.getInstrInfo(); 1526 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1527 1528 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1529 1530 ScheduleDAGRRList *SD = 1531 new ScheduleDAGRRList(*IS->MF, false, PQ); 1532 PQ->setScheduleDAG(SD); 1533 return SD; 1534} 1535