// ScheduleDAGRRList.cpp revision b9a3f3d2d395ff32a4f40fe6642d5f0e38a2c0be
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/InlineAsm.h" 21#include "llvm/CodeGen/SchedulerRegistry.h" 22#include "llvm/CodeGen/SelectionDAGISel.h" 23#include "llvm/Target/TargetRegisterInfo.h" 24#include "llvm/Target/TargetData.h" 25#include "llvm/Target/TargetMachine.h" 26#include "llvm/Target/TargetInstrInfo.h" 27#include "llvm/ADT/PriorityQueue.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/Statistic.h" 30#include "llvm/ADT/STLExtras.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/ErrorHandling.h" 33#include "llvm/Support/raw_ostream.h" 34#include <climits> 35using namespace llvm; 36 37STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 38STATISTIC(NumUnfolds, "Number of nodes unfolded"); 39STATISTIC(NumDups, "Number of duplicated nodes"); 40STATISTIC(NumPRCopies, "Number of physical register copies"); 41 42static RegisterScheduler 43 burrListDAGScheduler("list-burr", 44 "Bottom-up register reduction list scheduling", 45 createBURRListDAGScheduler); 46static RegisterScheduler 47 tdrListrDAGScheduler("list-tdrr", 48 "Top-down register reduction list scheduling", 49 createTDRRListDAGScheduler); 50static RegisterScheduler 51 
sourceListDAGScheduler("source", 52 "Similar to list-burr but schedules in source " 53 "order when possible", 54 createSourceListDAGScheduler); 55 56namespace { 57//===----------------------------------------------------------------------===// 58/// ScheduleDAGRRList - The actual register reduction list scheduler 59/// implementation. This supports both top-down and bottom-up scheduling. 60/// 61class ScheduleDAGRRList : public ScheduleDAGSDNodes { 62private: 63 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 64 /// it is top-down. 65 bool isBottomUp; 66 67 /// AvailableQueue - The priority queue to use for the available SUnits. 68 SchedulingPriorityQueue *AvailableQueue; 69 70 /// LiveRegDefs - A set of physical registers and their definition 71 /// that are "live". These nodes must be scheduled before any other nodes that 72 /// modifies the registers can be scheduled. 73 unsigned NumLiveRegs; 74 std::vector<SUnit*> LiveRegDefs; 75 std::vector<unsigned> LiveRegCycles; 76 77 /// Topo - A topological ordering for SUnits which permits fast IsReachable 78 /// and similar queries. 79 ScheduleDAGTopologicalSort Topo; 80 81public: 82 ScheduleDAGRRList(MachineFunction &mf, 83 bool isbottomup, 84 SchedulingPriorityQueue *availqueue) 85 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), 86 AvailableQueue(availqueue), Topo(SUnits) { 87 } 88 89 ~ScheduleDAGRRList() { 90 delete AvailableQueue; 91 } 92 93 void Schedule(); 94 95 /// IsReachable - Checks if SU is reachable from TargetSU. 96 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 97 return Topo.IsReachable(SU, TargetSU); 98 } 99 100 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 101 /// create a cycle. 102 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 103 return Topo.WillCreateCycle(SU, TargetSU); 104 } 105 106 /// AddPred - adds a predecessor edge to SUnit SU. 107 /// This returns true if this is a new predecessor. 
108 /// Updates the topological ordering if required. 109 void AddPred(SUnit *SU, const SDep &D) { 110 Topo.AddPred(SU, D.getSUnit()); 111 SU->addPred(D); 112 } 113 114 /// RemovePred - removes a predecessor edge from SUnit SU. 115 /// This returns true if an edge was removed. 116 /// Updates the topological ordering if required. 117 void RemovePred(SUnit *SU, const SDep &D) { 118 Topo.RemovePred(SU, D.getSUnit()); 119 SU->removePred(D); 120 } 121 122private: 123 void ReleasePred(SUnit *SU, const SDep *PredEdge); 124 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 125 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 126 void ReleaseSuccessors(SUnit *SU); 127 void CapturePred(SDep *PredEdge); 128 void ScheduleNodeBottomUp(SUnit*, unsigned); 129 void ScheduleNodeTopDown(SUnit*, unsigned); 130 void UnscheduleNodeBottomUp(SUnit*); 131 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 132 SUnit *CopyAndMoveSuccessors(SUnit*); 133 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 134 const TargetRegisterClass*, 135 const TargetRegisterClass*, 136 SmallVector<SUnit*, 2>&); 137 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 138 void ListScheduleTopDown(); 139 void ListScheduleBottomUp(); 140 141 142 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 143 /// Updates the topological ordering if required. 144 SUnit *CreateNewSUnit(SDNode *N) { 145 unsigned NumSUnits = SUnits.size(); 146 SUnit *NewNode = NewSUnit(N); 147 // Update the topological ordering. 148 if (NewNode->NodeNum >= NumSUnits) 149 Topo.InitDAGTopologicalSorting(); 150 return NewNode; 151 } 152 153 /// CreateClone - Creates a new SUnit from an existing one. 154 /// Updates the topological ordering if required. 155 SUnit *CreateClone(SUnit *N) { 156 unsigned NumSUnits = SUnits.size(); 157 SUnit *NewNode = Clone(N); 158 // Update the topological ordering. 
159 if (NewNode->NodeNum >= NumSUnits) 160 Topo.InitDAGTopologicalSorting(); 161 return NewNode; 162 } 163 164 /// ForceUnitLatencies - Return true, since register-pressure-reducing 165 /// scheduling doesn't need actual latency information. 166 bool ForceUnitLatencies() const { return true; } 167}; 168} // end anonymous namespace 169 170 171/// Schedule - Schedule the DAG using list scheduling. 172void ScheduleDAGRRList::Schedule() { 173 DEBUG(dbgs() << "********** List Scheduling **********\n"); 174 175 NumLiveRegs = 0; 176 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 177 LiveRegCycles.resize(TRI->getNumRegs(), 0); 178 179 // Build the scheduling graph. 180 BuildSchedGraph(NULL); 181 182 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 183 SUnits[su].dumpAll(this)); 184 Topo.InitDAGTopologicalSorting(); 185 186 AvailableQueue->initNodes(SUnits); 187 188 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 189 if (isBottomUp) 190 ListScheduleBottomUp(); 191 else 192 ListScheduleTopDown(); 193 194 AvailableQueue->releaseState(); 195} 196 197//===----------------------------------------------------------------------===// 198// Bottom-Up Scheduling 199//===----------------------------------------------------------------------===// 200 201/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 202/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 203void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 204 SUnit *PredSU = PredEdge->getSUnit(); 205 206#ifndef NDEBUG 207 if (PredSU->NumSuccsLeft == 0) { 208 dbgs() << "*** Scheduling failed! ***\n"; 209 PredSU->dump(this); 210 dbgs() << " has been released too many times!\n"; 211 llvm_unreachable(0); 212 } 213#endif 214 --PredSU->NumSuccsLeft; 215 216 // If all the node's successors are scheduled, this node is ready 217 // to be scheduled. Ignore the special EntrySU node. 
218 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 219 PredSU->isAvailable = true; 220 AvailableQueue->push(PredSU); 221 } 222} 223 224void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 225 // Bottom up: release predecessors 226 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 227 I != E; ++I) { 228 ReleasePred(SU, &*I); 229 if (I->isAssignedRegDep()) { 230 // This is a physical register dependency and it's impossible or 231 // expensive to copy the register. Make sure nothing that can 232 // clobber the register is scheduled between the predecessor and 233 // this node. 234 if (!LiveRegDefs[I->getReg()]) { 235 ++NumLiveRegs; 236 LiveRegDefs[I->getReg()] = I->getSUnit(); 237 LiveRegCycles[I->getReg()] = CurCycle; 238 } 239 } 240 } 241} 242 243/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 244/// count of its predecessors. If a predecessor pending count is zero, add it to 245/// the Available queue. 246void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 247 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 248 DEBUG(SU->dump(this)); 249 250 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 251 SU->setHeightToAtLeast(CurCycle); 252 Sequence.push_back(SU); 253 254 ReleasePredecessors(SU, CurCycle); 255 256 // Release all the implicit physical register defs that are live. 
257 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 258 I != E; ++I) { 259 if (I->isAssignedRegDep()) { 260 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 261 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 262 assert(LiveRegDefs[I->getReg()] == SU && 263 "Physical register dependency violated?"); 264 --NumLiveRegs; 265 LiveRegDefs[I->getReg()] = NULL; 266 LiveRegCycles[I->getReg()] = 0; 267 } 268 } 269 } 270 271 SU->isScheduled = true; 272 AvailableQueue->ScheduledNode(SU); 273} 274 275/// CapturePred - This does the opposite of ReleasePred. Since SU is being 276/// unscheduled, incrcease the succ left count of its predecessors. Remove 277/// them from AvailableQueue if necessary. 278void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 279 SUnit *PredSU = PredEdge->getSUnit(); 280 if (PredSU->isAvailable) { 281 PredSU->isAvailable = false; 282 if (!PredSU->isPending) 283 AvailableQueue->remove(PredSU); 284 } 285 286 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 287 ++PredSU->NumSuccsLeft; 288} 289 290/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 291/// its predecessor states to reflect the change. 
292void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 293 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 294 DEBUG(SU->dump(this)); 295 296 AvailableQueue->UnscheduledNode(SU); 297 298 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 299 I != E; ++I) { 300 CapturePred(&*I); 301 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 302 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 303 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 304 "Physical register dependency violated?"); 305 --NumLiveRegs; 306 LiveRegDefs[I->getReg()] = NULL; 307 LiveRegCycles[I->getReg()] = 0; 308 } 309 } 310 311 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 312 I != E; ++I) { 313 if (I->isAssignedRegDep()) { 314 if (!LiveRegDefs[I->getReg()]) { 315 LiveRegDefs[I->getReg()] = SU; 316 ++NumLiveRegs; 317 } 318 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 319 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 320 } 321 } 322 323 SU->setHeightDirty(); 324 SU->isScheduled = false; 325 SU->isAvailable = true; 326 AvailableQueue->push(SU); 327} 328 329/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 330/// BTCycle in order to schedule a specific node. 331void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 332 unsigned &CurCycle) { 333 SUnit *OldSU = NULL; 334 while (CurCycle > BtCycle) { 335 OldSU = Sequence.back(); 336 Sequence.pop_back(); 337 if (SU->isSucc(OldSU)) 338 // Don't try to remove SU from AvailableQueue. 
339 SU->isAvailable = false; 340 UnscheduleNodeBottomUp(OldSU); 341 --CurCycle; 342 } 343 344 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 345 346 ++NumBacktracks; 347} 348 349static bool isOperandOf(const SUnit *SU, SDNode *N) { 350 for (const SDNode *SUNode = SU->getNode(); SUNode; 351 SUNode = SUNode->getFlaggedNode()) { 352 if (SUNode->isOperandOf(N)) 353 return true; 354 } 355 return false; 356} 357 358/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 359/// successors to the newly created node. 360SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 361 if (SU->getNode()->getFlaggedNode()) 362 return NULL; 363 364 SDNode *N = SU->getNode(); 365 if (!N) 366 return NULL; 367 368 SUnit *NewSU; 369 bool TryUnfold = false; 370 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 371 EVT VT = N->getValueType(i); 372 if (VT == MVT::Flag) 373 return NULL; 374 else if (VT == MVT::Other) 375 TryUnfold = true; 376 } 377 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 378 const SDValue &Op = N->getOperand(i); 379 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 380 if (VT == MVT::Flag) 381 return NULL; 382 } 383 384 if (TryUnfold) { 385 SmallVector<SDNode*, 2> NewNodes; 386 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 387 return NULL; 388 389 DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); 390 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 391 392 N = NewNodes[1]; 393 SDNode *LoadNode = NewNodes[0]; 394 unsigned NumVals = N->getNumValues(); 395 unsigned OldNumVals = SU->getNode()->getNumValues(); 396 for (unsigned i = 0; i != NumVals; ++i) 397 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 398 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 399 SDValue(LoadNode, 1)); 400 401 // LoadNode may already exist. 
This can happen when there is another 402 // load from the same location and producing the same type of value 403 // but it has different alignment or volatileness. 404 bool isNewLoad = true; 405 SUnit *LoadSU; 406 if (LoadNode->getNodeId() != -1) { 407 LoadSU = &SUnits[LoadNode->getNodeId()]; 408 isNewLoad = false; 409 } else { 410 LoadSU = CreateNewSUnit(LoadNode); 411 LoadNode->setNodeId(LoadSU->NodeNum); 412 ComputeLatency(LoadSU); 413 } 414 415 SUnit *NewSU = CreateNewSUnit(N); 416 assert(N->getNodeId() == -1 && "Node already inserted!"); 417 N->setNodeId(NewSU->NodeNum); 418 419 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 420 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 421 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 422 NewSU->isTwoAddress = true; 423 break; 424 } 425 } 426 if (TID.isCommutable()) 427 NewSU->isCommutable = true; 428 ComputeLatency(NewSU); 429 430 // Record all the edges to and from the old SU, by category. 431 SmallVector<SDep, 4> ChainPreds; 432 SmallVector<SDep, 4> ChainSuccs; 433 SmallVector<SDep, 4> LoadPreds; 434 SmallVector<SDep, 4> NodePreds; 435 SmallVector<SDep, 4> NodeSuccs; 436 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 437 I != E; ++I) { 438 if (I->isCtrl()) 439 ChainPreds.push_back(*I); 440 else if (isOperandOf(I->getSUnit(), LoadNode)) 441 LoadPreds.push_back(*I); 442 else 443 NodePreds.push_back(*I); 444 } 445 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 446 I != E; ++I) { 447 if (I->isCtrl()) 448 ChainSuccs.push_back(*I); 449 else 450 NodeSuccs.push_back(*I); 451 } 452 453 // Now assign edges to the newly-created nodes. 
454 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 455 const SDep &Pred = ChainPreds[i]; 456 RemovePred(SU, Pred); 457 if (isNewLoad) 458 AddPred(LoadSU, Pred); 459 } 460 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 461 const SDep &Pred = LoadPreds[i]; 462 RemovePred(SU, Pred); 463 if (isNewLoad) 464 AddPred(LoadSU, Pred); 465 } 466 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 467 const SDep &Pred = NodePreds[i]; 468 RemovePred(SU, Pred); 469 AddPred(NewSU, Pred); 470 } 471 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 472 SDep D = NodeSuccs[i]; 473 SUnit *SuccDep = D.getSUnit(); 474 D.setSUnit(SU); 475 RemovePred(SuccDep, D); 476 D.setSUnit(NewSU); 477 AddPred(SuccDep, D); 478 } 479 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 480 SDep D = ChainSuccs[i]; 481 SUnit *SuccDep = D.getSUnit(); 482 D.setSUnit(SU); 483 RemovePred(SuccDep, D); 484 if (isNewLoad) { 485 D.setSUnit(LoadSU); 486 AddPred(SuccDep, D); 487 } 488 } 489 490 // Add a data dependency to reflect that NewSU reads the value defined 491 // by LoadSU. 492 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 493 494 if (isNewLoad) 495 AvailableQueue->addNode(LoadSU); 496 AvailableQueue->addNode(NewSU); 497 498 ++NumUnfolds; 499 500 if (NewSU->NumSuccsLeft == 0) { 501 NewSU->isAvailable = true; 502 return NewSU; 503 } 504 SU = NewSU; 505 } 506 507 DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); 508 NewSU = CreateClone(SU); 509 510 // New SUnit has the exact same predecessors. 511 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 512 I != E; ++I) 513 if (!I->isArtificial()) 514 AddPred(NewSU, *I); 515 516 // Only copy scheduled successors. Cut them from old node's successor 517 // list and move them over. 
518 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 519 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 520 I != E; ++I) { 521 if (I->isArtificial()) 522 continue; 523 SUnit *SuccSU = I->getSUnit(); 524 if (SuccSU->isScheduled) { 525 SDep D = *I; 526 D.setSUnit(NewSU); 527 AddPred(SuccSU, D); 528 D.setSUnit(SU); 529 DelDeps.push_back(std::make_pair(SuccSU, D)); 530 } 531 } 532 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 533 RemovePred(DelDeps[i].first, DelDeps[i].second); 534 535 AvailableQueue->updateNode(SU); 536 AvailableQueue->addNode(NewSU); 537 538 ++NumDups; 539 return NewSU; 540} 541 542/// InsertCopiesAndMoveSuccs - Insert register copies and move all 543/// scheduled successors of the given SUnit to the last copy. 544void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 545 const TargetRegisterClass *DestRC, 546 const TargetRegisterClass *SrcRC, 547 SmallVector<SUnit*, 2> &Copies) { 548 SUnit *CopyFromSU = CreateNewSUnit(NULL); 549 CopyFromSU->CopySrcRC = SrcRC; 550 CopyFromSU->CopyDstRC = DestRC; 551 552 SUnit *CopyToSU = CreateNewSUnit(NULL); 553 CopyToSU->CopySrcRC = DestRC; 554 CopyToSU->CopyDstRC = SrcRC; 555 556 // Only copy scheduled successors. Cut them from old node's successor 557 // list and move them over. 
558 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 559 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 560 I != E; ++I) { 561 if (I->isArtificial()) 562 continue; 563 SUnit *SuccSU = I->getSUnit(); 564 if (SuccSU->isScheduled) { 565 SDep D = *I; 566 D.setSUnit(CopyToSU); 567 AddPred(SuccSU, D); 568 DelDeps.push_back(std::make_pair(SuccSU, *I)); 569 } 570 } 571 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 572 RemovePred(DelDeps[i].first, DelDeps[i].second); 573 574 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 575 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 576 577 AvailableQueue->updateNode(SU); 578 AvailableQueue->addNode(CopyFromSU); 579 AvailableQueue->addNode(CopyToSU); 580 Copies.push_back(CopyFromSU); 581 Copies.push_back(CopyToSU); 582 583 ++NumPRCopies; 584} 585 586/// getPhysicalRegisterVT - Returns the ValueType of the physical register 587/// definition of the specified node. 588/// FIXME: Move to SelectionDAG? 589static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 590 const TargetInstrInfo *TII) { 591 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 592 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 593 unsigned NumRes = TID.getNumDefs(); 594 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 595 if (Reg == *ImpDef) 596 break; 597 ++NumRes; 598 } 599 return N->getValueType(NumRes); 600} 601 602/// CheckForLiveRegDef - Return true and update live register vector if the 603/// specified register def of the specified SUnit clobbers any "live" registers. 
604static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 605 std::vector<SUnit*> &LiveRegDefs, 606 SmallSet<unsigned, 4> &RegAdded, 607 SmallVector<unsigned, 4> &LRegs, 608 const TargetRegisterInfo *TRI) { 609 bool Added = false; 610 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 611 if (RegAdded.insert(Reg)) { 612 LRegs.push_back(Reg); 613 Added = true; 614 } 615 } 616 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 617 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 618 if (RegAdded.insert(*Alias)) { 619 LRegs.push_back(*Alias); 620 Added = true; 621 } 622 } 623 return Added; 624} 625 626/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 627/// scheduling of the given node to satisfy live physical register dependencies. 628/// If the specific node is the last one that's available to schedule, do 629/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 630bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 631 SmallVector<unsigned, 4> &LRegs){ 632 if (NumLiveRegs == 0) 633 return false; 634 635 SmallSet<unsigned, 4> RegAdded; 636 // If this node would clobber any "live" register, then it's not ready. 637 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 638 I != E; ++I) { 639 if (I->isAssignedRegDep()) 640 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 641 RegAdded, LRegs, TRI); 642 } 643 644 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 645 if (Node->getOpcode() == ISD::INLINEASM) { 646 // Inline asm can clobber physical defs. 647 unsigned NumOps = Node->getNumOperands(); 648 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) 649 --NumOps; // Ignore the flag operand. 
650 651 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 652 unsigned Flags = 653 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 654 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 655 656 ++i; // Skip the ID value. 657 if (InlineAsm::isRegDefKind(Flags) || 658 InlineAsm::isRegDefEarlyClobberKind(Flags)) { 659 // Check for def of register or earlyclobber register. 660 for (; NumVals; --NumVals, ++i) { 661 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 662 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 663 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 664 } 665 } else 666 i += NumVals; 667 } 668 continue; 669 } 670 671 if (!Node->isMachineOpcode()) 672 continue; 673 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 674 if (!TID.ImplicitDefs) 675 continue; 676 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 677 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 678 } 679 return !LRegs.empty(); 680} 681 682 683/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 684/// schedulers. 685void ScheduleDAGRRList::ListScheduleBottomUp() { 686 unsigned CurCycle = 0; 687 688 // Release any predecessors of the special Exit node. 689 ReleasePredecessors(&ExitSU, CurCycle); 690 691 // Add root to Available queue. 692 if (!SUnits.empty()) { 693 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 694 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 695 RootSU->isAvailable = true; 696 AvailableQueue->push(RootSU); 697 } 698 699 // While Available queue is not empty, grab the node with the highest 700 // priority. If it is not ready put it back. Schedule the node. 
701 SmallVector<SUnit*, 4> NotReady; 702 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 703 Sequence.reserve(SUnits.size()); 704 while (!AvailableQueue->empty()) { 705 bool Delayed = false; 706 LRegsMap.clear(); 707 SUnit *CurSU = AvailableQueue->pop(); 708 while (CurSU) { 709 SmallVector<unsigned, 4> LRegs; 710 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 711 break; 712 Delayed = true; 713 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 714 715 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 716 NotReady.push_back(CurSU); 717 CurSU = AvailableQueue->pop(); 718 } 719 720 // All candidates are delayed due to live physical reg dependencies. 721 // Try backtracking, code duplication, or inserting cross class copies 722 // to resolve it. 723 if (Delayed && !CurSU) { 724 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 725 SUnit *TrySU = NotReady[i]; 726 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 727 728 // Try unscheduling up to the point where it's safe to schedule 729 // this node. 730 unsigned LiveCycle = CurCycle; 731 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 732 unsigned Reg = LRegs[j]; 733 unsigned LCycle = LiveRegCycles[Reg]; 734 LiveCycle = std::min(LiveCycle, LCycle); 735 } 736 SUnit *OldSU = Sequence[LiveCycle]; 737 if (!WillCreateCycle(TrySU, OldSU)) { 738 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 739 // Force the current node to be scheduled before the node that 740 // requires the physical reg dep. 741 if (OldSU->isAvailable) { 742 OldSU->isAvailable = false; 743 AvailableQueue->remove(OldSU); 744 } 745 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 746 /*Reg=*/0, /*isNormalMemory=*/false, 747 /*isMustAlias=*/false, /*isArtificial=*/true)); 748 // If one or more successors has been unscheduled, then the current 749 // node is no longer avaialable. Schedule a successor that's now 750 // available instead. 
751 if (!TrySU->isAvailable) 752 CurSU = AvailableQueue->pop(); 753 else { 754 CurSU = TrySU; 755 TrySU->isPending = false; 756 NotReady.erase(NotReady.begin()+i); 757 } 758 break; 759 } 760 } 761 762 if (!CurSU) { 763 // Can't backtrack. If it's too expensive to copy the value, then try 764 // duplicate the nodes that produces these "too expensive to copy" 765 // values to break the dependency. In case even that doesn't work, 766 // insert cross class copies. 767 // If it's not too expensive, i.e. cost != -1, issue copies. 768 SUnit *TrySU = NotReady[0]; 769 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 770 assert(LRegs.size() == 1 && "Can't handle this yet!"); 771 unsigned Reg = LRegs[0]; 772 SUnit *LRDef = LiveRegDefs[Reg]; 773 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 774 const TargetRegisterClass *RC = 775 TRI->getPhysicalRegisterRegClass(Reg, VT); 776 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 777 778 // If cross copy register class is null, then it must be possible copy 779 // the value directly. Do not try duplicate the def. 780 SUnit *NewDef = 0; 781 if (DestRC) 782 NewDef = CopyAndMoveSuccessors(LRDef); 783 else 784 DestRC = RC; 785 if (!NewDef) { 786 // Issue copies, these can be expensive cross register class copies. 
787 SmallVector<SUnit*, 2> Copies; 788 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 789 DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum 790 << " to SU #" << Copies.front()->NodeNum << "\n"); 791 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 792 /*Reg=*/0, /*isNormalMemory=*/false, 793 /*isMustAlias=*/false, 794 /*isArtificial=*/true)); 795 NewDef = Copies.back(); 796 } 797 798 DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum 799 << " to SU #" << TrySU->NodeNum << "\n"); 800 LiveRegDefs[Reg] = NewDef; 801 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 802 /*Reg=*/0, /*isNormalMemory=*/false, 803 /*isMustAlias=*/false, 804 /*isArtificial=*/true)); 805 TrySU->isAvailable = false; 806 CurSU = NewDef; 807 } 808 809 assert(CurSU && "Unable to resolve live physical register dependencies!"); 810 } 811 812 // Add the nodes that aren't ready back onto the available list. 813 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 814 NotReady[i]->isPending = false; 815 // May no longer be available due to backtracking. 816 if (NotReady[i]->isAvailable) 817 AvailableQueue->push(NotReady[i]); 818 } 819 NotReady.clear(); 820 821 if (CurSU) 822 ScheduleNodeBottomUp(CurSU, CurCycle); 823 ++CurCycle; 824 } 825 826 // Reverse the order if it is bottom up. 827 std::reverse(Sequence.begin(), Sequence.end()); 828 829#ifndef NDEBUG 830 VerifySchedule(isBottomUp); 831#endif 832} 833 834//===----------------------------------------------------------------------===// 835// Top-Down Scheduling 836//===----------------------------------------------------------------------===// 837 838/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 839/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 
840void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 841 SUnit *SuccSU = SuccEdge->getSUnit(); 842 843#ifndef NDEBUG 844 if (SuccSU->NumPredsLeft == 0) { 845 dbgs() << "*** Scheduling failed! ***\n"; 846 SuccSU->dump(this); 847 dbgs() << " has been released too many times!\n"; 848 llvm_unreachable(0); 849 } 850#endif 851 --SuccSU->NumPredsLeft; 852 853 // If all the node's predecessors are scheduled, this node is ready 854 // to be scheduled. Ignore the special ExitSU node. 855 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 856 SuccSU->isAvailable = true; 857 AvailableQueue->push(SuccSU); 858 } 859} 860 861void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 862 // Top down: release successors 863 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 864 I != E; ++I) { 865 assert(!I->isAssignedRegDep() && 866 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 867 868 ReleaseSucc(SU, &*I); 869 } 870} 871 872/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 873/// count of its successors. If a successor pending count is zero, add it to 874/// the Available queue. 875void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 876 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 877 DEBUG(SU->dump(this)); 878 879 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 880 SU->setDepthToAtLeast(CurCycle); 881 Sequence.push_back(SU); 882 883 ReleaseSuccessors(SU); 884 SU->isScheduled = true; 885 AvailableQueue->ScheduledNode(SU); 886} 887 888/// ListScheduleTopDown - The main loop of list scheduling for top-down 889/// schedulers. 890void ScheduleDAGRRList::ListScheduleTopDown() { 891 unsigned CurCycle = 0; 892 893 // Release any successors of the special Entry node. 894 ReleaseSuccessors(&EntrySU); 895 896 // All leaves to Available queue. 
897 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 898 // It is available if it has no predecessors. 899 if (SUnits[i].Preds.empty()) { 900 AvailableQueue->push(&SUnits[i]); 901 SUnits[i].isAvailable = true; 902 } 903 } 904 905 // While Available queue is not empty, grab the node with the highest 906 // priority. If it is not ready put it back. Schedule the node. 907 Sequence.reserve(SUnits.size()); 908 while (!AvailableQueue->empty()) { 909 SUnit *CurSU = AvailableQueue->pop(); 910 911 if (CurSU) 912 ScheduleNodeTopDown(CurSU, CurCycle); 913 ++CurCycle; 914 } 915 916#ifndef NDEBUG 917 VerifySchedule(isBottomUp); 918#endif 919} 920 921 922//===----------------------------------------------------------------------===// 923// RegReductionPriorityQueue Implementation 924//===----------------------------------------------------------------------===// 925// 926// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 927// to reduce register pressure. 928// 929namespace { 930 template<class SF> 931 class RegReductionPriorityQueue; 932 933 /// Sorting functions for the Available queue. 
934 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 935 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 936 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 937 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 938 939 bool operator()(const SUnit* left, const SUnit* right) const; 940 }; 941 942 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 943 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 944 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 945 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 946 947 bool operator()(const SUnit* left, const SUnit* right) const; 948 }; 949 950 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 951 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; 952 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) 953 : SPQ(spq) {} 954 src_ls_rr_sort(const src_ls_rr_sort &RHS) 955 : SPQ(RHS.SPQ) {} 956 957 bool operator()(const SUnit* left, const SUnit* right) const; 958 }; 959} // end anonymous namespace 960 961/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 962/// Smaller number is the higher priority. 
963static unsigned 964CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 965 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 966 if (SethiUllmanNumber != 0) 967 return SethiUllmanNumber; 968 969 unsigned Extra = 0; 970 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 971 I != E; ++I) { 972 if (I->isCtrl()) continue; // ignore chain preds 973 SUnit *PredSU = I->getSUnit(); 974 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 975 if (PredSethiUllman > SethiUllmanNumber) { 976 SethiUllmanNumber = PredSethiUllman; 977 Extra = 0; 978 } else if (PredSethiUllman == SethiUllmanNumber) 979 ++Extra; 980 } 981 982 SethiUllmanNumber += Extra; 983 984 if (SethiUllmanNumber == 0) 985 SethiUllmanNumber = 1; 986 987 return SethiUllmanNumber; 988} 989 990namespace { 991 template<class SF> 992 class RegReductionPriorityQueue : public SchedulingPriorityQueue { 993 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 994 unsigned currentQueueId; 995 996 protected: 997 // SUnits - The SUnits for the current graph. 998 std::vector<SUnit> *SUnits; 999 1000 const TargetInstrInfo *TII; 1001 const TargetRegisterInfo *TRI; 1002 ScheduleDAGRRList *scheduleDAG; 1003 1004 // SethiUllmanNumbers - The SethiUllman number for each node. 1005 std::vector<unsigned> SethiUllmanNumbers; 1006 1007 public: 1008 RegReductionPriorityQueue(const TargetInstrInfo *tii, 1009 const TargetRegisterInfo *tri) 1010 : Queue(SF(this)), currentQueueId(0), 1011 TII(tii), TRI(tri), scheduleDAG(NULL) {} 1012 1013 void initNodes(std::vector<SUnit> &sunits) { 1014 SUnits = &sunits; 1015 // Add pseudo dependency edges for two-address nodes. 1016 AddPseudoTwoAddrDeps(); 1017 // Reroute edges to nodes with multiple uses. 1018 PrescheduleNodesWithMultipleUses(); 1019 // Calculate node priorities. 
1020 CalculateSethiUllmanNumbers(); 1021 } 1022 1023 void addNode(const SUnit *SU) { 1024 unsigned SUSize = SethiUllmanNumbers.size(); 1025 if (SUnits->size() > SUSize) 1026 SethiUllmanNumbers.resize(SUSize*2, 0); 1027 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1028 } 1029 1030 void updateNode(const SUnit *SU) { 1031 SethiUllmanNumbers[SU->NodeNum] = 0; 1032 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1033 } 1034 1035 void releaseState() { 1036 SUnits = 0; 1037 SethiUllmanNumbers.clear(); 1038 } 1039 1040 unsigned getNodePriority(const SUnit *SU) const { 1041 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1042 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1043 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1044 // CopyToReg should be close to its uses to facilitate coalescing and 1045 // avoid spilling. 1046 return 0; 1047 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1048 Opc == TargetOpcode::SUBREG_TO_REG || 1049 Opc == TargetOpcode::INSERT_SUBREG) 1050 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1051 // close to their uses to facilitate coalescing. 1052 return 0; 1053 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1054 // If SU does not have a register use, i.e. it doesn't produce a value 1055 // that would be consumed (e.g. store), then it terminates a chain of 1056 // computation. Give it a large SethiUllman number so it will be 1057 // scheduled right before its predecessors that it doesn't lengthen 1058 // their live ranges. 1059 return 0xffff; 1060 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1061 // If SU does not have a register def, schedule it close to its uses 1062 // because it does not lengthen any live ranges. 
1063 return 0; 1064 return SethiUllmanNumbers[SU->NodeNum]; 1065 } 1066 1067 unsigned getNodeOrdering(const SUnit *SU) const { 1068 return scheduleDAG->DAG->GetOrdering(SU->getNode()); 1069 } 1070 1071 unsigned size() const { return Queue.size(); } 1072 1073 bool empty() const { return Queue.empty(); } 1074 1075 void push(SUnit *U) { 1076 assert(!U->NodeQueueId && "Node in the queue already"); 1077 U->NodeQueueId = ++currentQueueId; 1078 Queue.push(U); 1079 } 1080 1081 void push_all(const std::vector<SUnit *> &Nodes) { 1082 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1083 push(Nodes[i]); 1084 } 1085 1086 SUnit *pop() { 1087 if (empty()) return NULL; 1088 SUnit *V = Queue.top(); 1089 Queue.pop(); 1090 V->NodeQueueId = 0; 1091 return V; 1092 } 1093 1094 void remove(SUnit *SU) { 1095 assert(!Queue.empty() && "Queue is empty!"); 1096 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1097 Queue.erase_one(SU); 1098 SU->NodeQueueId = 0; 1099 } 1100 1101 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1102 scheduleDAG = scheduleDag; 1103 } 1104 1105 protected: 1106 bool canClobber(const SUnit *SU, const SUnit *Op); 1107 void AddPseudoTwoAddrDeps(); 1108 void PrescheduleNodesWithMultipleUses(); 1109 void CalculateSethiUllmanNumbers(); 1110 }; 1111 1112 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1113 BURegReductionPriorityQueue; 1114 1115 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1116 TDRegReductionPriorityQueue; 1117 1118 typedef RegReductionPriorityQueue<src_ls_rr_sort> 1119 SrcRegReductionPriorityQueue; 1120} 1121 1122/// closestSucc - Returns the scheduled cycle of the successor which is 1123/// closest to the current cycle. 
1124static unsigned closestSucc(const SUnit *SU) { 1125 unsigned MaxHeight = 0; 1126 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1127 I != E; ++I) { 1128 if (I->isCtrl()) continue; // ignore chain succs 1129 unsigned Height = I->getSUnit()->getHeight(); 1130 // If there are bunch of CopyToRegs stacked up, they should be considered 1131 // to be at the same position. 1132 if (I->getSUnit()->getNode() && 1133 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1134 Height = closestSucc(I->getSUnit())+1; 1135 if (Height > MaxHeight) 1136 MaxHeight = Height; 1137 } 1138 return MaxHeight; 1139} 1140 1141/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1142/// for scratch registers, i.e. number of data dependencies. 1143static unsigned calcMaxScratches(const SUnit *SU) { 1144 unsigned Scratches = 0; 1145 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1146 I != E; ++I) { 1147 if (I->isCtrl()) continue; // ignore chain preds 1148 Scratches++; 1149 } 1150 return Scratches; 1151} 1152 1153template <typename RRSort> 1154static bool BURRSort(const SUnit *left, const SUnit *right, 1155 const RegReductionPriorityQueue<RRSort> *SPQ) { 1156 unsigned LPriority = SPQ->getNodePriority(left); 1157 unsigned RPriority = SPQ->getNodePriority(right); 1158 if (LPriority != RPriority) 1159 return LPriority > RPriority; 1160 1161 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1162 // e.g. 1163 // t1 = op t2, c1 1164 // t3 = op t4, c2 1165 // 1166 // and the following instructions are both ready. 1167 // t2 = op c3 1168 // t4 = op c4 1169 // 1170 // Then schedule t2 = op first. 1171 // i.e. 1172 // t4 = op c4 1173 // t2 = op c3 1174 // t1 = op t2, c1 1175 // t3 = op t4, c2 1176 // 1177 // This creates more short live intervals. 
1178 unsigned LDist = closestSucc(left); 1179 unsigned RDist = closestSucc(right); 1180 if (LDist != RDist) 1181 return LDist < RDist; 1182 1183 // How many registers becomes live when the node is scheduled. 1184 unsigned LScratch = calcMaxScratches(left); 1185 unsigned RScratch = calcMaxScratches(right); 1186 if (LScratch != RScratch) 1187 return LScratch > RScratch; 1188 1189 if (left->getHeight() != right->getHeight()) 1190 return left->getHeight() > right->getHeight(); 1191 1192 if (left->getDepth() != right->getDepth()) 1193 return left->getDepth() < right->getDepth(); 1194 1195 assert(left->NodeQueueId && right->NodeQueueId && 1196 "NodeQueueId cannot be zero"); 1197 return (left->NodeQueueId > right->NodeQueueId); 1198} 1199 1200// Bottom up 1201bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1202 return BURRSort(left, right, SPQ); 1203} 1204 1205// Source order, otherwise bottom up. 1206bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ 1207 unsigned LOrder = SPQ->getNodeOrdering(left); 1208 unsigned ROrder = SPQ->getNodeOrdering(right); 1209 1210 // Prefer an ordering where the lower the non-zero order number, the higher 1211 // the preference. 
1212 if ((LOrder || ROrder) && LOrder != ROrder) 1213 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 1214 1215 return BURRSort(left, right, SPQ); 1216} 1217 1218template<class SF> 1219bool 1220RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1221 if (SU->isTwoAddress) { 1222 unsigned Opc = SU->getNode()->getMachineOpcode(); 1223 const TargetInstrDesc &TID = TII->get(Opc); 1224 unsigned NumRes = TID.getNumDefs(); 1225 unsigned NumOps = TID.getNumOperands() - NumRes; 1226 for (unsigned i = 0; i != NumOps; ++i) { 1227 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1228 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1229 if (DU->getNodeId() != -1 && 1230 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1231 return true; 1232 } 1233 } 1234 } 1235 return false; 1236} 1237 1238/// hasCopyToRegUse - Return true if SU has a value successor that is a 1239/// CopyToReg node. 1240static bool hasCopyToRegUse(const SUnit *SU) { 1241 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1242 I != E; ++I) { 1243 if (I->isCtrl()) continue; 1244 const SUnit *SuccSU = I->getSUnit(); 1245 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1246 return true; 1247 } 1248 return false; 1249} 1250 1251/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1252/// physical register defs. 
1253static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1254 const TargetInstrInfo *TII, 1255 const TargetRegisterInfo *TRI) { 1256 SDNode *N = SuccSU->getNode(); 1257 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1258 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1259 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1260 for (const SDNode *SUNode = SU->getNode(); SUNode; 1261 SUNode = SUNode->getFlaggedNode()) { 1262 if (!SUNode->isMachineOpcode()) 1263 continue; 1264 const unsigned *SUImpDefs = 1265 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1266 if (!SUImpDefs) 1267 return false; 1268 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1269 EVT VT = N->getValueType(i); 1270 if (VT == MVT::Flag || VT == MVT::Other) 1271 continue; 1272 if (!N->hasAnyUseOfValue(i)) 1273 continue; 1274 unsigned Reg = ImpDefs[i - NumDefs]; 1275 for (;*SUImpDefs; ++SUImpDefs) { 1276 unsigned SUReg = *SUImpDefs; 1277 if (TRI->regsOverlap(Reg, SUReg)) 1278 return true; 1279 } 1280 } 1281 } 1282 return false; 1283} 1284 1285/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1286/// are not handled well by the general register pressure reduction 1287/// heuristics. When presented with code like this: 1288/// 1289/// N 1290/// / | 1291/// / | 1292/// U store 1293/// | 1294/// ... 1295/// 1296/// the heuristics tend to push the store up, but since the 1297/// operand of the store has another use (U), this would increase 1298/// the length of that other use (the U->N edge). 1299/// 1300/// This function transforms code like the above to route U's 1301/// dependence through the store when possible, like this: 1302/// 1303/// N 1304/// || 1305/// || 1306/// store 1307/// | 1308/// U 1309/// | 1310/// ... 1311/// 1312/// This results in the store being scheduled immediately 1313/// after N, which shortens the U->N live range, reducing 1314/// register pressure. 
1315/// 1316template<class SF> 1317void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1318 // Visit all the nodes in topological order, working top-down. 1319 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1320 SUnit *SU = &(*SUnits)[i]; 1321 // For now, only look at nodes with no data successors, such as stores. 1322 // These are especially important, due to the heuristics in 1323 // getNodePriority for nodes with no data successors. 1324 if (SU->NumSuccs != 0) 1325 continue; 1326 // For now, only look at nodes with exactly one data predecessor. 1327 if (SU->NumPreds != 1) 1328 continue; 1329 // Avoid prescheduling copies to virtual registers, which don't behave 1330 // like other nodes from the perspective of scheduling heuristics. 1331 if (SDNode *N = SU->getNode()) 1332 if (N->getOpcode() == ISD::CopyToReg && 1333 TargetRegisterInfo::isVirtualRegister 1334 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1335 continue; 1336 1337 // Locate the single data predecessor. 1338 SUnit *PredSU = 0; 1339 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1340 EE = SU->Preds.end(); II != EE; ++II) 1341 if (!II->isCtrl()) { 1342 PredSU = II->getSUnit(); 1343 break; 1344 } 1345 assert(PredSU); 1346 1347 // Don't rewrite edges that carry physregs, because that requires additional 1348 // support infrastructure. 1349 if (PredSU->hasPhysRegDefs) 1350 continue; 1351 // Short-circuit the case where SU is PredSU's only data successor. 1352 if (PredSU->NumSuccs == 1) 1353 continue; 1354 // Avoid prescheduling to copies from virtual registers, which don't behave 1355 // like other nodes from the perspective of scheduling // heuristics. 1356 if (SDNode *N = SU->getNode()) 1357 if (N->getOpcode() == ISD::CopyFromReg && 1358 TargetRegisterInfo::isVirtualRegister 1359 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1360 continue; 1361 1362 // Perform checks on the successors of PredSU. 
1363 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1364 EE = PredSU->Succs.end(); II != EE; ++II) { 1365 SUnit *PredSuccSU = II->getSUnit(); 1366 if (PredSuccSU == SU) continue; 1367 // If PredSU has another successor with no data successors, for 1368 // now don't attempt to choose either over the other. 1369 if (PredSuccSU->NumSuccs == 0) 1370 goto outer_loop_continue; 1371 // Don't break physical register dependencies. 1372 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1373 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1374 goto outer_loop_continue; 1375 // Don't introduce graph cycles. 1376 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1377 goto outer_loop_continue; 1378 } 1379 1380 // Ok, the transformation is safe and the heuristics suggest it is 1381 // profitable. Update the graph. 1382 DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum 1383 << " next to PredSU # " << PredSU->NodeNum 1384 << " to guide scheduling in the presence of multiple uses\n"); 1385 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1386 SDep Edge = PredSU->Succs[i]; 1387 assert(!Edge.isAssignedRegDep()); 1388 SUnit *SuccSU = Edge.getSUnit(); 1389 if (SuccSU != SU) { 1390 Edge.setSUnit(PredSU); 1391 scheduleDAG->RemovePred(SuccSU, Edge); 1392 scheduleDAG->AddPred(SU, Edge); 1393 Edge.setSUnit(SU); 1394 scheduleDAG->AddPred(SuccSU, Edge); 1395 --i; 1396 } 1397 } 1398 outer_loop_continue:; 1399 } 1400} 1401 1402/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1403/// it as a def&use operand. Add a pseudo control edge from it to the other 1404/// node (if it won't create a cycle) so the two-address one will be scheduled 1405/// first (lower in the schedule). If both nodes are two-address, favor the 1406/// one that has a CopyToReg use (more likely to be a loop induction update). 1407/// If both are two-address, but one is commutable while the other is not 1408/// commutable, favor the one that's not commutable. 
1409template<class SF> 1410void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1411 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1412 SUnit *SU = &(*SUnits)[i]; 1413 if (!SU->isTwoAddress) 1414 continue; 1415 1416 SDNode *Node = SU->getNode(); 1417 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1418 continue; 1419 1420 unsigned Opc = Node->getMachineOpcode(); 1421 const TargetInstrDesc &TID = TII->get(Opc); 1422 unsigned NumRes = TID.getNumDefs(); 1423 unsigned NumOps = TID.getNumOperands() - NumRes; 1424 for (unsigned j = 0; j != NumOps; ++j) { 1425 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1426 continue; 1427 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1428 if (DU->getNodeId() == -1) 1429 continue; 1430 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1431 if (!DUSU) continue; 1432 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1433 E = DUSU->Succs.end(); I != E; ++I) { 1434 if (I->isCtrl()) continue; 1435 SUnit *SuccSU = I->getSUnit(); 1436 if (SuccSU == SU) 1437 continue; 1438 // Be conservative. Ignore if nodes aren't at roughly the same 1439 // depth and height. 1440 if (SuccSU->getHeight() < SU->getHeight() && 1441 (SU->getHeight() - SuccSU->getHeight()) > 1) 1442 continue; 1443 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 1444 // constrains whatever is using the copy, instead of the copy 1445 // itself. In the case that the copy is coalesced, this 1446 // preserves the intent of the pseudo two-address heurietics. 1447 while (SuccSU->Succs.size() == 1 && 1448 SuccSU->getNode()->isMachineOpcode() && 1449 SuccSU->getNode()->getMachineOpcode() == 1450 TargetOpcode::COPY_TO_REGCLASS) 1451 SuccSU = SuccSU->Succs.front().getSUnit(); 1452 // Don't constrain non-instruction nodes. 1453 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1454 continue; 1455 // Don't constrain nodes with physical register defs if the 1456 // predecessor can clobber them. 
1457 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1458 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1459 continue; 1460 } 1461 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1462 // these may be coalesced away. We want them close to their uses. 1463 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1464 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 1465 SuccOpc == TargetOpcode::INSERT_SUBREG || 1466 SuccOpc == TargetOpcode::SUBREG_TO_REG) 1467 continue; 1468 if ((!canClobber(SuccSU, DUSU) || 1469 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1470 (!SU->isCommutable && SuccSU->isCommutable)) && 1471 !scheduleDAG->IsReachable(SuccSU, SU)) { 1472 DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # " 1473 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 1474 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1475 /*Reg=*/0, /*isNormalMemory=*/false, 1476 /*isMustAlias=*/false, 1477 /*isArtificial=*/true)); 1478 } 1479 } 1480 } 1481 } 1482} 1483 1484/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1485/// scheduling units. 1486template<class SF> 1487void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1488 SethiUllmanNumbers.assign(SUnits->size(), 0); 1489 1490 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1491 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1492} 1493 1494/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1495/// predecessors of the successors of the SUnit SU. Stop when the provided 1496/// limit is exceeded. 
1497static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1498 unsigned Limit) { 1499 unsigned Sum = 0; 1500 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1501 I != E; ++I) { 1502 const SUnit *SuccSU = I->getSUnit(); 1503 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1504 EE = SuccSU->Preds.end(); II != EE; ++II) { 1505 SUnit *PredSU = II->getSUnit(); 1506 if (!PredSU->isScheduled) 1507 if (++Sum > Limit) 1508 return Sum; 1509 } 1510 } 1511 return Sum; 1512} 1513 1514 1515// Top down 1516bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1517 unsigned LPriority = SPQ->getNodePriority(left); 1518 unsigned RPriority = SPQ->getNodePriority(right); 1519 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1520 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1521 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1522 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1523 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1524 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; 1525 1526 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1527 return false; 1528 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1529 return true; 1530 1531 if (LIsFloater) 1532 LBonus -= 2; 1533 if (RIsFloater) 1534 RBonus -= 2; 1535 if (left->NumSuccs == 1) 1536 LBonus += 2; 1537 if (right->NumSuccs == 1) 1538 RBonus += 2; 1539 1540 if (LPriority+LBonus != RPriority+RBonus) 1541 return LPriority+LBonus < RPriority+RBonus; 1542 1543 if (left->getDepth() != right->getDepth()) 1544 return left->getDepth() < right->getDepth(); 1545 1546 if (left->NumSuccsLeft != right->NumSuccsLeft) 1547 return left->NumSuccsLeft > right->NumSuccsLeft; 1548 1549 assert(left->NodeQueueId && right->NodeQueueId && 1550 "NodeQueueId cannot be zero"); 1551 return (left->NodeQueueId > right->NodeQueueId); 1552} 1553 1554//===----------------------------------------------------------------------===// 1555// Public Constructor Functions 1556//===----------------------------------------------------------------------===// 1557 1558llvm::ScheduleDAGSDNodes * 1559llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1560 const TargetMachine &TM = IS->TM; 1561 const TargetInstrInfo *TII = TM.getInstrInfo(); 1562 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1563 1564 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1565 1566 ScheduleDAGRRList *SD = 1567 new ScheduleDAGRRList(*IS->MF, true, PQ); 1568 PQ->setScheduleDAG(SD); 1569 return SD; 1570} 1571 1572llvm::ScheduleDAGSDNodes * 1573llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1574 const TargetMachine &TM = IS->TM; 1575 const TargetInstrInfo *TII = TM.getInstrInfo(); 1576 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1577 1578 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1579 1580 ScheduleDAGRRList *SD = 1581 new ScheduleDAGRRList(*IS->MF, false, PQ); 1582 PQ->setScheduleDAG(SD); 1583 return SD; 1584} 
1585 1586llvm::ScheduleDAGSDNodes * 1587llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1588 const TargetMachine &TM = IS->TM; 1589 const TargetInstrInfo *TII = TM.getInstrInfo(); 1590 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1591 1592 SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI); 1593 1594 ScheduleDAGRRList *SD = 1595 new ScheduleDAGRRList(*IS->MF, true, PQ); 1596 PQ->setScheduleDAG(SD); 1597 return SD; 1598} 1599