ScheduleDAGRRList.cpp revision e264f62ca09a8f65c87a46d562a4d0f9ec5d457e
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

// Register the three scheduler variants with the scheduler registry so they
// can be selected by name (e.g. -pre-RA-sched=list-burr).
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  /// Owned by this scheduler; deleted in the destructor.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled.  Indexed by physical register
  /// number; LiveRegCycles records the cycle at which each register became
  /// live, and NumLiveRegs counts the non-NULL entries of LiveRegDefs.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// This returns true if an edge was removed.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs() << "********** List Scheduling **********\n");

  // Reset per-run live physical register tracking.
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  // Releasing a node more times than it has successors indicates a broken
  // graph or a scheduler bug; abort loudly in debug builds.
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // This node was recorded in LiveRegDefs when its reg-dep successor was
      // scheduled; now that the def itself is scheduled the register is no
      // longer live.
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    // Pending nodes are tracked in NotReady, not in the queue.
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
291void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 292 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 293 DEBUG(SU->dump(this)); 294 295 AvailableQueue->UnscheduledNode(SU); 296 297 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 298 I != E; ++I) { 299 CapturePred(&*I); 300 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 301 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 302 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 303 "Physical register dependency violated?"); 304 --NumLiveRegs; 305 LiveRegDefs[I->getReg()] = NULL; 306 LiveRegCycles[I->getReg()] = 0; 307 } 308 } 309 310 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 311 I != E; ++I) { 312 if (I->isAssignedRegDep()) { 313 if (!LiveRegDefs[I->getReg()]) { 314 LiveRegDefs[I->getReg()] = SU; 315 ++NumLiveRegs; 316 } 317 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 318 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 319 } 320 } 321 322 SU->setHeightDirty(); 323 SU->isScheduled = false; 324 SU->isAvailable = true; 325 AvailableQueue->push(SU); 326} 327 328/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 329/// BTCycle in order to schedule a specific node. 330void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 331 unsigned &CurCycle) { 332 SUnit *OldSU = NULL; 333 while (CurCycle > BtCycle) { 334 OldSU = Sequence.back(); 335 Sequence.pop_back(); 336 if (SU->isSucc(OldSU)) 337 // Don't try to remove SU from AvailableQueue. 
338 SU->isAvailable = false; 339 UnscheduleNodeBottomUp(OldSU); 340 --CurCycle; 341 } 342 343 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 344 345 ++NumBacktracks; 346} 347 348static bool isOperandOf(const SUnit *SU, SDNode *N) { 349 for (const SDNode *SUNode = SU->getNode(); SUNode; 350 SUNode = SUNode->getFlaggedNode()) { 351 if (SUNode->isOperandOf(N)) 352 return true; 353 } 354 return false; 355} 356 357/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 358/// successors to the newly created node. 359SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 360 if (SU->getNode()->getFlaggedNode()) 361 return NULL; 362 363 SDNode *N = SU->getNode(); 364 if (!N) 365 return NULL; 366 367 SUnit *NewSU; 368 bool TryUnfold = false; 369 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 370 EVT VT = N->getValueType(i); 371 if (VT == MVT::Flag) 372 return NULL; 373 else if (VT == MVT::Other) 374 TryUnfold = true; 375 } 376 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 377 const SDValue &Op = N->getOperand(i); 378 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 379 if (VT == MVT::Flag) 380 return NULL; 381 } 382 383 if (TryUnfold) { 384 SmallVector<SDNode*, 2> NewNodes; 385 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 386 return NULL; 387 388 DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); 389 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 390 391 N = NewNodes[1]; 392 SDNode *LoadNode = NewNodes[0]; 393 unsigned NumVals = N->getNumValues(); 394 unsigned OldNumVals = SU->getNode()->getNumValues(); 395 for (unsigned i = 0; i != NumVals; ++i) 396 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 397 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 398 SDValue(LoadNode, 1)); 399 400 // LoadNode may already exist. 
This can happen when there is another 401 // load from the same location and producing the same type of value 402 // but it has different alignment or volatileness. 403 bool isNewLoad = true; 404 SUnit *LoadSU; 405 if (LoadNode->getNodeId() != -1) { 406 LoadSU = &SUnits[LoadNode->getNodeId()]; 407 isNewLoad = false; 408 } else { 409 LoadSU = CreateNewSUnit(LoadNode); 410 LoadNode->setNodeId(LoadSU->NodeNum); 411 ComputeLatency(LoadSU); 412 } 413 414 SUnit *NewSU = CreateNewSUnit(N); 415 assert(N->getNodeId() == -1 && "Node already inserted!"); 416 N->setNodeId(NewSU->NodeNum); 417 418 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 419 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 420 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 421 NewSU->isTwoAddress = true; 422 break; 423 } 424 } 425 if (TID.isCommutable()) 426 NewSU->isCommutable = true; 427 ComputeLatency(NewSU); 428 429 // Record all the edges to and from the old SU, by category. 430 SmallVector<SDep, 4> ChainPreds; 431 SmallVector<SDep, 4> ChainSuccs; 432 SmallVector<SDep, 4> LoadPreds; 433 SmallVector<SDep, 4> NodePreds; 434 SmallVector<SDep, 4> NodeSuccs; 435 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 436 I != E; ++I) { 437 if (I->isCtrl()) 438 ChainPreds.push_back(*I); 439 else if (isOperandOf(I->getSUnit(), LoadNode)) 440 LoadPreds.push_back(*I); 441 else 442 NodePreds.push_back(*I); 443 } 444 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 445 I != E; ++I) { 446 if (I->isCtrl()) 447 ChainSuccs.push_back(*I); 448 else 449 NodeSuccs.push_back(*I); 450 } 451 452 // Now assign edges to the newly-created nodes. 
453 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 454 const SDep &Pred = ChainPreds[i]; 455 RemovePred(SU, Pred); 456 if (isNewLoad) 457 AddPred(LoadSU, Pred); 458 } 459 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 460 const SDep &Pred = LoadPreds[i]; 461 RemovePred(SU, Pred); 462 if (isNewLoad) 463 AddPred(LoadSU, Pred); 464 } 465 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 466 const SDep &Pred = NodePreds[i]; 467 RemovePred(SU, Pred); 468 AddPred(NewSU, Pred); 469 } 470 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 471 SDep D = NodeSuccs[i]; 472 SUnit *SuccDep = D.getSUnit(); 473 D.setSUnit(SU); 474 RemovePred(SuccDep, D); 475 D.setSUnit(NewSU); 476 AddPred(SuccDep, D); 477 } 478 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 479 SDep D = ChainSuccs[i]; 480 SUnit *SuccDep = D.getSUnit(); 481 D.setSUnit(SU); 482 RemovePred(SuccDep, D); 483 if (isNewLoad) { 484 D.setSUnit(LoadSU); 485 AddPred(SuccDep, D); 486 } 487 } 488 489 // Add a data dependency to reflect that NewSU reads the value defined 490 // by LoadSU. 491 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 492 493 if (isNewLoad) 494 AvailableQueue->addNode(LoadSU); 495 AvailableQueue->addNode(NewSU); 496 497 ++NumUnfolds; 498 499 if (NewSU->NumSuccsLeft == 0) { 500 NewSU->isAvailable = true; 501 return NewSU; 502 } 503 SU = NewSU; 504 } 505 506 DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); 507 NewSU = CreateClone(SU); 508 509 // New SUnit has the exact same predecessors. 510 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 511 I != E; ++I) 512 if (!I->isArtificial()) 513 AddPred(NewSU, *I); 514 515 // Only copy scheduled successors. Cut them from old node's successor 516 // list and move them over. 
517 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 518 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 519 I != E; ++I) { 520 if (I->isArtificial()) 521 continue; 522 SUnit *SuccSU = I->getSUnit(); 523 if (SuccSU->isScheduled) { 524 SDep D = *I; 525 D.setSUnit(NewSU); 526 AddPred(SuccSU, D); 527 D.setSUnit(SU); 528 DelDeps.push_back(std::make_pair(SuccSU, D)); 529 } 530 } 531 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 532 RemovePred(DelDeps[i].first, DelDeps[i].second); 533 534 AvailableQueue->updateNode(SU); 535 AvailableQueue->addNode(NewSU); 536 537 ++NumDups; 538 return NewSU; 539} 540 541/// InsertCopiesAndMoveSuccs - Insert register copies and move all 542/// scheduled successors of the given SUnit to the last copy. 543void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 544 const TargetRegisterClass *DestRC, 545 const TargetRegisterClass *SrcRC, 546 SmallVector<SUnit*, 2> &Copies) { 547 SUnit *CopyFromSU = CreateNewSUnit(NULL); 548 CopyFromSU->CopySrcRC = SrcRC; 549 CopyFromSU->CopyDstRC = DestRC; 550 551 SUnit *CopyToSU = CreateNewSUnit(NULL); 552 CopyToSU->CopySrcRC = DestRC; 553 CopyToSU->CopyDstRC = SrcRC; 554 555 // Only copy scheduled successors. Cut them from old node's successor 556 // list and move them over. 
557 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 558 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 559 I != E; ++I) { 560 if (I->isArtificial()) 561 continue; 562 SUnit *SuccSU = I->getSUnit(); 563 if (SuccSU->isScheduled) { 564 SDep D = *I; 565 D.setSUnit(CopyToSU); 566 AddPred(SuccSU, D); 567 DelDeps.push_back(std::make_pair(SuccSU, *I)); 568 } 569 } 570 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 571 RemovePred(DelDeps[i].first, DelDeps[i].second); 572 573 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 574 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 575 576 AvailableQueue->updateNode(SU); 577 AvailableQueue->addNode(CopyFromSU); 578 AvailableQueue->addNode(CopyToSU); 579 Copies.push_back(CopyFromSU); 580 Copies.push_back(CopyToSU); 581 582 ++NumPRCopies; 583} 584 585/// getPhysicalRegisterVT - Returns the ValueType of the physical register 586/// definition of the specified node. 587/// FIXME: Move to SelectionDAG? 588static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 589 const TargetInstrInfo *TII) { 590 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 591 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 592 unsigned NumRes = TID.getNumDefs(); 593 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 594 if (Reg == *ImpDef) 595 break; 596 ++NumRes; 597 } 598 return N->getValueType(NumRes); 599} 600 601/// CheckForLiveRegDef - Return true and update live register vector if the 602/// specified register def of the specified SUnit clobbers any "live" registers. 
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  // Reg itself: live and defined by some other node => interference.
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  // Also check every register that aliases Reg.
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      // Walk the inline-asm operand groups; each group starts with a flags
      // word encoding the kind and the number of following values.
      for (unsigned i = 2; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = (Flags & 0xffff) >> 3;

        ++i; // Skip the ID value.
        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    // Pop nodes in priority order until one can be scheduled without
    // violating a live physical register dependency.
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicate the nodes that produces these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If cross copy register class is null, then it must be possible copy
        // the value directly. Do not try duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  // Mirror of the ReleasePred sanity check, for the top-down direction.
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//  RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  /// bu_ls_rr_sort - Priority comparator for the bottom-up register
  /// reduction scheduler ("list-burr").  operator() is defined out of line.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  /// td_ls_rr_sort - Priority comparator for the top-down register
  /// reduction scheduler ("list-tdrr").
  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  /// src_ls_rr_sort - Priority comparator for the source-order scheduler
  /// ("source"): like bu_ls_rr_sort but prefers source order when possible.
  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  // Memoized via SUNumbers: 0 means "not yet computed" (final numbers are
  // always >= 1 — see the clamp at the bottom — so 0 is a safe sentinel).
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  // Sethi-Ullman style numbering: take the maximum over all data
  // predecessors, plus one "Extra" for each additional predecessor that
  // ties the running maximum. Recursion depth is bounded by the longest
  // data-dependence chain in the DAG.
  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  // Nodes with no data predecessors get the minimum number, 1.
  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  /// RegReductionPriorityQueue - A SchedulingPriorityQueue parameterized by
  /// a comparison functor SF. Priorities are derived from Sethi-Ullman
  /// numbers so as to reduce register pressure.
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    // currentQueueId - Monotonically increasing id assigned to each node as
    // it is pushed; the comparators use NodeQueueId as a final tie-breaker.
    unsigned currentQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri)
      : Queue(SF(this)), currentQueueId(0),
        TII(tii), TRI(tri), scheduleDAG(NULL) {}

    // initNodes - Prepare the graph before scheduling: add artificial edges,
    // reroute multi-use edges, then compute all Sethi-Ullman numbers.
    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    // addNode - Called when a node is created during scheduling (e.g. by
    // unfolding); grow the number table if needed and number the new node.
    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        // NOTE(review): doubling the old size assumes SUnits grew by less
        // than a factor of two since the last resize — TODO confirm callers
        // add nodes one at a time.
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    // updateNode - Recompute SU's number after its edges changed; resetting
    // to 0 invalidates the memoized value first.
    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    // releaseState - Drop per-graph state between scheduling regions.
    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    // getNodePriority - Map a node to its scheduling priority. Special-cased
    // opcodes override the Sethi-Ullman number.
    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors that it doesn't lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    // getNodeOrdering - Source-order index of SU's node, from the DAG's
    // ordering map (used by the "source" scheduler).
    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    // push - Insert U, stamping it with a fresh queue id. A nonzero
    // NodeQueueId doubles as the "already queued" marker.
    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++currentQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    // pop - Remove and return the highest-priority node, clearing its queue
    // id; returns NULL when the queue is empty.
    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    // remove - Remove a specific node from the middle of the queue.
    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<src_ls_rr_sort>
    SrcRegReductionPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  // Maximum height over all data successors; chains of CopyToReg are
  // flattened so stacked copies count as one position.
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be
    // considered to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

/// BURRSort - Bottom-up register-reduction comparison. Returns true when
/// `left` should be scheduled LATER than `right`. Tie-break order:
/// Sethi-Ullman priority, closest-successor distance, scratch-register
/// estimate, height, depth, and finally queue insertion order.
template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  // Final tie-break: later queue id loses, giving FIFO-like stability.
  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the preference. A zero order number means "no source ordering"; a node
  // with an order always beats one without.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

/// canClobber - Returns true if SU is a two-address instruction with an
/// operand tied to a def that comes from Op's original node.
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      // Operand i is tied to a def; check whether its value comes from Op.
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // data successors only
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
1251static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1252 const TargetInstrInfo *TII, 1253 const TargetRegisterInfo *TRI) { 1254 SDNode *N = SuccSU->getNode(); 1255 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1256 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1257 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1258 for (const SDNode *SUNode = SU->getNode(); SUNode; 1259 SUNode = SUNode->getFlaggedNode()) { 1260 if (!SUNode->isMachineOpcode()) 1261 continue; 1262 const unsigned *SUImpDefs = 1263 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1264 if (!SUImpDefs) 1265 return false; 1266 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1267 EVT VT = N->getValueType(i); 1268 if (VT == MVT::Flag || VT == MVT::Other) 1269 continue; 1270 if (!N->hasAnyUseOfValue(i)) 1271 continue; 1272 unsigned Reg = ImpDefs[i - NumDefs]; 1273 for (;*SUImpDefs; ++SUImpDefs) { 1274 unsigned SUReg = *SUImpDefs; 1275 if (TRI->regsOverlap(Reg, SUReg)) 1276 return true; 1277 } 1278 } 1279 } 1280 return false; 1281} 1282 1283/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1284/// are not handled well by the general register pressure reduction 1285/// heuristics. When presented with code like this: 1286/// 1287/// N 1288/// / | 1289/// / | 1290/// U store 1291/// | 1292/// ... 1293/// 1294/// the heuristics tend to push the store up, but since the 1295/// operand of the store has another use (U), this would increase 1296/// the length of that other use (the U->N edge). 1297/// 1298/// This function transforms code like the above to route U's 1299/// dependence through the store when possible, like this: 1300/// 1301/// N 1302/// || 1303/// || 1304/// store 1305/// | 1306/// U 1307/// | 1308/// ... 1309/// 1310/// This results in the store being scheduled immediately 1311/// after N, which shortens the U->N live range, reducing 1312/// register pressure. 
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU. Any disqualifying
    // successor abandons this SU entirely (goto outer_loop_continue).
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
                 << " next to PredSU # " << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    // Note: PredSU->Succs is mutated by RemovePred/AddPred inside the loop,
    // so size() is re-read each iteration and i is decremented after a
    // removal to avoid skipping entries.
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        // Reroute PredSU->SuccSU through SU: remove the direct edge, make
        // SU depend on PredSU via it, and make SuccSU depend on SU.
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand. Add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    // Only consider plain machine nodes; skip glued (flagged) node chains.
    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      // Only operands tied to a def (true two-address operands) matter.
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      // NOTE(review): DUSU is an address-of, so this null check can never
      // fire — presumably defensive leftover; confirm before removing.
      if (!DUSU) continue;
      // Examine every other data use of the shared operand's def.
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        // Add the artificial ordering edge SuccSU -> SU unless it would
        // create a cycle, favoring the heuristics described in the header
        // comment above.
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  // Zero the table first: CalcNodeSethiUllmanNumber treats 0 as "not yet
  // computed" and memoizes into it.
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        // Bail out as soon as the sum exceeds Limit; callers only care
        // about "== Limit" vs "> Limit".
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  // Returns true when `left` should be scheduled LATER than `right`.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  // A "floater" is a machine node with no predecessors; scheduling it early
  // lengthens its live range, so it is penalized below.
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  // Bonus for nodes that are the last unscheduled predecessor of a
  // successor (scheduling them unblocks that successor).
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ?
    2 : 0;

  // Nodes with no successors sort after nodes that still have successors.
  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // NOTE(review): LBonus/RBonus are unsigned and may be 0 here, so the -= 2
  // below can wrap around to a huge value — presumably benign because both
  // sides use the same arithmetic, but confirm the intended semantics.
  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  // Final tie-break on queue insertion order.
  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  // isBottomUp = true for the bottom-up register reduction scheduler.
  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  // isBottomUp = false for the top-down variant.
  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}
1583 1584llvm::ScheduleDAGSDNodes * 1585llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1586 const TargetMachine &TM = IS->TM; 1587 const TargetInstrInfo *TII = TM.getInstrInfo(); 1588 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1589 1590 SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI); 1591 1592 ScheduleDAGRRList *SD = 1593 new ScheduleDAGRRList(*IS->MF, true, PQ); 1594 PQ->setScheduleDAG(SD); 1595 return SD; 1596} 1597