ScheduleDAGRRList.cpp revision 5373b294648bf0285621a82281f136578ba70082
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/InlineAsm.h" 21#include "llvm/CodeGen/SchedulerRegistry.h" 22#include "llvm/CodeGen/SelectionDAGISel.h" 23#include "llvm/Target/TargetRegisterInfo.h" 24#include "llvm/Target/TargetData.h" 25#include "llvm/Target/TargetMachine.h" 26#include "llvm/Target/TargetInstrInfo.h" 27#include "llvm/ADT/PriorityQueue.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/Statistic.h" 30#include "llvm/ADT/STLExtras.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/ErrorHandling.h" 33#include "llvm/Support/raw_ostream.h" 34#include <climits> 35using namespace llvm; 36 37STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 38STATISTIC(NumUnfolds, "Number of nodes unfolded"); 39STATISTIC(NumDups, "Number of duplicated nodes"); 40STATISTIC(NumPRCopies, "Number of physical register copies"); 41 42static RegisterScheduler 43 burrListDAGScheduler("list-burr", 44 "Bottom-up register reduction list scheduling", 45 createBURRListDAGScheduler); 46static RegisterScheduler 47 tdrListrDAGScheduler("list-tdrr", 48 "Top-down register reduction list scheduling", 49 createTDRRListDAGScheduler); 50static RegisterScheduler 51 sourceListDAGScheduler("source", 52 "Similar to list-burr but schedules in source " 53 "order when possible", 54 createSourceListDAGScheduler); 55 56static RegisterScheduler 57 hybridListDAGScheduler("hybrid", 58 "Bottom-up rr list scheduling which avoid stalls for " 59 "long latency instructions", 60 createHybridListDAGScheduler); 61 62namespace { 63//===----------------------------------------------------------------------===// 64/// ScheduleDAGRRList - The actual register reduction list scheduler 65/// implementation. This supports both top-down and bottom-up scheduling. 66/// 67class ScheduleDAGRRList : public ScheduleDAGSDNodes { 68private: 69 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 70 /// it is top-down. 71 bool isBottomUp; 72 73 /// NeedLatency - True if the scheduler will make use of latency information. 74 /// 75 bool NeedLatency; 76 77 /// AvailableQueue - The priority queue to use for the available SUnits. 78 SchedulingPriorityQueue *AvailableQueue; 79 80 /// LiveRegDefs - A set of physical registers and their definition 81 /// that are "live". These nodes must be scheduled before any other nodes that 82 /// modifies the registers can be scheduled. 83 unsigned NumLiveRegs; 84 std::vector<SUnit*> LiveRegDefs; 85 std::vector<unsigned> LiveRegCycles; 86 87 /// Topo - A topological ordering for SUnits which permits fast IsReachable 88 /// and similar queries. 89 ScheduleDAGTopologicalSort Topo; 90 91public: 92 ScheduleDAGRRList(MachineFunction &mf, 93 bool isbottomup, bool needlatency, 94 SchedulingPriorityQueue *availqueue) 95 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), 96 AvailableQueue(availqueue), Topo(SUnits) { 97 } 98 99 ~ScheduleDAGRRList() { 100 delete AvailableQueue; 101 } 102 103 void Schedule(); 104 105 /// IsReachable - Checks if SU is reachable from TargetSU. 106 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 107 return Topo.IsReachable(SU, TargetSU); 108 } 109 110 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 111 /// create a cycle. 112 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 113 return Topo.WillCreateCycle(SU, TargetSU); 114 } 115 116 /// AddPred - adds a predecessor edge to SUnit SU. 117 /// This returns true if this is a new predecessor. 118 /// Updates the topological ordering if required. 119 void AddPred(SUnit *SU, const SDep &D) { 120 Topo.AddPred(SU, D.getSUnit()); 121 SU->addPred(D); 122 } 123 124 /// RemovePred - removes a predecessor edge from SUnit SU. 125 /// This returns true if an edge was removed. 126 /// Updates the topological ordering if required. 127 void RemovePred(SUnit *SU, const SDep &D) { 128 Topo.RemovePred(SU, D.getSUnit()); 129 SU->removePred(D); 130 } 131 132private: 133 void ReleasePred(SUnit *SU, const SDep *PredEdge); 134 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 135 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 136 void ReleaseSuccessors(SUnit *SU); 137 void CapturePred(SDep *PredEdge); 138 void ScheduleNodeBottomUp(SUnit*, unsigned); 139 void ScheduleNodeTopDown(SUnit*, unsigned); 140 void UnscheduleNodeBottomUp(SUnit*); 141 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 142 SUnit *CopyAndMoveSuccessors(SUnit*); 143 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 144 const TargetRegisterClass*, 145 const TargetRegisterClass*, 146 SmallVector<SUnit*, 2>&); 147 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 148 void ListScheduleTopDown(); 149 void ListScheduleBottomUp(); 150 151 152 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 153 /// Updates the topological ordering if required. 154 SUnit *CreateNewSUnit(SDNode *N) { 155 unsigned NumSUnits = SUnits.size(); 156 SUnit *NewNode = NewSUnit(N); 157 // Update the topological ordering. 158 if (NewNode->NodeNum >= NumSUnits) 159 Topo.InitDAGTopologicalSorting(); 160 return NewNode; 161 } 162 163 /// CreateClone - Creates a new SUnit from an existing one. 164 /// Updates the topological ordering if required. 165 SUnit *CreateClone(SUnit *N) { 166 unsigned NumSUnits = SUnits.size(); 167 SUnit *NewNode = Clone(N); 168 // Update the topological ordering. 169 if (NewNode->NodeNum >= NumSUnits) 170 Topo.InitDAGTopologicalSorting(); 171 return NewNode; 172 } 173 174 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't 175 /// need actual latency information but the hybrid scheduler does. 176 bool ForceUnitLatencies() const { 177 return !NeedLatency; 178 } 179}; 180} // end anonymous namespace 181 182 183/// Schedule - Schedule the DAG using list scheduling. 184void ScheduleDAGRRList::Schedule() { 185 DEBUG(dbgs() << "********** List Scheduling **********\n"); 186 187 NumLiveRegs = 0; 188 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 189 LiveRegCycles.resize(TRI->getNumRegs(), 0); 190 191 // Build the scheduling graph. 192 BuildSchedGraph(NULL); 193 194 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 195 SUnits[su].dumpAll(this)); 196 Topo.InitDAGTopologicalSorting(); 197 198 AvailableQueue->initNodes(SUnits); 199 200 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 201 if (isBottomUp) 202 ListScheduleBottomUp(); 203 else 204 ListScheduleTopDown(); 205 206 AvailableQueue->releaseState(); 207} 208 209//===----------------------------------------------------------------------===// 210// Bottom-Up Scheduling 211//===----------------------------------------------------------------------===// 212 213/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 214/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 215void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 216 SUnit *PredSU = PredEdge->getSUnit(); 217 218#ifndef NDEBUG 219 if (PredSU->NumSuccsLeft == 0) { 220 dbgs() << "*** Scheduling failed! ***\n"; 221 PredSU->dump(this); 222 dbgs() << " has been released too many times!\n"; 223 llvm_unreachable(0); 224 } 225#endif 226 --PredSU->NumSuccsLeft; 227 228 if (!ForceUnitLatencies()) { 229 // Updating predecessor's height. This is now the cycle when the 230 // predecessor can be scheduled without causing a pipeline stall. 231 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 232 } 233 234 // If all the node's successors are scheduled, this node is ready 235 // to be scheduled. Ignore the special EntrySU node. 236 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 237 PredSU->isAvailable = true; 238 AvailableQueue->push(PredSU); 239 } 240} 241 242void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 243 // Bottom up: release predecessors 244 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 245 I != E; ++I) { 246 ReleasePred(SU, &*I); 247 if (I->isAssignedRegDep()) { 248 // This is a physical register dependency and it's impossible or 249 // expensive to copy the register. Make sure nothing that can 250 // clobber the register is scheduled between the predecessor and 251 // this node. 252 if (!LiveRegDefs[I->getReg()]) { 253 ++NumLiveRegs; 254 LiveRegDefs[I->getReg()] = I->getSUnit(); 255 LiveRegCycles[I->getReg()] = CurCycle; 256 } 257 } 258 } 259} 260 261/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 262/// count of its predecessors. If a predecessor pending count is zero, add it to 263/// the Available queue. 264void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 265 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 266 DEBUG(SU->dump(this)); 267 268#ifndef NDEBUG 269 if (CurCycle < SU->getHeight()) 270 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); 271#endif 272 273 // FIXME: Handle noop hazard. 274 SU->setHeightToAtLeast(CurCycle); 275 Sequence.push_back(SU); 276 277 ReleasePredecessors(SU, CurCycle); 278 279 // Release all the implicit physical register defs that are live. 280 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 281 I != E; ++I) { 282 if (I->isAssignedRegDep()) { 283 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 284 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 285 assert(LiveRegDefs[I->getReg()] == SU && 286 "Physical register dependency violated?"); 287 --NumLiveRegs; 288 LiveRegDefs[I->getReg()] = NULL; 289 LiveRegCycles[I->getReg()] = 0; 290 } 291 } 292 } 293 294 SU->isScheduled = true; 295 AvailableQueue->ScheduledNode(SU); 296} 297 298/// CapturePred - This does the opposite of ReleasePred. Since SU is being 299/// unscheduled, incrcease the succ left count of its predecessors. Remove 300/// them from AvailableQueue if necessary. 301void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 302 SUnit *PredSU = PredEdge->getSUnit(); 303 if (PredSU->isAvailable) { 304 PredSU->isAvailable = false; 305 if (!PredSU->isPending) 306 AvailableQueue->remove(PredSU); 307 } 308 309 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 310 ++PredSU->NumSuccsLeft; 311} 312 313/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 314/// its predecessor states to reflect the change. 315void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 316 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 317 DEBUG(SU->dump(this)); 318 319 AvailableQueue->UnscheduledNode(SU); 320 321 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 322 I != E; ++I) { 323 CapturePred(&*I); 324 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 325 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 326 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 327 "Physical register dependency violated?"); 328 --NumLiveRegs; 329 LiveRegDefs[I->getReg()] = NULL; 330 LiveRegCycles[I->getReg()] = 0; 331 } 332 } 333 334 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 335 I != E; ++I) { 336 if (I->isAssignedRegDep()) { 337 if (!LiveRegDefs[I->getReg()]) { 338 LiveRegDefs[I->getReg()] = SU; 339 ++NumLiveRegs; 340 } 341 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 342 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 343 } 344 } 345 346 SU->setHeightDirty(); 347 SU->isScheduled = false; 348 SU->isAvailable = true; 349 AvailableQueue->push(SU); 350} 351 352/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 353/// BTCycle in order to schedule a specific node. 354void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 355 unsigned &CurCycle) { 356 SUnit *OldSU = NULL; 357 while (CurCycle > BtCycle) { 358 OldSU = Sequence.back(); 359 Sequence.pop_back(); 360 if (SU->isSucc(OldSU)) 361 // Don't try to remove SU from AvailableQueue. 362 SU->isAvailable = false; 363 UnscheduleNodeBottomUp(OldSU); 364 --CurCycle; 365 AvailableQueue->setCurCycle(CurCycle); 366 } 367 368 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 369 370 ++NumBacktracks; 371} 372 373static bool isOperandOf(const SUnit *SU, SDNode *N) { 374 for (const SDNode *SUNode = SU->getNode(); SUNode; 375 SUNode = SUNode->getFlaggedNode()) { 376 if (SUNode->isOperandOf(N)) 377 return true; 378 } 379 return false; 380} 381 382/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 383/// successors to the newly created node. 384SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 385 if (SU->getNode()->getFlaggedNode()) 386 return NULL; 387 388 SDNode *N = SU->getNode(); 389 if (!N) 390 return NULL; 391 392 SUnit *NewSU; 393 bool TryUnfold = false; 394 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 395 EVT VT = N->getValueType(i); 396 if (VT == MVT::Flag) 397 return NULL; 398 else if (VT == MVT::Other) 399 TryUnfold = true; 400 } 401 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 402 const SDValue &Op = N->getOperand(i); 403 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 404 if (VT == MVT::Flag) 405 return NULL; 406 } 407 408 if (TryUnfold) { 409 SmallVector<SDNode*, 2> NewNodes; 410 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 411 return NULL; 412 413 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 414 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 415 416 N = NewNodes[1]; 417 SDNode *LoadNode = NewNodes[0]; 418 unsigned NumVals = N->getNumValues(); 419 unsigned OldNumVals = SU->getNode()->getNumValues(); 420 for (unsigned i = 0; i != NumVals; ++i) 421 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 422 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 423 SDValue(LoadNode, 1)); 424 425 // LoadNode may already exist. This can happen when there is another 426 // load from the same location and producing the same type of value 427 // but it has different alignment or volatileness. 428 bool isNewLoad = true; 429 SUnit *LoadSU; 430 if (LoadNode->getNodeId() != -1) { 431 LoadSU = &SUnits[LoadNode->getNodeId()]; 432 isNewLoad = false; 433 } else { 434 LoadSU = CreateNewSUnit(LoadNode); 435 LoadNode->setNodeId(LoadSU->NodeNum); 436 ComputeLatency(LoadSU); 437 } 438 439 SUnit *NewSU = CreateNewSUnit(N); 440 assert(N->getNodeId() == -1 && "Node already inserted!"); 441 N->setNodeId(NewSU->NodeNum); 442 443 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 444 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 445 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 446 NewSU->isTwoAddress = true; 447 break; 448 } 449 } 450 if (TID.isCommutable()) 451 NewSU->isCommutable = true; 452 ComputeLatency(NewSU); 453 454 // Record all the edges to and from the old SU, by category. 455 SmallVector<SDep, 4> ChainPreds; 456 SmallVector<SDep, 4> ChainSuccs; 457 SmallVector<SDep, 4> LoadPreds; 458 SmallVector<SDep, 4> NodePreds; 459 SmallVector<SDep, 4> NodeSuccs; 460 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 461 I != E; ++I) { 462 if (I->isCtrl()) 463 ChainPreds.push_back(*I); 464 else if (isOperandOf(I->getSUnit(), LoadNode)) 465 LoadPreds.push_back(*I); 466 else 467 NodePreds.push_back(*I); 468 } 469 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 470 I != E; ++I) { 471 if (I->isCtrl()) 472 ChainSuccs.push_back(*I); 473 else 474 NodeSuccs.push_back(*I); 475 } 476 477 // Now assign edges to the newly-created nodes. 478 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 479 const SDep &Pred = ChainPreds[i]; 480 RemovePred(SU, Pred); 481 if (isNewLoad) 482 AddPred(LoadSU, Pred); 483 } 484 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 485 const SDep &Pred = LoadPreds[i]; 486 RemovePred(SU, Pred); 487 if (isNewLoad) 488 AddPred(LoadSU, Pred); 489 } 490 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 491 const SDep &Pred = NodePreds[i]; 492 RemovePred(SU, Pred); 493 AddPred(NewSU, Pred); 494 } 495 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 496 SDep D = NodeSuccs[i]; 497 SUnit *SuccDep = D.getSUnit(); 498 D.setSUnit(SU); 499 RemovePred(SuccDep, D); 500 D.setSUnit(NewSU); 501 AddPred(SuccDep, D); 502 } 503 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 504 SDep D = ChainSuccs[i]; 505 SUnit *SuccDep = D.getSUnit(); 506 D.setSUnit(SU); 507 RemovePred(SuccDep, D); 508 if (isNewLoad) { 509 D.setSUnit(LoadSU); 510 AddPred(SuccDep, D); 511 } 512 } 513 514 // Add a data dependency to reflect that NewSU reads the value defined 515 // by LoadSU. 516 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 517 518 if (isNewLoad) 519 AvailableQueue->addNode(LoadSU); 520 AvailableQueue->addNode(NewSU); 521 522 ++NumUnfolds; 523 524 if (NewSU->NumSuccsLeft == 0) { 525 NewSU->isAvailable = true; 526 return NewSU; 527 } 528 SU = NewSU; 529 } 530 531 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 532 NewSU = CreateClone(SU); 533 534 // New SUnit has the exact same predecessors. 535 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 536 I != E; ++I) 537 if (!I->isArtificial()) 538 AddPred(NewSU, *I); 539 540 // Only copy scheduled successors. Cut them from old node's successor 541 // list and move them over. 542 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 543 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 544 I != E; ++I) { 545 if (I->isArtificial()) 546 continue; 547 SUnit *SuccSU = I->getSUnit(); 548 if (SuccSU->isScheduled) { 549 SDep D = *I; 550 D.setSUnit(NewSU); 551 AddPred(SuccSU, D); 552 D.setSUnit(SU); 553 DelDeps.push_back(std::make_pair(SuccSU, D)); 554 } 555 } 556 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 557 RemovePred(DelDeps[i].first, DelDeps[i].second); 558 559 AvailableQueue->updateNode(SU); 560 AvailableQueue->addNode(NewSU); 561 562 ++NumDups; 563 return NewSU; 564} 565 566/// InsertCopiesAndMoveSuccs - Insert register copies and move all 567/// scheduled successors of the given SUnit to the last copy. 568void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 569 const TargetRegisterClass *DestRC, 570 const TargetRegisterClass *SrcRC, 571 SmallVector<SUnit*, 2> &Copies) { 572 SUnit *CopyFromSU = CreateNewSUnit(NULL); 573 CopyFromSU->CopySrcRC = SrcRC; 574 CopyFromSU->CopyDstRC = DestRC; 575 576 SUnit *CopyToSU = CreateNewSUnit(NULL); 577 CopyToSU->CopySrcRC = DestRC; 578 CopyToSU->CopyDstRC = SrcRC; 579 580 // Only copy scheduled successors. Cut them from old node's successor 581 // list and move them over. 582 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 583 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 584 I != E; ++I) { 585 if (I->isArtificial()) 586 continue; 587 SUnit *SuccSU = I->getSUnit(); 588 if (SuccSU->isScheduled) { 589 SDep D = *I; 590 D.setSUnit(CopyToSU); 591 AddPred(SuccSU, D); 592 DelDeps.push_back(std::make_pair(SuccSU, *I)); 593 } 594 } 595 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 596 RemovePred(DelDeps[i].first, DelDeps[i].second); 597 598 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 599 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 600 601 AvailableQueue->updateNode(SU); 602 AvailableQueue->addNode(CopyFromSU); 603 AvailableQueue->addNode(CopyToSU); 604 Copies.push_back(CopyFromSU); 605 Copies.push_back(CopyToSU); 606 607 ++NumPRCopies; 608} 609 610/// getPhysicalRegisterVT - Returns the ValueType of the physical register 611/// definition of the specified node. 612/// FIXME: Move to SelectionDAG? 613static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 614 const TargetInstrInfo *TII) { 615 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 616 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 617 unsigned NumRes = TID.getNumDefs(); 618 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 619 if (Reg == *ImpDef) 620 break; 621 ++NumRes; 622 } 623 return N->getValueType(NumRes); 624} 625 626/// CheckForLiveRegDef - Return true and update live register vector if the 627/// specified register def of the specified SUnit clobbers any "live" registers. 628static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 629 std::vector<SUnit*> &LiveRegDefs, 630 SmallSet<unsigned, 4> &RegAdded, 631 SmallVector<unsigned, 4> &LRegs, 632 const TargetRegisterInfo *TRI) { 633 bool Added = false; 634 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 635 if (RegAdded.insert(Reg)) { 636 LRegs.push_back(Reg); 637 Added = true; 638 } 639 } 640 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 641 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 642 if (RegAdded.insert(*Alias)) { 643 LRegs.push_back(*Alias); 644 Added = true; 645 } 646 } 647 return Added; 648} 649 650/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 651/// scheduling of the given node to satisfy live physical register dependencies. 652/// If the specific node is the last one that's available to schedule, do 653/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 654bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 655 SmallVector<unsigned, 4> &LRegs){ 656 if (NumLiveRegs == 0) 657 return false; 658 659 SmallSet<unsigned, 4> RegAdded; 660 // If this node would clobber any "live" register, then it's not ready. 661 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 662 I != E; ++I) { 663 if (I->isAssignedRegDep()) 664 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 665 RegAdded, LRegs, TRI); 666 } 667 668 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 669 if (Node->getOpcode() == ISD::INLINEASM) { 670 // Inline asm can clobber physical defs. 671 unsigned NumOps = Node->getNumOperands(); 672 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) 673 --NumOps; // Ignore the flag operand. 674 675 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 676 unsigned Flags = 677 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 678 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 679 680 ++i; // Skip the ID value. 681 if (InlineAsm::isRegDefKind(Flags) || 682 InlineAsm::isRegDefEarlyClobberKind(Flags)) { 683 // Check for def of register or earlyclobber register. 684 for (; NumVals; --NumVals, ++i) { 685 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 686 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 687 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 688 } 689 } else 690 i += NumVals; 691 } 692 continue; 693 } 694 695 if (!Node->isMachineOpcode()) 696 continue; 697 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 698 if (!TID.ImplicitDefs) 699 continue; 700 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 701 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 702 } 703 return !LRegs.empty(); 704} 705 706 707/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 708/// schedulers. 709void ScheduleDAGRRList::ListScheduleBottomUp() { 710 unsigned CurCycle = 0; 711 712 // Release any predecessors of the special Exit node. 713 ReleasePredecessors(&ExitSU, CurCycle); 714 715 // Add root to Available queue. 716 if (!SUnits.empty()) { 717 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 718 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 719 RootSU->isAvailable = true; 720 AvailableQueue->push(RootSU); 721 } 722 723 // While Available queue is not empty, grab the node with the highest 724 // priority. If it is not ready put it back. Schedule the node. 725 SmallVector<SUnit*, 4> NotReady; 726 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 727 Sequence.reserve(SUnits.size()); 728 while (!AvailableQueue->empty()) { 729 bool Delayed = false; 730 LRegsMap.clear(); 731 SUnit *CurSU = AvailableQueue->pop(); 732 while (CurSU) { 733 SmallVector<unsigned, 4> LRegs; 734 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 735 break; 736 Delayed = true; 737 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 738 739 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 740 NotReady.push_back(CurSU); 741 CurSU = AvailableQueue->pop(); 742 } 743 744 // All candidates are delayed due to live physical reg dependencies. 745 // Try backtracking, code duplication, or inserting cross class copies 746 // to resolve it. 747 if (Delayed && !CurSU) { 748 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 749 SUnit *TrySU = NotReady[i]; 750 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 751 752 // Try unscheduling up to the point where it's safe to schedule 753 // this node. 754 unsigned LiveCycle = CurCycle; 755 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 756 unsigned Reg = LRegs[j]; 757 unsigned LCycle = LiveRegCycles[Reg]; 758 LiveCycle = std::min(LiveCycle, LCycle); 759 } 760 SUnit *OldSU = Sequence[LiveCycle]; 761 if (!WillCreateCycle(TrySU, OldSU)) { 762 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 763 // Force the current node to be scheduled before the node that 764 // requires the physical reg dep. 765 if (OldSU->isAvailable) { 766 OldSU->isAvailable = false; 767 AvailableQueue->remove(OldSU); 768 } 769 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 770 /*Reg=*/0, /*isNormalMemory=*/false, 771 /*isMustAlias=*/false, /*isArtificial=*/true)); 772 // If one or more successors has been unscheduled, then the current 773 // node is no longer avaialable. Schedule a successor that's now 774 // available instead. 775 if (!TrySU->isAvailable) 776 CurSU = AvailableQueue->pop(); 777 else { 778 CurSU = TrySU; 779 TrySU->isPending = false; 780 NotReady.erase(NotReady.begin()+i); 781 } 782 break; 783 } 784 } 785 786 if (!CurSU) { 787 // Can't backtrack. If it's too expensive to copy the value, then try 788 // duplicate the nodes that produces these "too expensive to copy" 789 // values to break the dependency. In case even that doesn't work, 790 // insert cross class copies. 791 // If it's not too expensive, i.e. cost != -1, issue copies. 792 SUnit *TrySU = NotReady[0]; 793 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 794 assert(LRegs.size() == 1 && "Can't handle this yet!"); 795 unsigned Reg = LRegs[0]; 796 SUnit *LRDef = LiveRegDefs[Reg]; 797 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 798 const TargetRegisterClass *RC = 799 TRI->getPhysicalRegisterRegClass(Reg, VT); 800 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 801 802 // If cross copy register class is null, then it must be possible copy 803 // the value directly. Do not try duplicate the def. 804 SUnit *NewDef = 0; 805 if (DestRC) 806 NewDef = CopyAndMoveSuccessors(LRDef); 807 else 808 DestRC = RC; 809 if (!NewDef) { 810 // Issue copies, these can be expensive cross register class copies. 811 SmallVector<SUnit*, 2> Copies; 812 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 813 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum 814 << " to SU #" << Copies.front()->NodeNum << "\n"); 815 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 816 /*Reg=*/0, /*isNormalMemory=*/false, 817 /*isMustAlias=*/false, 818 /*isArtificial=*/true)); 819 NewDef = Copies.back(); 820 } 821 822 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum 823 << " to SU #" << TrySU->NodeNum << "\n"); 824 LiveRegDefs[Reg] = NewDef; 825 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 826 /*Reg=*/0, /*isNormalMemory=*/false, 827 /*isMustAlias=*/false, 828 /*isArtificial=*/true)); 829 TrySU->isAvailable = false; 830 CurSU = NewDef; 831 } 832 833 assert(CurSU && "Unable to resolve live physical register dependencies!"); 834 } 835 836 // Add the nodes that aren't ready back onto the available list. 837 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 838 NotReady[i]->isPending = false; 839 // May no longer be available due to backtracking. 840 if (NotReady[i]->isAvailable) 841 AvailableQueue->push(NotReady[i]); 842 } 843 NotReady.clear(); 844 845 if (CurSU) 846 ScheduleNodeBottomUp(CurSU, CurCycle); 847 ++CurCycle; 848 AvailableQueue->setCurCycle(CurCycle); 849 } 850 851 // Reverse the order if it is bottom up. 852 std::reverse(Sequence.begin(), Sequence.end()); 853 854#ifndef NDEBUG 855 VerifySchedule(isBottomUp); 856#endif 857} 858 859//===----------------------------------------------------------------------===// 860// Top-Down Scheduling 861//===----------------------------------------------------------------------===// 862 863/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 864/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 865void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 866 SUnit *SuccSU = SuccEdge->getSUnit(); 867 868#ifndef NDEBUG 869 if (SuccSU->NumPredsLeft == 0) { 870 dbgs() << "*** Scheduling failed! ***\n"; 871 SuccSU->dump(this); 872 dbgs() << " has been released too many times!\n"; 873 llvm_unreachable(0); 874 } 875#endif 876 --SuccSU->NumPredsLeft; 877 878 // If all the node's predecessors are scheduled, this node is ready 879 // to be scheduled. Ignore the special ExitSU node. 880 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 881 SuccSU->isAvailable = true; 882 AvailableQueue->push(SuccSU); 883 } 884} 885 886void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 887 // Top down: release successors 888 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 889 I != E; ++I) { 890 assert(!I->isAssignedRegDep() && 891 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 892 893 ReleaseSucc(SU, &*I); 894 } 895} 896 897/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 898/// count of its successors. If a successor pending count is zero, add it to 899/// the Available queue. 900void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 901 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 902 DEBUG(SU->dump(this)); 903 904 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 905 SU->setDepthToAtLeast(CurCycle); 906 Sequence.push_back(SU); 907 908 ReleaseSuccessors(SU); 909 SU->isScheduled = true; 910 AvailableQueue->ScheduledNode(SU); 911} 912 913/// ListScheduleTopDown - The main loop of list scheduling for top-down 914/// schedulers. 915void ScheduleDAGRRList::ListScheduleTopDown() { 916 unsigned CurCycle = 0; 917 AvailableQueue->setCurCycle(CurCycle); 918 919 // Release any successors of the special Entry node. 920 ReleaseSuccessors(&EntrySU); 921 922 // All leaves to Available queue. 923 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 924 // It is available if it has no predecessors. 925 if (SUnits[i].Preds.empty()) { 926 AvailableQueue->push(&SUnits[i]); 927 SUnits[i].isAvailable = true; 928 } 929 } 930 931 // While Available queue is not empty, grab the node with the highest 932 // priority. If it is not ready put it back. Schedule the node. 933 Sequence.reserve(SUnits.size()); 934 while (!AvailableQueue->empty()) { 935 SUnit *CurSU = AvailableQueue->pop(); 936 937 if (CurSU) 938 ScheduleNodeTopDown(CurSU, CurCycle); 939 ++CurCycle; 940 AvailableQueue->setCurCycle(CurCycle); 941 } 942 943#ifndef NDEBUG 944 VerifySchedule(isBottomUp); 945#endif 946} 947 948 949//===----------------------------------------------------------------------===// 950// RegReductionPriorityQueue Implementation 951//===----------------------------------------------------------------------===// 952// 953// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 954// to reduce register pressure. 955// 956namespace { 957 template<class SF> 958 class RegReductionPriorityQueue; 959 960 /// Sorting functions for the Available queue. 961 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 962 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 963 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 964 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 965 966 bool operator()(const SUnit* left, const SUnit* right) const; 967 }; 968 969 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 970 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 971 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 972 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 973 974 bool operator()(const SUnit* left, const SUnit* right) const; 975 }; 976 977 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 978 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; 979 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) 980 : SPQ(spq) {} 981 src_ls_rr_sort(const src_ls_rr_sort &RHS) 982 : SPQ(RHS.SPQ) {} 983 984 bool operator()(const SUnit* left, const SUnit* right) const; 985 }; 986 987 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 988 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; 989 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) 990 : SPQ(spq) {} 991 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) 992 : SPQ(RHS.SPQ) {} 993 994 bool operator()(const SUnit* left, const SUnit* right) const; 995 }; 996} // end anonymous namespace 997 998/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 999/// Smaller number is the higher priority. 1000static unsigned 1001CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1002 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1003 if (SethiUllmanNumber != 0) 1004 return SethiUllmanNumber; 1005 1006 unsigned Extra = 0; 1007 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1008 I != E; ++I) { 1009 if (I->isCtrl()) continue; // ignore chain preds 1010 SUnit *PredSU = I->getSUnit(); 1011 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1012 if (PredSethiUllman > SethiUllmanNumber) { 1013 SethiUllmanNumber = PredSethiUllman; 1014 Extra = 0; 1015 } else if (PredSethiUllman == SethiUllmanNumber) 1016 ++Extra; 1017 } 1018 1019 SethiUllmanNumber += Extra; 1020 1021 if (SethiUllmanNumber == 0) 1022 SethiUllmanNumber = 1; 1023 1024 return SethiUllmanNumber; 1025} 1026 1027namespace { 1028 template<class SF> 1029 class RegReductionPriorityQueue : public SchedulingPriorityQueue { 1030 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 1031 unsigned CurQueueId; 1032 1033 protected: 1034 // SUnits - The SUnits for the current graph. 1035 std::vector<SUnit> *SUnits; 1036 1037 const TargetInstrInfo *TII; 1038 const TargetRegisterInfo *TRI; 1039 ScheduleDAGRRList *scheduleDAG; 1040 1041 // SethiUllmanNumbers - The SethiUllman number for each node. 1042 std::vector<unsigned> SethiUllmanNumbers; 1043 1044 public: 1045 RegReductionPriorityQueue(const TargetInstrInfo *tii, 1046 const TargetRegisterInfo *tri) 1047 : Queue(SF(this)), CurQueueId(0), 1048 TII(tii), TRI(tri), scheduleDAG(NULL) {} 1049 1050 void initNodes(std::vector<SUnit> &sunits) { 1051 SUnits = &sunits; 1052 // Add pseudo dependency edges for two-address nodes. 1053 AddPseudoTwoAddrDeps(); 1054 // Reroute edges to nodes with multiple uses. 1055 PrescheduleNodesWithMultipleUses(); 1056 // Calculate node priorities. 1057 CalculateSethiUllmanNumbers(); 1058 } 1059 1060 void addNode(const SUnit *SU) { 1061 unsigned SUSize = SethiUllmanNumbers.size(); 1062 if (SUnits->size() > SUSize) 1063 SethiUllmanNumbers.resize(SUSize*2, 0); 1064 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1065 } 1066 1067 void updateNode(const SUnit *SU) { 1068 SethiUllmanNumbers[SU->NodeNum] = 0; 1069 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1070 } 1071 1072 void releaseState() { 1073 SUnits = 0; 1074 SethiUllmanNumbers.clear(); 1075 } 1076 1077 unsigned getNodePriority(const SUnit *SU) const { 1078 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1079 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1080 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1081 // CopyToReg should be close to its uses to facilitate coalescing and 1082 // avoid spilling. 1083 return 0; 1084 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1085 Opc == TargetOpcode::SUBREG_TO_REG || 1086 Opc == TargetOpcode::INSERT_SUBREG) 1087 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1088 // close to their uses to facilitate coalescing. 1089 return 0; 1090 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1091 // If SU does not have a register use, i.e. it doesn't produce a value 1092 // that would be consumed (e.g. store), then it terminates a chain of 1093 // computation. Give it a large SethiUllman number so it will be 1094 // scheduled right before its predecessors that it doesn't lengthen 1095 // their live ranges. 1096 return 0xffff; 1097 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1098 // If SU does not have a register def, schedule it close to its uses 1099 // because it does not lengthen any live ranges. 1100 return 0; 1101 return SethiUllmanNumbers[SU->NodeNum]; 1102 } 1103 1104 unsigned getNodeOrdering(const SUnit *SU) const { 1105 return scheduleDAG->DAG->GetOrdering(SU->getNode()); 1106 } 1107 1108 unsigned size() const { return Queue.size(); } 1109 1110 bool empty() const { return Queue.empty(); } 1111 1112 void push(SUnit *U) { 1113 assert(!U->NodeQueueId && "Node in the queue already"); 1114 U->NodeQueueId = ++CurQueueId; 1115 Queue.push(U); 1116 } 1117 1118 void push_all(const std::vector<SUnit *> &Nodes) { 1119 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1120 push(Nodes[i]); 1121 } 1122 1123 SUnit *pop() { 1124 if (empty()) return NULL; 1125 SUnit *V = Queue.top(); 1126 Queue.pop(); 1127 V->NodeQueueId = 0; 1128 return V; 1129 } 1130 1131 void remove(SUnit *SU) { 1132 assert(!Queue.empty() && "Queue is empty!"); 1133 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1134 Queue.erase_one(SU); 1135 SU->NodeQueueId = 0; 1136 } 1137 1138 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1139 scheduleDAG = scheduleDag; 1140 } 1141 1142 protected: 1143 bool canClobber(const SUnit *SU, const SUnit *Op); 1144 void AddPseudoTwoAddrDeps(); 1145 void PrescheduleNodesWithMultipleUses(); 1146 void CalculateSethiUllmanNumbers(); 1147 }; 1148 1149 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1150 BURegReductionPriorityQueue; 1151 1152 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1153 TDRegReductionPriorityQueue; 1154 1155 typedef RegReductionPriorityQueue<src_ls_rr_sort> 1156 SrcRegReductionPriorityQueue; 1157 1158 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1159 HybridBURRPriorityQueue; 1160} 1161 1162/// closestSucc - Returns the scheduled cycle of the successor which is 1163/// closest to the current cycle. 1164static unsigned closestSucc(const SUnit *SU) { 1165 unsigned MaxHeight = 0; 1166 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1167 I != E; ++I) { 1168 if (I->isCtrl()) continue; // ignore chain succs 1169 unsigned Height = I->getSUnit()->getHeight(); 1170 // If there are bunch of CopyToRegs stacked up, they should be considered 1171 // to be at the same position. 1172 if (I->getSUnit()->getNode() && 1173 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1174 Height = closestSucc(I->getSUnit())+1; 1175 if (Height > MaxHeight) 1176 MaxHeight = Height; 1177 } 1178 return MaxHeight; 1179} 1180 1181/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1182/// for scratch registers, i.e. number of data dependencies. 1183static unsigned calcMaxScratches(const SUnit *SU) { 1184 unsigned Scratches = 0; 1185 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1186 I != E; ++I) { 1187 if (I->isCtrl()) continue; // ignore chain preds 1188 Scratches++; 1189 } 1190 return Scratches; 1191} 1192 1193template <typename RRSort> 1194static bool BURRSort(const SUnit *left, const SUnit *right, 1195 const RegReductionPriorityQueue<RRSort> *SPQ) { 1196 unsigned LPriority = SPQ->getNodePriority(left); 1197 unsigned RPriority = SPQ->getNodePriority(right); 1198 if (LPriority != RPriority) 1199 return LPriority > RPriority; 1200 1201 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1202 // e.g. 1203 // t1 = op t2, c1 1204 // t3 = op t4, c2 1205 // 1206 // and the following instructions are both ready. 1207 // t2 = op c3 1208 // t4 = op c4 1209 // 1210 // Then schedule t2 = op first. 1211 // i.e. 1212 // t4 = op c4 1213 // t2 = op c3 1214 // t1 = op t2, c1 1215 // t3 = op t4, c2 1216 // 1217 // This creates more short live intervals. 1218 unsigned LDist = closestSucc(left); 1219 unsigned RDist = closestSucc(right); 1220 if (LDist != RDist) 1221 return LDist < RDist; 1222 1223 // How many registers becomes live when the node is scheduled. 1224 unsigned LScratch = calcMaxScratches(left); 1225 unsigned RScratch = calcMaxScratches(right); 1226 if (LScratch != RScratch) 1227 return LScratch > RScratch; 1228 1229 if (left->getHeight() != right->getHeight()) 1230 return left->getHeight() > right->getHeight(); 1231 1232 if (left->getDepth() != right->getDepth()) 1233 return left->getDepth() < right->getDepth(); 1234 1235 assert(left->NodeQueueId && right->NodeQueueId && 1236 "NodeQueueId cannot be zero"); 1237 return (left->NodeQueueId > right->NodeQueueId); 1238} 1239 1240// Bottom up 1241bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1242 return BURRSort(left, right, SPQ); 1243} 1244 1245// Source order, otherwise bottom up. 1246bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1247 unsigned LOrder = SPQ->getNodeOrdering(left); 1248 unsigned ROrder = SPQ->getNodeOrdering(right); 1249 1250 // Prefer an ordering where the lower the non-zero order number, the higher 1251 // the preference. 1252 if ((LOrder || ROrder) && LOrder != ROrder) 1253 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 1254 1255 return BURRSort(left, right, SPQ); 1256} 1257 1258bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ 1259 bool LStall = left->SchedulingPref == Sched::Latency && 1260 SPQ->getCurCycle() < left->getHeight(); 1261 bool RStall = right->SchedulingPref == Sched::Latency && 1262 SPQ->getCurCycle() < right->getHeight(); 1263 // If scheduling one of the node will cause a pipeline stall, delay it. 1264 // If scheduling either one of the node will cause a pipeline stall, sort them 1265 // according to their height. 1266 // If neither will cause a pipeline stall, try to reduce register pressure. 1267 if (LStall) { 1268 if (!RStall) 1269 return true; 1270 if (left->getHeight() != right->getHeight()) 1271 return left->getHeight() > right->getHeight(); 1272 } else if (RStall) 1273 return false; 1274 return BURRSort(left, right, SPQ); 1275} 1276 1277template<class SF> 1278bool 1279RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1280 if (SU->isTwoAddress) { 1281 unsigned Opc = SU->getNode()->getMachineOpcode(); 1282 const TargetInstrDesc &TID = TII->get(Opc); 1283 unsigned NumRes = TID.getNumDefs(); 1284 unsigned NumOps = TID.getNumOperands() - NumRes; 1285 for (unsigned i = 0; i != NumOps; ++i) { 1286 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1287 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1288 if (DU->getNodeId() != -1 && 1289 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1290 return true; 1291 } 1292 } 1293 } 1294 return false; 1295} 1296 1297/// hasCopyToRegUse - Return true if SU has a value successor that is a 1298/// CopyToReg node. 1299static bool hasCopyToRegUse(const SUnit *SU) { 1300 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1301 I != E; ++I) { 1302 if (I->isCtrl()) continue; 1303 const SUnit *SuccSU = I->getSUnit(); 1304 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1305 return true; 1306 } 1307 return false; 1308} 1309 1310/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1311/// physical register defs. 1312static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1313 const TargetInstrInfo *TII, 1314 const TargetRegisterInfo *TRI) { 1315 SDNode *N = SuccSU->getNode(); 1316 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1317 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1318 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1319 for (const SDNode *SUNode = SU->getNode(); SUNode; 1320 SUNode = SUNode->getFlaggedNode()) { 1321 if (!SUNode->isMachineOpcode()) 1322 continue; 1323 const unsigned *SUImpDefs = 1324 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1325 if (!SUImpDefs) 1326 return false; 1327 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1328 EVT VT = N->getValueType(i); 1329 if (VT == MVT::Flag || VT == MVT::Other) 1330 continue; 1331 if (!N->hasAnyUseOfValue(i)) 1332 continue; 1333 unsigned Reg = ImpDefs[i - NumDefs]; 1334 for (;*SUImpDefs; ++SUImpDefs) { 1335 unsigned SUReg = *SUImpDefs; 1336 if (TRI->regsOverlap(Reg, SUReg)) 1337 return true; 1338 } 1339 } 1340 } 1341 return false; 1342} 1343 1344/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1345/// are not handled well by the general register pressure reduction 1346/// heuristics. When presented with code like this: 1347/// 1348/// N 1349/// / | 1350/// / | 1351/// U store 1352/// | 1353/// ... 1354/// 1355/// the heuristics tend to push the store up, but since the 1356/// operand of the store has another use (U), this would increase 1357/// the length of that other use (the U->N edge). 1358/// 1359/// This function transforms code like the above to route U's 1360/// dependence through the store when possible, like this: 1361/// 1362/// N 1363/// || 1364/// || 1365/// store 1366/// | 1367/// U 1368/// | 1369/// ... 1370/// 1371/// This results in the store being scheduled immediately 1372/// after N, which shortens the U->N live range, reducing 1373/// register pressure. 1374/// 1375template<class SF> 1376void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1377 // Visit all the nodes in topological order, working top-down. 1378 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1379 SUnit *SU = &(*SUnits)[i]; 1380 // For now, only look at nodes with no data successors, such as stores. 1381 // These are especially important, due to the heuristics in 1382 // getNodePriority for nodes with no data successors. 1383 if (SU->NumSuccs != 0) 1384 continue; 1385 // For now, only look at nodes with exactly one data predecessor. 1386 if (SU->NumPreds != 1) 1387 continue; 1388 // Avoid prescheduling copies to virtual registers, which don't behave 1389 // like other nodes from the perspective of scheduling heuristics. 1390 if (SDNode *N = SU->getNode()) 1391 if (N->getOpcode() == ISD::CopyToReg && 1392 TargetRegisterInfo::isVirtualRegister 1393 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1394 continue; 1395 1396 // Locate the single data predecessor. 1397 SUnit *PredSU = 0; 1398 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1399 EE = SU->Preds.end(); II != EE; ++II) 1400 if (!II->isCtrl()) { 1401 PredSU = II->getSUnit(); 1402 break; 1403 } 1404 assert(PredSU); 1405 1406 // Don't rewrite edges that carry physregs, because that requires additional 1407 // support infrastructure. 1408 if (PredSU->hasPhysRegDefs) 1409 continue; 1410 // Short-circuit the case where SU is PredSU's only data successor. 1411 if (PredSU->NumSuccs == 1) 1412 continue; 1413 // Avoid prescheduling to copies from virtual registers, which don't behave 1414 // like other nodes from the perspective of scheduling // heuristics. 1415 if (SDNode *N = SU->getNode()) 1416 if (N->getOpcode() == ISD::CopyFromReg && 1417 TargetRegisterInfo::isVirtualRegister 1418 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1419 continue; 1420 1421 // Perform checks on the successors of PredSU. 1422 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1423 EE = PredSU->Succs.end(); II != EE; ++II) { 1424 SUnit *PredSuccSU = II->getSUnit(); 1425 if (PredSuccSU == SU) continue; 1426 // If PredSU has another successor with no data successors, for 1427 // now don't attempt to choose either over the other. 1428 if (PredSuccSU->NumSuccs == 0) 1429 goto outer_loop_continue; 1430 // Don't break physical register dependencies. 1431 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1432 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1433 goto outer_loop_continue; 1434 // Don't introduce graph cycles. 1435 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1436 goto outer_loop_continue; 1437 } 1438 1439 // Ok, the transformation is safe and the heuristics suggest it is 1440 // profitable. Update the graph. 1441 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum 1442 << " next to PredSU #" << PredSU->NodeNum 1443 << " to guide scheduling in the presence of multiple uses\n"); 1444 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1445 SDep Edge = PredSU->Succs[i]; 1446 assert(!Edge.isAssignedRegDep()); 1447 SUnit *SuccSU = Edge.getSUnit(); 1448 if (SuccSU != SU) { 1449 Edge.setSUnit(PredSU); 1450 scheduleDAG->RemovePred(SuccSU, Edge); 1451 scheduleDAG->AddPred(SU, Edge); 1452 Edge.setSUnit(SU); 1453 scheduleDAG->AddPred(SuccSU, Edge); 1454 --i; 1455 } 1456 } 1457 outer_loop_continue:; 1458 } 1459} 1460 1461/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1462/// it as a def&use operand. Add a pseudo control edge from it to the other 1463/// node (if it won't create a cycle) so the two-address one will be scheduled 1464/// first (lower in the schedule). If both nodes are two-address, favor the 1465/// one that has a CopyToReg use (more likely to be a loop induction update). 1466/// If both are two-address, but one is commutable while the other is not 1467/// commutable, favor the one that's not commutable. 1468template<class SF> 1469void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1470 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1471 SUnit *SU = &(*SUnits)[i]; 1472 if (!SU->isTwoAddress) 1473 continue; 1474 1475 SDNode *Node = SU->getNode(); 1476 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1477 continue; 1478 1479 unsigned Opc = Node->getMachineOpcode(); 1480 const TargetInstrDesc &TID = TII->get(Opc); 1481 unsigned NumRes = TID.getNumDefs(); 1482 unsigned NumOps = TID.getNumOperands() - NumRes; 1483 for (unsigned j = 0; j != NumOps; ++j) { 1484 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1485 continue; 1486 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1487 if (DU->getNodeId() == -1) 1488 continue; 1489 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1490 if (!DUSU) continue; 1491 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1492 E = DUSU->Succs.end(); I != E; ++I) { 1493 if (I->isCtrl()) continue; 1494 SUnit *SuccSU = I->getSUnit(); 1495 if (SuccSU == SU) 1496 continue; 1497 // Be conservative. Ignore if nodes aren't at roughly the same 1498 // depth and height. 1499 if (SuccSU->getHeight() < SU->getHeight() && 1500 (SU->getHeight() - SuccSU->getHeight()) > 1) 1501 continue; 1502 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 1503 // constrains whatever is using the copy, instead of the copy 1504 // itself. In the case that the copy is coalesced, this 1505 // preserves the intent of the pseudo two-address heurietics. 1506 while (SuccSU->Succs.size() == 1 && 1507 SuccSU->getNode()->isMachineOpcode() && 1508 SuccSU->getNode()->getMachineOpcode() == 1509 TargetOpcode::COPY_TO_REGCLASS) 1510 SuccSU = SuccSU->Succs.front().getSUnit(); 1511 // Don't constrain non-instruction nodes. 1512 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1513 continue; 1514 // Don't constrain nodes with physical register defs if the 1515 // predecessor can clobber them. 1516 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1517 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1518 continue; 1519 } 1520 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1521 // these may be coalesced away. We want them close to their uses. 1522 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1523 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 1524 SuccOpc == TargetOpcode::INSERT_SUBREG || 1525 SuccOpc == TargetOpcode::SUBREG_TO_REG) 1526 continue; 1527 if ((!canClobber(SuccSU, DUSU) || 1528 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1529 (!SU->isCommutable && SuccSU->isCommutable)) && 1530 !scheduleDAG->IsReachable(SuccSU, SU)) { 1531 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 1532 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 1533 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1534 /*Reg=*/0, /*isNormalMemory=*/false, 1535 /*isMustAlias=*/false, 1536 /*isArtificial=*/true)); 1537 } 1538 } 1539 } 1540 } 1541} 1542 1543/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1544/// scheduling units. 1545template<class SF> 1546void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1547 SethiUllmanNumbers.assign(SUnits->size(), 0); 1548 1549 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1550 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1551} 1552 1553/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1554/// predecessors of the successors of the SUnit SU. Stop when the provided 1555/// limit is exceeded. 1556static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1557 unsigned Limit) { 1558 unsigned Sum = 0; 1559 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1560 I != E; ++I) { 1561 const SUnit *SuccSU = I->getSUnit(); 1562 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1563 EE = SuccSU->Preds.end(); II != EE; ++II) { 1564 SUnit *PredSU = II->getSUnit(); 1565 if (!PredSU->isScheduled) 1566 if (++Sum > Limit) 1567 return Sum; 1568 } 1569 } 1570 return Sum; 1571} 1572 1573 1574// Top down 1575bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1576 unsigned LPriority = SPQ->getNodePriority(left); 1577 unsigned RPriority = SPQ->getNodePriority(right); 1578 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1579 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1580 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1581 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1582 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1583 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1584 1585 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1586 return false; 1587 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1588 return true; 1589 1590 if (LIsFloater) 1591 LBonus -= 2; 1592 if (RIsFloater) 1593 RBonus -= 2; 1594 if (left->NumSuccs == 1) 1595 LBonus += 2; 1596 if (right->NumSuccs == 1) 1597 RBonus += 2; 1598 1599 if (LPriority+LBonus != RPriority+RBonus) 1600 return LPriority+LBonus < RPriority+RBonus; 1601 1602 if (left->getDepth() != right->getDepth()) 1603 return left->getDepth() < right->getDepth(); 1604 1605 if (left->NumSuccsLeft != right->NumSuccsLeft) 1606 return left->NumSuccsLeft > right->NumSuccsLeft; 1607 1608 assert(left->NodeQueueId && right->NodeQueueId && 1609 "NodeQueueId cannot be zero"); 1610 return (left->NodeQueueId > right->NodeQueueId); 1611} 1612 1613//===----------------------------------------------------------------------===// 1614// Public Constructor Functions 1615//===----------------------------------------------------------------------===// 1616 1617llvm::ScheduleDAGSDNodes * 1618llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1619 const TargetMachine &TM = IS->TM; 1620 const TargetInstrInfo *TII = TM.getInstrInfo(); 1621 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1622 1623 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1624 1625 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); 1626 PQ->setScheduleDAG(SD); 1627 return SD; 1628} 1629 1630llvm::ScheduleDAGSDNodes * 1631llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1632 const TargetMachine &TM = IS->TM; 1633 const TargetInstrInfo *TII = TM.getInstrInfo(); 1634 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1635 1636 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1637 1638 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); 1639 PQ->setScheduleDAG(SD); 1640 return SD; 1641} 1642 1643llvm::ScheduleDAGSDNodes * 1644llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1645 const TargetMachine &TM = IS->TM; 1646 const TargetInstrInfo *TII = TM.getInstrInfo(); 1647 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1648 1649 SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI); 1650 1651 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); 1652 PQ->setScheduleDAG(SD); 1653 return SD; 1654} 1655 1656llvm::ScheduleDAGSDNodes * 1657llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 1658 const TargetMachine &TM = IS->TM; 1659 const TargetInstrInfo *TII = TM.getInstrInfo(); 1660 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1661 1662 HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI); 1663 1664 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); 1665 PQ->setScheduleDAG(SD); 1666 return SD; 1667} 1668