ScheduleDAGRRList.cpp revision c5c2c5f54202791a3a853cd8ae5c2dbfd9d3f930
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up rr list scheduling which avoids stalls "
                         "for long latency instructions",
                         createHybridListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that modify
  /// the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup, bool needlatency,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
      AvailableQueue(availqueue), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs() << "********** List Scheduling **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
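  // (Note: the NULL argument is presumably the AliasAnalysis pointer; with no
  // alias analysis available, chain edges are built conservatively.)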
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Handle noop hazard.
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
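  // (LiveRegDefs acts as an interlock: a physical register becomes "live" when
  // one of its uses is scheduled bottom-up, and it stays live until this
  // defining node is scheduled here; DelayForLiveRegsBottomUp holds back
  // anything that would clobber it in between.)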
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule. Update its
/// state and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
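/// (Returns NULL when the node cannot safely be duplicated, e.g. when it
/// produces or consumes a flag value; the caller then falls back to
/// inserting cross-class copies instead.)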
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  // Check for a null node before dereferencing it for the flagged-node test.
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (N->getFlaggedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
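    // (Chain predecessors and chain successors follow the load, operands that
    // feed the load attach to it, and value edges move to the unfolded
    // operation, which then reads the loaded value through a new data edge.)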
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.
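      // (Inline asm operands come in groups: a constant "flag" word encoding
      // the group's kind and register count, followed by that many register
      // operands. Each iteration of the loop below decodes one group.)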
      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
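          // (Removing OldSU from the available queue and adding the artificial
          // edge below together guarantee TrySU is scheduled first, now that
          // backtracking has undone the conflicting part of the schedule.)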
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross copy register class is null, then it must be possible
        // to copy the value directly. Do not try duplicating the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
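  /// (RegReductionPriorityQueue::pop() treats each functor as a strict weak
  /// ordering where operator()(left, right) == true means right should be
  /// popped in preference to left, i.e. left has lower priority.)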
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
      : SPQ(spq) {}
    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    std::vector<SUnit*> Queue;
    SF Picker;
    unsigned CurQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri)
      : Picker(this), CurQueueId(0),
        TII(tii), TRI(tri), scheduleDAG(NULL) {}

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
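      // (A node's Sethi-Ullman number approximates how many registers are
      // needed to evaluate its expression subtree; scheduling the subtree
      // with the larger number first keeps fewer values live at once.)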
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++CurQueueId;
      Queue.push_back(U);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      std::vector<SUnit *>::iterator Best = Queue.begin();
      for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
           E = Queue.end(); I != E; ++I)
        if (Picker(*Best, *I))
          Best = I;
      SUnit *V = *Best;
      if (Best != prior(Queue.end()))
        std::swap(*Best, Queue.back());
      Queue.pop_back();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                   SU);
      if (I != prior(Queue.end()))
        std::swap(*I, Queue.back());
      Queue.pop_back();
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<src_ls_rr_sort>
    SrcRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
    HybridBURRPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same, e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the preference.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
  bool LStall = left->SchedulingPref == Sched::Latency &&
    SPQ->getCurCycle() < left->getHeight();
  bool RStall = right->SchedulingPref == Sched::Latency &&
    SPQ->getCurCycle() < right->getHeight();
  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both of them will cause pipeline stalls, sort them
  // according to their height.
  // If neither will cause a pipeline stall, try to reduce register pressure.
  if (LStall) {
    if (!RStall)
      return true;
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();
  } else if (RStall)
    return false;

  // If either node is scheduling for latency, sort them by height and latency
  // first.
  if (left->SchedulingPref == Sched::Latency ||
      right->SchedulingPref == Sched::Latency) {
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency;
  }

  return BURRSort(left, right, SPQ);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
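/// (Each implicit def of every machine node glued into SU is compared, via
/// regsOverlap, against the implicit defs of SuccSU whose values are used.)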
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///       N
///      / |
///     /  |
///    U  store
///    |
///   ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///       N
///       ||
///       ||
///      store
///        |
///        U
///        |
///       ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative.
        // Ignore if the nodes aren't at roughly the same depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
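/// (The top-down sort below only ever calls this with Limit == 1, as a cheap
/// test for whether SU is the sole unscheduled predecessor of its users.)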
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}