ScheduleDAGRRList.cpp revision b42df31c2d72b3cfe7f04fad4aa3f57790272b95
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
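  /// NumLiveRegs counts how many LiveRegDefs entries are currently set, and
  /// LiveRegCycles records the cycle at which each register became live.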
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup, bool needlatency,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
      AvailableQueue(availqueue), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
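/// Builds the scheduling graph, initializes the topological ordering used
/// for cycle queries, and then runs the bottom-up or top-down loop.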
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Handle noop hazard.
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
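  // SU is the recorded live definition for each such register, so once it
  // is scheduled the corresponding LiveRegDefs entry can be cleared.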
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule. Update its
/// state and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
  AvailableQueue->UnscheduledNode(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
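/// This is used to break a live physical register dependency; it returns
/// NULL if the node cannot safely be cloned (e.g. it produces or reads a
/// flag value).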
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
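    // Chain edges and load-feeding preds follow the (possibly pre-existing)
    // LoadSU, while data preds and succs move to the unfolded NewSU.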
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
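  // The result is a chain SU -> CopyFromSU -> CopyToSU, with the already
  // scheduled successors rerouted to read the value from CopyToSU.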
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live"
/// registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specified node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

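      // Inline asm operands come in groups: a flag word encoding the operand
      // kind and register count, followed by that many register operands.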
      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
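          // (Taking OldSU off the queue and adding an artificial order edge
          // ensures the bottom-up scheduler picks TrySU before OldSU.)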
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getMinimalPhysRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If cross copy register class is null, then it must be possible to
        // copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero.
/// Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// bu_ls_rr_sort - Priority function for bottom up register pressure
  /// reduction scheduler.
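  /// The bottom-up functors below fall back to the common BURRSort
  /// comparison when their primary criteria tie.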
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // td_ls_rr_sort - Priority function for top down register pressure reduction
  // scheduler.
  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // src_ls_rr_sort - Priority function for source order scheduler.
  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
      : SPQ(spq) {}
    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
  // scheduler.
  struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
      : SPQ(spq) {}
    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    std::vector<SUnit*> Queue;
    SF Picker;
    unsigned CurQueueId;
    bool TracksRegPressure;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    MachineFunction &MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const TargetLowering *TLI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    /// RegPressure - Tracking current reg pressure per register class.
    ///
    std::vector<unsigned> RegPressure;

    /// RegLimit - Tracking the number of allocatable registers per register
    /// class.
    std::vector<unsigned> RegLimit;

  public:
    RegReductionPriorityQueue(MachineFunction &mf,
                              bool tracksrp,
                              const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri,
                              const TargetLowering *tli)
      : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
      if (TracksRegPressure) {
        unsigned NumRC = TRI->getNumRegClasses();
        RegLimit.resize(NumRC);
        RegPressure.resize(NumRC);
        std::fill(RegLimit.begin(), RegLimit.end(), 0);
        std::fill(RegPressure.begin(), RegPressure.end(), 0);
        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
               E = TRI->regclass_end(); I != E; ++I)
          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
      }
    }

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't
        // lengthen their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
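        // (For example, a leaf node such as a constant materialization.)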
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++CurQueueId;
      Queue.push_back(U);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      std::vector<SUnit *>::iterator Best = Queue.begin();
      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
             E = Queue.end(); I != E; ++I)
        if (Picker(*Best, *I))
          Best = I;
      SUnit *V = *Best;
      if (Best != prior(Queue.end()))
        std::swap(*Best, Queue.back());
      Queue.pop_back();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                   SU);
      if (I != prior(Queue.end()))
        std::swap(*I, Queue.back());
      Queue.pop_back();
      SU->NodeQueueId = 0;
    }

    bool HighRegPressure(const SUnit *SU, unsigned &Excess) const {
      if (!TLI)
        return false;

      bool High = false;
      Excess = 0;
      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            unsigned Cost = TLI->getRepRegClassCostFor(VT);
            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
              High = true;
              Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
            }
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
            High = true;
            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
          }
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
            High = true;
            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
          }
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
            High = true;
            Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
          }
        }
      }

      return High;
    }

    void ScheduledNode(SUnit *SU) {
      if (!TracksRegPressure)
        return;

      const SDNode *N = SU->getNode();
      if (!N->isMachineOpcode()) {
        if (N->getOpcode() != ISD::CopyToReg)
          return;
      } else {
        unsigned Opc = N->getMachineOpcode();
        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
            Opc == TargetOpcode::INSERT_SUBREG ||
            Opc == TargetOpcode::SUBREG_TO_REG ||
            Opc == TargetOpcode::REG_SEQUENCE ||
            Opc == TargetOpcode::IMPLICIT_DEF)
          return;
      }

      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
          continue;
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
      }

      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
      // may transfer data dependencies to CopyToReg.
      if (SU->NumSuccs && N->isMachineOpcode()) {
        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = N->getValueType(i);
          if (!N->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
            // Register pressure tracking is imprecise. This can happen.
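            // Clamp to zero instead of letting the unsigned counter wrap.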
            RegPressure[RCId] = 0;
          else
            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
        }
      }

      dumpRegPressure();
    }

    void UnscheduledNode(SUnit *SU) {
      if (!TracksRegPressure)
        return;

      const SDNode *N = SU->getNode();
      if (!N->isMachineOpcode()) {
        if (N->getOpcode() != ISD::CopyToReg)
          return;
      } else {
        unsigned Opc = N->getMachineOpcode();
        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
            Opc == TargetOpcode::INSERT_SUBREG ||
            Opc == TargetOpcode::SUBREG_TO_REG ||
            Opc == TargetOpcode::REG_SEQUENCE ||
            Opc == TargetOpcode::IMPLICIT_DEF)
          return;
      }

      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
          continue;
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
            // Register pressure tracking is imprecise. This can happen.
            RegPressure[RCId] = 0;
          else
            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
        }
      }

      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
      // may transfer data dependencies to CopyToReg.
      if (SU->NumSuccs && N->isMachineOpcode()) {
        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
          EVT VT = N->getValueType(i);
          if (VT == MVT::Flag || VT == MVT::Other)
            continue;
          if (!N->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
      }

      dumpRegPressure();
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

    void dumpRegPressure() const {
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I) {
        const TargetRegisterClass *RC = *I;
        unsigned Id = RC->getID();
        unsigned RP = RegPressure[Id];
        if (!RP) continue;
        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
              << '\n');
      }
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<src_ls_rr_sort>
    SrcRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
    HybridBURRPriorityQueue;

  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
    ILPBURRPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer the node with the lower non-zero source order number; a zero
  // order number always loses to a non-zero one.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LExcess, RExcess;
  bool LHigh = SPQ->HighRegPressure(left, LExcess);
  bool RHigh = SPQ->HighRegPressure(right, RExcess);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (LHigh && RHigh) {
    if (LExcess > RExcess)
      return true;
    else if (LExcess < RExcess)
      return false;
  } else {
    // Low register pressure situation, schedule for latency if possible.
    bool LStall = left->SchedulingPref == Sched::Latency &&
                  SPQ->getCurCycle() < left->getHeight();
    bool RStall = right->SchedulingPref == Sched::Latency &&
                  SPQ->getCurCycle() < right->getHeight();
    // If scheduling one of the nodes will cause a pipeline stall, delay it.
    // If scheduling either one of the nodes will cause a pipeline stall, sort
    // them according to their height.
    // If neither will cause a pipeline stall, try to reduce register pressure.
    if (LStall) {
      if (!RStall)
        return true;
      if (left->getHeight() != right->getHeight())
        return left->getHeight() > right->getHeight();
    } else if (RStall)
      return false;

    // If either node is scheduling for latency, sort them by height and
    // latency first.
    if (left->SchedulingPref == Sched::Latency ||
        right->SchedulingPref == Sched::Latency) {
      if (left->getHeight() != right->getHeight())
        return left->getHeight() > right->getHeight();
      if (left->Latency != right->Latency)
        return left->Latency > right->Latency;
    }
  }

  return BURRSort(left, right, SPQ);
}

bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                const SUnit *right) const {
  unsigned LExcess, RExcess;
  bool LHigh = SPQ->HighRegPressure(left, LExcess);
  bool RHigh = SPQ->HighRegPressure(right, RExcess);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (LHigh && RHigh) {
    if (LExcess > RExcess)
      return true;
    else if (LExcess < RExcess)
      return false;
  } else {
    // Low register pressure situation, schedule to maximize instruction level
    // parallelism by preferring the node with more predecessors.
    if (left->NumPreds > right->NumPreds)
      return false;
    else if (left->NumPreds < right->NumPreds)
      return true;
  }

  return BURRSort(left, right, SPQ);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
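/// This is conservative: it scans the implicit defs of every node flagged
/// to SU and reports a clobber on any register overlap.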
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

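// Hypothetical example for canClobberPhysRegDefs (register names are only
// for illustration): if one of SuccSU's used result values lives in an
// implicit def such as EAX, and any node flagged to SU also implicitly
// defines EAX or an aliasing register such as AX, regsOverlap fires and the
// function reports a clobber.
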
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///       N
///     /  |
///    /   |
///   U   store
///   |
///  ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        // RemovePred shifted the remaining entries of PredSU->Succs down,
        // so revisit the current index.
        --i;
      }
    }
  outer_loop_continue:;
  }
}

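// Worked illustration of the rewrite above (hypothetical SUnits): if N has
// data successors {store, U}, the loop replaces U's U->N dependence edge
// with U->store, while the store keeps its store->N edge. U can then only
// be scheduled after the store, so the store lands immediately after N
// instead of drifting away from it, shortening the live range of N's value.
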
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

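// Background note (textbook property, not specific to this file): a node's
// Sethi-Ullman number approximates how many registers are needed to evaluate
// the subtree below it. For example, the chain a+(b+(c+d)) can be evaluated
// with two registers, while the balanced form (a+b)+(c+d) needs three, which
// is why the numbering steers the scheduler toward chain-like evaluation
// orders.
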
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

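// Note on the wiring used by each factory in this section: the queue and the
// scheduler hold pointers to each other (setScheduleDAG closes the cycle),
// and the scheduler owns the queue and deletes it in its destructor, so
// callers only need to manage the returned ScheduleDAGRRList.
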
llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ =
    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}
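
// Usage note (illustrative; the option plumbing lives outside this file):
// each factory above is reachable through its RegisterScheduler entry, so a
// particular scheduler can be requested by name, e.g. with llc's
// -pre-RA-sched=list-ilp or -pre-RA-sched=source.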