ScheduleDAGRRList.cpp revision f1b4eafbfec976f939ec0ea3e8acf91cef5363e3
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/InlineAsm.h" 21#include "llvm/CodeGen/SchedulerRegistry.h" 22#include "llvm/CodeGen/SelectionDAGISel.h" 23#include "llvm/Target/TargetRegisterInfo.h" 24#include "llvm/Target/TargetData.h" 25#include "llvm/Target/TargetMachine.h" 26#include "llvm/Target/TargetInstrInfo.h" 27#include "llvm/Target/TargetLowering.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/Statistic.h" 30#include "llvm/ADT/STLExtras.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/ErrorHandling.h" 33#include "llvm/Support/raw_ostream.h" 34#include <climits> 35using namespace llvm; 36 37STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 38STATISTIC(NumUnfolds, "Number of nodes unfolded"); 39STATISTIC(NumDups, "Number of duplicated nodes"); 40STATISTIC(NumPRCopies, "Number of physical register copies"); 41 42static RegisterScheduler 43 burrListDAGScheduler("list-burr", 44 "Bottom-up register reduction list scheduling", 45 createBURRListDAGScheduler); 46static RegisterScheduler 47 tdrListrDAGScheduler("list-tdrr", 48 "Top-down register reduction list scheduling", 49 createTDRRListDAGScheduler); 50static RegisterScheduler 51 sourceListDAGScheduler("source", 52 "Similar to list-burr but schedules in source " 53 "order when possible", 54 createSourceListDAGScheduler); 55 56static RegisterScheduler 57 hybridListDAGScheduler("list-hybrid", 58 "Bottom-up register pressure aware list scheduling " 59 "which tries to balance latency and register pressure", 60 createHybridListDAGScheduler); 61 62static RegisterScheduler 63 ILPListDAGScheduler("list-ilp", 64 "Bottom-up register pressure aware list scheduling " 65 "which tries to balance ILP and register pressure", 66 createILPListDAGScheduler); 67 68namespace { 69//===----------------------------------------------------------------------===// 70/// ScheduleDAGRRList - The actual register reduction list scheduler 71/// implementation. This supports both top-down and bottom-up scheduling. 72/// 73class ScheduleDAGRRList : public ScheduleDAGSDNodes { 74private: 75 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 76 /// it is top-down. 77 bool isBottomUp; 78 79 /// NeedLatency - True if the scheduler will make use of latency information. 80 /// 81 bool NeedLatency; 82 83 /// AvailableQueue - The priority queue to use for the available SUnits. 84 SchedulingPriorityQueue *AvailableQueue; 85 86 /// LiveRegDefs - A set of physical registers and their definition 87 /// that are "live". These nodes must be scheduled before any other nodes that 88 /// modifies the registers can be scheduled. 89 unsigned NumLiveRegs; 90 std::vector<SUnit*> LiveRegDefs; 91 std::vector<unsigned> LiveRegCycles; 92 93 /// Topo - A topological ordering for SUnits which permits fast IsReachable 94 /// and similar queries. 95 ScheduleDAGTopologicalSort Topo; 96 97public: 98 ScheduleDAGRRList(MachineFunction &mf, 99 bool isbottomup, bool needlatency, 100 SchedulingPriorityQueue *availqueue) 101 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), 102 AvailableQueue(availqueue), Topo(SUnits) { 103 } 104 105 ~ScheduleDAGRRList() { 106 delete AvailableQueue; 107 } 108 109 void Schedule(); 110 111 /// IsReachable - Checks if SU is reachable from TargetSU. 112 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 113 return Topo.IsReachable(SU, TargetSU); 114 } 115 116 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 117 /// create a cycle. 118 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 119 return Topo.WillCreateCycle(SU, TargetSU); 120 } 121 122 /// AddPred - adds a predecessor edge to SUnit SU. 123 /// This returns true if this is a new predecessor. 124 /// Updates the topological ordering if required. 125 void AddPred(SUnit *SU, const SDep &D) { 126 Topo.AddPred(SU, D.getSUnit()); 127 SU->addPred(D); 128 } 129 130 /// RemovePred - removes a predecessor edge from SUnit SU. 131 /// This returns true if an edge was removed. 132 /// Updates the topological ordering if required. 133 void RemovePred(SUnit *SU, const SDep &D) { 134 Topo.RemovePred(SU, D.getSUnit()); 135 SU->removePred(D); 136 } 137 138private: 139 void ReleasePred(SUnit *SU, const SDep *PredEdge); 140 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 141 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 142 void ReleaseSuccessors(SUnit *SU); 143 void CapturePred(SDep *PredEdge); 144 void ScheduleNodeBottomUp(SUnit*, unsigned); 145 void ScheduleNodeTopDown(SUnit*, unsigned); 146 void UnscheduleNodeBottomUp(SUnit*); 147 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 148 SUnit *CopyAndMoveSuccessors(SUnit*); 149 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 150 const TargetRegisterClass*, 151 const TargetRegisterClass*, 152 SmallVector<SUnit*, 2>&); 153 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 154 void ListScheduleTopDown(); 155 void ListScheduleBottomUp(); 156 157 158 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 159 /// Updates the topological ordering if required. 160 SUnit *CreateNewSUnit(SDNode *N) { 161 unsigned NumSUnits = SUnits.size(); 162 SUnit *NewNode = NewSUnit(N); 163 // Update the topological ordering. 164 if (NewNode->NodeNum >= NumSUnits) 165 Topo.InitDAGTopologicalSorting(); 166 return NewNode; 167 } 168 169 /// CreateClone - Creates a new SUnit from an existing one. 170 /// Updates the topological ordering if required. 171 SUnit *CreateClone(SUnit *N) { 172 unsigned NumSUnits = SUnits.size(); 173 SUnit *NewNode = Clone(N); 174 // Update the topological ordering. 175 if (NewNode->NodeNum >= NumSUnits) 176 Topo.InitDAGTopologicalSorting(); 177 return NewNode; 178 } 179 180 /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't 181 /// need actual latency information but the hybrid scheduler does. 182 bool ForceUnitLatencies() const { 183 return !NeedLatency; 184 } 185}; 186} // end anonymous namespace 187 188 189/// Schedule - Schedule the DAG using list scheduling. 190void ScheduleDAGRRList::Schedule() { 191 DEBUG(dbgs() 192 << "********** List Scheduling BB#" << BB->getNumber() 193 << " '" << BB->getName() << "' **********\n"); 194 195 NumLiveRegs = 0; 196 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 197 LiveRegCycles.resize(TRI->getNumRegs(), 0); 198 199 // Build the scheduling graph. 200 BuildSchedGraph(NULL); 201 202 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 203 SUnits[su].dumpAll(this)); 204 Topo.InitDAGTopologicalSorting(); 205 206 AvailableQueue->initNodes(SUnits); 207 208 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 209 if (isBottomUp) 210 ListScheduleBottomUp(); 211 else 212 ListScheduleTopDown(); 213 214 AvailableQueue->releaseState(); 215} 216 217//===----------------------------------------------------------------------===// 218// Bottom-Up Scheduling 219//===----------------------------------------------------------------------===// 220 221/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 222/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 223void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 224 SUnit *PredSU = PredEdge->getSUnit(); 225 226#ifndef NDEBUG 227 if (PredSU->NumSuccsLeft == 0) { 228 dbgs() << "*** Scheduling failed! ***\n"; 229 PredSU->dump(this); 230 dbgs() << " has been released too many times!\n"; 231 llvm_unreachable(0); 232 } 233#endif 234 --PredSU->NumSuccsLeft; 235 236 if (!ForceUnitLatencies()) { 237 // Updating predecessor's height. This is now the cycle when the 238 // predecessor can be scheduled without causing a pipeline stall. 239 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); 240 } 241 242 // If all the node's successors are scheduled, this node is ready 243 // to be scheduled. Ignore the special EntrySU node. 244 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 245 PredSU->isAvailable = true; 246 AvailableQueue->push(PredSU); 247 } 248} 249 250void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 251 // Bottom up: release predecessors 252 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 253 I != E; ++I) { 254 ReleasePred(SU, &*I); 255 if (I->isAssignedRegDep()) { 256 // This is a physical register dependency and it's impossible or 257 // expensive to copy the register. Make sure nothing that can 258 // clobber the register is scheduled between the predecessor and 259 // this node. 260 if (!LiveRegDefs[I->getReg()]) { 261 ++NumLiveRegs; 262 LiveRegDefs[I->getReg()] = I->getSUnit(); 263 LiveRegCycles[I->getReg()] = CurCycle; 264 } 265 } 266 } 267} 268 269/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 270/// count of its predecessors. If a predecessor pending count is zero, add it to 271/// the Available queue. 272void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 273 DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); 274 DEBUG(SU->dump(this)); 275 276#ifndef NDEBUG 277 if (CurCycle < SU->getHeight()) 278 DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); 279#endif 280 281 // FIXME: Handle noop hazard. 282 SU->setHeightToAtLeast(CurCycle); 283 Sequence.push_back(SU); 284 285 AvailableQueue->ScheduledNode(SU); 286 287 ReleasePredecessors(SU, CurCycle); 288 289 // Release all the implicit physical register defs that are live. 290 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 291 I != E; ++I) { 292 if (I->isAssignedRegDep()) { 293 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 294 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 295 assert(LiveRegDefs[I->getReg()] == SU && 296 "Physical register dependency violated?"); 297 --NumLiveRegs; 298 LiveRegDefs[I->getReg()] = NULL; 299 LiveRegCycles[I->getReg()] = 0; 300 } 301 } 302 } 303 304 SU->isScheduled = true; 305} 306 307/// CapturePred - This does the opposite of ReleasePred. Since SU is being 308/// unscheduled, incrcease the succ left count of its predecessors. Remove 309/// them from AvailableQueue if necessary. 310void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 311 SUnit *PredSU = PredEdge->getSUnit(); 312 if (PredSU->isAvailable) { 313 PredSU->isAvailable = false; 314 if (!PredSU->isPending) 315 AvailableQueue->remove(PredSU); 316 } 317 318 assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); 319 ++PredSU->NumSuccsLeft; 320} 321 322/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 323/// its predecessor states to reflect the change. 324void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 325 DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); 326 DEBUG(SU->dump(this)); 327 328 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 329 I != E; ++I) { 330 CapturePred(&*I); 331 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ 332 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 333 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 334 "Physical register dependency violated?"); 335 --NumLiveRegs; 336 LiveRegDefs[I->getReg()] = NULL; 337 LiveRegCycles[I->getReg()] = 0; 338 } 339 } 340 341 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 342 I != E; ++I) { 343 if (I->isAssignedRegDep()) { 344 if (!LiveRegDefs[I->getReg()]) { 345 LiveRegDefs[I->getReg()] = SU; 346 ++NumLiveRegs; 347 } 348 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 349 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 350 } 351 } 352 353 SU->setHeightDirty(); 354 SU->isScheduled = false; 355 SU->isAvailable = true; 356 AvailableQueue->push(SU); 357 AvailableQueue->UnscheduledNode(SU); 358} 359 360/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 361/// BTCycle in order to schedule a specific node. 362void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 363 unsigned &CurCycle) { 364 SUnit *OldSU = NULL; 365 while (CurCycle > BtCycle) { 366 OldSU = Sequence.back(); 367 Sequence.pop_back(); 368 if (SU->isSucc(OldSU)) 369 // Don't try to remove SU from AvailableQueue. 370 SU->isAvailable = false; 371 UnscheduleNodeBottomUp(OldSU); 372 --CurCycle; 373 AvailableQueue->setCurCycle(CurCycle); 374 } 375 376 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 377 378 ++NumBacktracks; 379} 380 381static bool isOperandOf(const SUnit *SU, SDNode *N) { 382 for (const SDNode *SUNode = SU->getNode(); SUNode; 383 SUNode = SUNode->getFlaggedNode()) { 384 if (SUNode->isOperandOf(N)) 385 return true; 386 } 387 return false; 388} 389 390/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 391/// successors to the newly created node. 392SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 393 if (SU->getNode()->getFlaggedNode()) 394 return NULL; 395 396 SDNode *N = SU->getNode(); 397 if (!N) 398 return NULL; 399 400 SUnit *NewSU; 401 bool TryUnfold = false; 402 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 403 EVT VT = N->getValueType(i); 404 if (VT == MVT::Glue) 405 return NULL; 406 else if (VT == MVT::Other) 407 TryUnfold = true; 408 } 409 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 410 const SDValue &Op = N->getOperand(i); 411 EVT VT = Op.getNode()->getValueType(Op.getResNo()); 412 if (VT == MVT::Glue) 413 return NULL; 414 } 415 416 if (TryUnfold) { 417 SmallVector<SDNode*, 2> NewNodes; 418 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 419 return NULL; 420 421 DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); 422 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 423 424 N = NewNodes[1]; 425 SDNode *LoadNode = NewNodes[0]; 426 unsigned NumVals = N->getNumValues(); 427 unsigned OldNumVals = SU->getNode()->getNumValues(); 428 for (unsigned i = 0; i != NumVals; ++i) 429 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 430 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 431 SDValue(LoadNode, 1)); 432 433 // LoadNode may already exist. This can happen when there is another 434 // load from the same location and producing the same type of value 435 // but it has different alignment or volatileness. 436 bool isNewLoad = true; 437 SUnit *LoadSU; 438 if (LoadNode->getNodeId() != -1) { 439 LoadSU = &SUnits[LoadNode->getNodeId()]; 440 isNewLoad = false; 441 } else { 442 LoadSU = CreateNewSUnit(LoadNode); 443 LoadNode->setNodeId(LoadSU->NodeNum); 444 ComputeLatency(LoadSU); 445 } 446 447 SUnit *NewSU = CreateNewSUnit(N); 448 assert(N->getNodeId() == -1 && "Node already inserted!"); 449 N->setNodeId(NewSU->NodeNum); 450 451 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 452 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 453 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 454 NewSU->isTwoAddress = true; 455 break; 456 } 457 } 458 if (TID.isCommutable()) 459 NewSU->isCommutable = true; 460 ComputeLatency(NewSU); 461 462 // Record all the edges to and from the old SU, by category. 463 SmallVector<SDep, 4> ChainPreds; 464 SmallVector<SDep, 4> ChainSuccs; 465 SmallVector<SDep, 4> LoadPreds; 466 SmallVector<SDep, 4> NodePreds; 467 SmallVector<SDep, 4> NodeSuccs; 468 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 469 I != E; ++I) { 470 if (I->isCtrl()) 471 ChainPreds.push_back(*I); 472 else if (isOperandOf(I->getSUnit(), LoadNode)) 473 LoadPreds.push_back(*I); 474 else 475 NodePreds.push_back(*I); 476 } 477 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 478 I != E; ++I) { 479 if (I->isCtrl()) 480 ChainSuccs.push_back(*I); 481 else 482 NodeSuccs.push_back(*I); 483 } 484 485 // Now assign edges to the newly-created nodes. 486 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 487 const SDep &Pred = ChainPreds[i]; 488 RemovePred(SU, Pred); 489 if (isNewLoad) 490 AddPred(LoadSU, Pred); 491 } 492 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 493 const SDep &Pred = LoadPreds[i]; 494 RemovePred(SU, Pred); 495 if (isNewLoad) 496 AddPred(LoadSU, Pred); 497 } 498 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 499 const SDep &Pred = NodePreds[i]; 500 RemovePred(SU, Pred); 501 AddPred(NewSU, Pred); 502 } 503 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 504 SDep D = NodeSuccs[i]; 505 SUnit *SuccDep = D.getSUnit(); 506 D.setSUnit(SU); 507 RemovePred(SuccDep, D); 508 D.setSUnit(NewSU); 509 AddPred(SuccDep, D); 510 } 511 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 512 SDep D = ChainSuccs[i]; 513 SUnit *SuccDep = D.getSUnit(); 514 D.setSUnit(SU); 515 RemovePred(SuccDep, D); 516 if (isNewLoad) { 517 D.setSUnit(LoadSU); 518 AddPred(SuccDep, D); 519 } 520 } 521 522 // Add a data dependency to reflect that NewSU reads the value defined 523 // by LoadSU. 524 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 525 526 if (isNewLoad) 527 AvailableQueue->addNode(LoadSU); 528 AvailableQueue->addNode(NewSU); 529 530 ++NumUnfolds; 531 532 if (NewSU->NumSuccsLeft == 0) { 533 NewSU->isAvailable = true; 534 return NewSU; 535 } 536 SU = NewSU; 537 } 538 539 DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); 540 NewSU = CreateClone(SU); 541 542 // New SUnit has the exact same predecessors. 543 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 544 I != E; ++I) 545 if (!I->isArtificial()) 546 AddPred(NewSU, *I); 547 548 // Only copy scheduled successors. Cut them from old node's successor 549 // list and move them over. 550 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 551 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 552 I != E; ++I) { 553 if (I->isArtificial()) 554 continue; 555 SUnit *SuccSU = I->getSUnit(); 556 if (SuccSU->isScheduled) { 557 SDep D = *I; 558 D.setSUnit(NewSU); 559 AddPred(SuccSU, D); 560 D.setSUnit(SU); 561 DelDeps.push_back(std::make_pair(SuccSU, D)); 562 } 563 } 564 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 565 RemovePred(DelDeps[i].first, DelDeps[i].second); 566 567 AvailableQueue->updateNode(SU); 568 AvailableQueue->addNode(NewSU); 569 570 ++NumDups; 571 return NewSU; 572} 573 574/// InsertCopiesAndMoveSuccs - Insert register copies and move all 575/// scheduled successors of the given SUnit to the last copy. 576void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 577 const TargetRegisterClass *DestRC, 578 const TargetRegisterClass *SrcRC, 579 SmallVector<SUnit*, 2> &Copies) { 580 SUnit *CopyFromSU = CreateNewSUnit(NULL); 581 CopyFromSU->CopySrcRC = SrcRC; 582 CopyFromSU->CopyDstRC = DestRC; 583 584 SUnit *CopyToSU = CreateNewSUnit(NULL); 585 CopyToSU->CopySrcRC = DestRC; 586 CopyToSU->CopyDstRC = SrcRC; 587 588 // Only copy scheduled successors. Cut them from old node's successor 589 // list and move them over. 590 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 591 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 592 I != E; ++I) { 593 if (I->isArtificial()) 594 continue; 595 SUnit *SuccSU = I->getSUnit(); 596 if (SuccSU->isScheduled) { 597 SDep D = *I; 598 D.setSUnit(CopyToSU); 599 AddPred(SuccSU, D); 600 DelDeps.push_back(std::make_pair(SuccSU, *I)); 601 } 602 } 603 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 604 RemovePred(DelDeps[i].first, DelDeps[i].second); 605 606 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 607 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 608 609 AvailableQueue->updateNode(SU); 610 AvailableQueue->addNode(CopyFromSU); 611 AvailableQueue->addNode(CopyToSU); 612 Copies.push_back(CopyFromSU); 613 Copies.push_back(CopyToSU); 614 615 ++NumPRCopies; 616} 617 618/// getPhysicalRegisterVT - Returns the ValueType of the physical register 619/// definition of the specified node. 620/// FIXME: Move to SelectionDAG? 621static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 622 const TargetInstrInfo *TII) { 623 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 624 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 625 unsigned NumRes = TID.getNumDefs(); 626 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 627 if (Reg == *ImpDef) 628 break; 629 ++NumRes; 630 } 631 return N->getValueType(NumRes); 632} 633 634/// CheckForLiveRegDef - Return true and update live register vector if the 635/// specified register def of the specified SUnit clobbers any "live" registers. 636static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, 637 std::vector<SUnit*> &LiveRegDefs, 638 SmallSet<unsigned, 4> &RegAdded, 639 SmallVector<unsigned, 4> &LRegs, 640 const TargetRegisterInfo *TRI) { 641 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 642 if (RegAdded.insert(Reg)) 643 LRegs.push_back(Reg); 644 } 645 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 646 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 647 if (RegAdded.insert(*Alias)) 648 LRegs.push_back(*Alias); 649 } 650} 651 652/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 653/// scheduling of the given node to satisfy live physical register dependencies. 654/// If the specific node is the last one that's available to schedule, do 655/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 656bool ScheduleDAGRRList:: 657DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { 658 if (NumLiveRegs == 0) 659 return false; 660 661 SmallSet<unsigned, 4> RegAdded; 662 // If this node would clobber any "live" register, then it's not ready. 663 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 664 I != E; ++I) { 665 if (I->isAssignedRegDep()) 666 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 667 RegAdded, LRegs, TRI); 668 } 669 670 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 671 if (Node->getOpcode() == ISD::INLINEASM) { 672 // Inline asm can clobber physical defs. 673 unsigned NumOps = Node->getNumOperands(); 674 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) 675 --NumOps; // Ignore the flag operand. 676 677 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { 678 unsigned Flags = 679 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 680 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); 681 682 ++i; // Skip the ID value. 683 if (InlineAsm::isRegDefKind(Flags) || 684 InlineAsm::isRegDefEarlyClobberKind(Flags)) { 685 // Check for def of register or earlyclobber register. 686 for (; NumVals; --NumVals, ++i) { 687 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 688 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 689 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 690 } 691 } else 692 i += NumVals; 693 } 694 continue; 695 } 696 697 if (!Node->isMachineOpcode()) 698 continue; 699 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 700 if (!TID.ImplicitDefs) 701 continue; 702 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 703 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 704 } 705 706 707 // Okay, we now know all of the live registers that are defined by an 708 // immediate predecessor. It is ok to kill these registers if we are also 709 // using it. 710 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 711 I != E; ++I) { 712 if (I->isAssignedRegDep() && 713 LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 714 unsigned Reg = I->getReg(); 715 if (RegAdded.erase(Reg)) 716 LRegs.erase(std::find(LRegs.begin(), LRegs.end(), Reg)); 717 } 718 } 719 720 return !LRegs.empty(); 721} 722 723 724/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 725/// schedulers. 726void ScheduleDAGRRList::ListScheduleBottomUp() { 727 unsigned CurCycle = 0; 728 729 // Release any predecessors of the special Exit node. 730 ReleasePredecessors(&ExitSU, CurCycle); 731 732 // Add root to Available queue. 733 if (!SUnits.empty()) { 734 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 735 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 736 RootSU->isAvailable = true; 737 AvailableQueue->push(RootSU); 738 } 739 740 // While Available queue is not empty, grab the node with the highest 741 // priority. If it is not ready put it back. Schedule the node. 742 SmallVector<SUnit*, 4> NotReady; 743 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 744 Sequence.reserve(SUnits.size()); 745 while (!AvailableQueue->empty()) { 746 bool Delayed = false; 747 LRegsMap.clear(); 748 SUnit *CurSU = AvailableQueue->pop(); 749 while (CurSU) { 750 SmallVector<unsigned, 4> LRegs; 751 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 752 break; 753 Delayed = true; 754 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 755 756 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 757 NotReady.push_back(CurSU); 758 CurSU = AvailableQueue->pop(); 759 } 760 761 // All candidates are delayed due to live physical reg dependencies. 762 // Try backtracking, code duplication, or inserting cross class copies 763 // to resolve it. 764 if (Delayed && !CurSU) { 765 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 766 SUnit *TrySU = NotReady[i]; 767 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 768 769 // Try unscheduling up to the point where it's safe to schedule 770 // this node. 771 unsigned LiveCycle = CurCycle; 772 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 773 unsigned Reg = LRegs[j]; 774 unsigned LCycle = LiveRegCycles[Reg]; 775 LiveCycle = std::min(LiveCycle, LCycle); 776 } 777 SUnit *OldSU = Sequence[LiveCycle]; 778 if (!WillCreateCycle(TrySU, OldSU)) { 779 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 780 // Force the current node to be scheduled before the node that 781 // requires the physical reg dep. 782 if (OldSU->isAvailable) { 783 OldSU->isAvailable = false; 784 AvailableQueue->remove(OldSU); 785 } 786 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 787 /*Reg=*/0, /*isNormalMemory=*/false, 788 /*isMustAlias=*/false, /*isArtificial=*/true)); 789 // If one or more successors has been unscheduled, then the current 790 // node is no longer avaialable. Schedule a successor that's now 791 // available instead. 792 if (!TrySU->isAvailable) 793 CurSU = AvailableQueue->pop(); 794 else { 795 CurSU = TrySU; 796 TrySU->isPending = false; 797 NotReady.erase(NotReady.begin()+i); 798 } 799 break; 800 } 801 } 802 803 if (!CurSU) { 804 // Can't backtrack. If it's too expensive to copy the value, then try 805 // duplicate the nodes that produces these "too expensive to copy" 806 // values to break the dependency. In case even that doesn't work, 807 // insert cross class copies. 808 // If it's not too expensive, i.e. cost != -1, issue copies. 809 SUnit *TrySU = NotReady[0]; 810 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 811 assert(LRegs.size() == 1 && "Can't handle this yet!"); 812 unsigned Reg = LRegs[0]; 813 SUnit *LRDef = LiveRegDefs[Reg]; 814 EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 815 const TargetRegisterClass *RC = 816 TRI->getMinimalPhysRegClass(Reg, VT); 817 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 818 819 // If cross copy register class is null, then it must be possible copy 820 // the value directly. Do not try duplicate the def. 821 SUnit *NewDef = 0; 822 if (DestRC) 823 NewDef = CopyAndMoveSuccessors(LRDef); 824 else 825 DestRC = RC; 826 if (!NewDef) { 827 // Issue copies, these can be expensive cross register class copies. 828 SmallVector<SUnit*, 2> Copies; 829 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 830 DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum 831 << " to SU #" << Copies.front()->NodeNum << "\n"); 832 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 833 /*Reg=*/0, /*isNormalMemory=*/false, 834 /*isMustAlias=*/false, 835 /*isArtificial=*/true)); 836 NewDef = Copies.back(); 837 } 838 839 DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum 840 << " to SU #" << TrySU->NodeNum << "\n"); 841 LiveRegDefs[Reg] = NewDef; 842 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 843 /*Reg=*/0, /*isNormalMemory=*/false, 844 /*isMustAlias=*/false, 845 /*isArtificial=*/true)); 846 TrySU->isAvailable = false; 847 CurSU = NewDef; 848 } 849 850 assert(CurSU && "Unable to resolve live physical register dependencies!"); 851 } 852 853 // Add the nodes that aren't ready back onto the available list. 854 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 855 NotReady[i]->isPending = false; 856 // May no longer be available due to backtracking. 857 if (NotReady[i]->isAvailable) 858 AvailableQueue->push(NotReady[i]); 859 } 860 NotReady.clear(); 861 862 if (CurSU) 863 ScheduleNodeBottomUp(CurSU, CurCycle); 864 ++CurCycle; 865 AvailableQueue->setCurCycle(CurCycle); 866 } 867 868 // Reverse the order if it is bottom up. 869 std::reverse(Sequence.begin(), Sequence.end()); 870 871#ifndef NDEBUG 872 VerifySchedule(isBottomUp); 873#endif 874} 875 876//===----------------------------------------------------------------------===// 877// Top-Down Scheduling 878//===----------------------------------------------------------------------===// 879 880/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 881/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 882void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 883 SUnit *SuccSU = SuccEdge->getSUnit(); 884 885#ifndef NDEBUG 886 if (SuccSU->NumPredsLeft == 0) { 887 dbgs() << "*** Scheduling failed! ***\n"; 888 SuccSU->dump(this); 889 dbgs() << " has been released too many times!\n"; 890 llvm_unreachable(0); 891 } 892#endif 893 --SuccSU->NumPredsLeft; 894 895 // If all the node's predecessors are scheduled, this node is ready 896 // to be scheduled. Ignore the special ExitSU node. 897 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 898 SuccSU->isAvailable = true; 899 AvailableQueue->push(SuccSU); 900 } 901} 902 903void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 904 // Top down: release successors 905 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 906 I != E; ++I) { 907 assert(!I->isAssignedRegDep() && 908 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 909 910 ReleaseSucc(SU, &*I); 911 } 912} 913 914/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 915/// count of its successors. If a successor pending count is zero, add it to 916/// the Available queue. 917void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 918 DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); 919 DEBUG(SU->dump(this)); 920 921 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 922 SU->setDepthToAtLeast(CurCycle); 923 Sequence.push_back(SU); 924 925 ReleaseSuccessors(SU); 926 SU->isScheduled = true; 927 AvailableQueue->ScheduledNode(SU); 928} 929 930/// ListScheduleTopDown - The main loop of list scheduling for top-down 931/// schedulers. 932void ScheduleDAGRRList::ListScheduleTopDown() { 933 unsigned CurCycle = 0; 934 AvailableQueue->setCurCycle(CurCycle); 935 936 // Release any successors of the special Entry node. 937 ReleaseSuccessors(&EntrySU); 938 939 // All leaves to Available queue. 940 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 941 // It is available if it has no predecessors. 942 if (SUnits[i].Preds.empty()) { 943 AvailableQueue->push(&SUnits[i]); 944 SUnits[i].isAvailable = true; 945 } 946 } 947 948 // While Available queue is not empty, grab the node with the highest 949 // priority. If it is not ready put it back. Schedule the node. 950 Sequence.reserve(SUnits.size()); 951 while (!AvailableQueue->empty()) { 952 SUnit *CurSU = AvailableQueue->pop(); 953 954 if (CurSU) 955 ScheduleNodeTopDown(CurSU, CurCycle); 956 ++CurCycle; 957 AvailableQueue->setCurCycle(CurCycle); 958 } 959 960#ifndef NDEBUG 961 VerifySchedule(isBottomUp); 962#endif 963} 964 965 966//===----------------------------------------------------------------------===// 967// RegReductionPriorityQueue Implementation 968//===----------------------------------------------------------------------===// 969// 970// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 971// to reduce register pressure. 972// 973namespace { 974 template<class SF> 975 class RegReductionPriorityQueue; 976 977 /// bu_ls_rr_sort - Priority function for bottom up register pressure 978 // reduction scheduler. 979 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 980 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 981 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 982 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 983 984 bool operator()(const SUnit* left, const SUnit* right) const; 985 }; 986 987 // td_ls_rr_sort - Priority function for top down register pressure reduction 988 // scheduler. 989 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 990 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 991 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 992 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 993 994 bool operator()(const SUnit* left, const SUnit* right) const; 995 }; 996 997 // src_ls_rr_sort - Priority function for source order scheduler. 998 struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 999 RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; 1000 src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) 1001 : SPQ(spq) {} 1002 src_ls_rr_sort(const src_ls_rr_sort &RHS) 1003 : SPQ(RHS.SPQ) {} 1004 1005 bool operator()(const SUnit* left, const SUnit* right) const; 1006 }; 1007 1008 // hybrid_ls_rr_sort - Priority function for hybrid scheduler. 1009 struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 1010 RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; 1011 hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) 1012 : SPQ(spq) {} 1013 hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) 1014 : SPQ(RHS.SPQ) {} 1015 1016 bool operator()(const SUnit* left, const SUnit* right) const; 1017 }; 1018 1019 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) 1020 // scheduler. 1021 struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 1022 RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; 1023 ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) 1024 : SPQ(spq) {} 1025 ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) 1026 : SPQ(RHS.SPQ) {} 1027 1028 bool operator()(const SUnit* left, const SUnit* right) const; 1029 }; 1030} // end anonymous namespace 1031 1032/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1033/// Smaller number is the higher priority. 1034static unsigned 1035CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1036 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1037 if (SethiUllmanNumber != 0) 1038 return SethiUllmanNumber; 1039 1040 unsigned Extra = 0; 1041 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1042 I != E; ++I) { 1043 if (I->isCtrl()) continue; // ignore chain preds 1044 SUnit *PredSU = I->getSUnit(); 1045 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1046 if (PredSethiUllman > SethiUllmanNumber) { 1047 SethiUllmanNumber = PredSethiUllman; 1048 Extra = 0; 1049 } else if (PredSethiUllman == SethiUllmanNumber) 1050 ++Extra; 1051 } 1052 1053 SethiUllmanNumber += Extra; 1054 1055 if (SethiUllmanNumber == 0) 1056 SethiUllmanNumber = 1; 1057 1058 return SethiUllmanNumber; 1059} 1060 1061namespace { 1062 template<class SF> 1063 class RegReductionPriorityQueue : public SchedulingPriorityQueue { 1064 std::vector<SUnit*> Queue; 1065 SF Picker; 1066 unsigned CurQueueId; 1067 bool TracksRegPressure; 1068 1069 protected: 1070 // SUnits - The SUnits for the current graph. 1071 std::vector<SUnit> *SUnits; 1072 1073 MachineFunction &MF; 1074 const TargetInstrInfo *TII; 1075 const TargetRegisterInfo *TRI; 1076 const TargetLowering *TLI; 1077 ScheduleDAGRRList *scheduleDAG; 1078 1079 // SethiUllmanNumbers - The SethiUllman number for each node. 1080 std::vector<unsigned> SethiUllmanNumbers; 1081 1082 /// RegPressure - Tracking current reg pressure per register class. 1083 /// 1084 std::vector<unsigned> RegPressure; 1085 1086 /// RegLimit - Tracking the number of allocatable registers per register 1087 /// class. 1088 std::vector<unsigned> RegLimit; 1089 1090 public: 1091 RegReductionPriorityQueue(MachineFunction &mf, 1092 bool tracksrp, 1093 const TargetInstrInfo *tii, 1094 const TargetRegisterInfo *tri, 1095 const TargetLowering *tli) 1096 : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), 1097 MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { 1098 if (TracksRegPressure) { 1099 unsigned NumRC = TRI->getNumRegClasses(); 1100 RegLimit.resize(NumRC); 1101 RegPressure.resize(NumRC); 1102 std::fill(RegLimit.begin(), RegLimit.end(), 0); 1103 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1104 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1105 E = TRI->regclass_end(); I != E; ++I) 1106 RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); 1107 } 1108 } 1109 1110 void initNodes(std::vector<SUnit> &sunits) { 1111 SUnits = &sunits; 1112 // Add pseudo dependency edges for two-address nodes. 1113 AddPseudoTwoAddrDeps(); 1114 // Reroute edges to nodes with multiple uses. 1115 PrescheduleNodesWithMultipleUses(); 1116 // Calculate node priorities. 1117 CalculateSethiUllmanNumbers(); 1118 } 1119 1120 void addNode(const SUnit *SU) { 1121 unsigned SUSize = SethiUllmanNumbers.size(); 1122 if (SUnits->size() > SUSize) 1123 SethiUllmanNumbers.resize(SUSize*2, 0); 1124 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1125 } 1126 1127 void updateNode(const SUnit *SU) { 1128 SethiUllmanNumbers[SU->NodeNum] = 0; 1129 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1130 } 1131 1132 void releaseState() { 1133 SUnits = 0; 1134 SethiUllmanNumbers.clear(); 1135 std::fill(RegPressure.begin(), RegPressure.end(), 0); 1136 } 1137 1138 unsigned getNodePriority(const SUnit *SU) const { 1139 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1140 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1141 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1142 // CopyToReg should be close to its uses to facilitate coalescing and 1143 // avoid spilling. 1144 return 0; 1145 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1146 Opc == TargetOpcode::SUBREG_TO_REG || 1147 Opc == TargetOpcode::INSERT_SUBREG) 1148 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1149 // close to their uses to facilitate coalescing. 1150 return 0; 1151 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1152 // If SU does not have a register use, i.e. it doesn't produce a value 1153 // that would be consumed (e.g. store), then it terminates a chain of 1154 // computation. Give it a large SethiUllman number so it will be 1155 // scheduled right before its predecessors that it doesn't lengthen 1156 // their live ranges. 1157 return 0xffff; 1158 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1159 // If SU does not have a register def, schedule it close to its uses 1160 // because it does not lengthen any live ranges. 1161 return 0; 1162 return SethiUllmanNumbers[SU->NodeNum]; 1163 } 1164 1165 unsigned getNodeOrdering(const SUnit *SU) const { 1166 return scheduleDAG->DAG->GetOrdering(SU->getNode()); 1167 } 1168 1169 bool empty() const { return Queue.empty(); } 1170 1171 void push(SUnit *U) { 1172 assert(!U->NodeQueueId && "Node in the queue already"); 1173 U->NodeQueueId = ++CurQueueId; 1174 Queue.push_back(U); 1175 } 1176 1177 SUnit *pop() { 1178 if (empty()) return NULL; 1179 std::vector<SUnit *>::iterator Best = Queue.begin(); 1180 for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), 1181 E = Queue.end(); I != E; ++I) 1182 if (Picker(*Best, *I)) 1183 Best = I; 1184 SUnit *V = *Best; 1185 if (Best != prior(Queue.end())) 1186 std::swap(*Best, Queue.back()); 1187 Queue.pop_back(); 1188 V->NodeQueueId = 0; 1189 return V; 1190 } 1191 1192 void remove(SUnit *SU) { 1193 assert(!Queue.empty() && "Queue is empty!"); 1194 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1195 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), 1196 SU); 1197 if (I != prior(Queue.end())) 1198 std::swap(*I, Queue.back()); 1199 Queue.pop_back(); 1200 SU->NodeQueueId = 0; 1201 } 1202 1203 bool HighRegPressure(const SUnit *SU) const { 1204 if (!TLI) 1205 return false; 1206 1207 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1208 I != E; ++I) { 1209 if (I->isCtrl()) 1210 continue; 1211 SUnit *PredSU = I->getSUnit(); 1212 const SDNode *PN = PredSU->getNode(); 1213 if (!PN->isMachineOpcode()) { 1214 if (PN->getOpcode() == ISD::CopyFromReg) { 1215 EVT VT = PN->getValueType(0); 1216 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1217 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1218 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1219 return true; 1220 } 1221 continue; 1222 } 1223 unsigned POpc = PN->getMachineOpcode(); 1224 if (POpc == TargetOpcode::IMPLICIT_DEF) 1225 continue; 1226 if (POpc == TargetOpcode::EXTRACT_SUBREG) { 1227 EVT VT = PN->getOperand(0).getValueType(); 1228 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1229 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1230 // Check if this increases register pressure of the specific register 1231 // class to the point where it would cause spills. 1232 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1233 return true; 1234 continue; 1235 } else if (POpc == TargetOpcode::INSERT_SUBREG || 1236 POpc == TargetOpcode::SUBREG_TO_REG) { 1237 EVT VT = PN->getValueType(0); 1238 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1239 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1240 // Check if this increases register pressure of the specific register 1241 // class to the point where it would cause spills. 1242 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1243 return true; 1244 continue; 1245 } 1246 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 1247 for (unsigned i = 0; i != NumDefs; ++i) { 1248 EVT VT = PN->getValueType(i); 1249 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1250 if (RegPressure[RCId] >= RegLimit[RCId]) 1251 return true; // Reg pressure already high. 1252 unsigned Cost = TLI->getRepRegClassCostFor(VT); 1253 if (!PN->hasAnyUseOfValue(i)) 1254 continue; 1255 // Check if this increases register pressure of the specific register 1256 // class to the point where it would cause spills. 1257 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1258 return true; 1259 } 1260 } 1261 1262 return false; 1263 } 1264 1265 void ScheduledNode(SUnit *SU) { 1266 if (!TracksRegPressure) 1267 return; 1268 1269 const SDNode *N = SU->getNode(); 1270 if (!N->isMachineOpcode()) { 1271 if (N->getOpcode() != ISD::CopyToReg) 1272 return; 1273 } else { 1274 unsigned Opc = N->getMachineOpcode(); 1275 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1276 Opc == TargetOpcode::INSERT_SUBREG || 1277 Opc == TargetOpcode::SUBREG_TO_REG || 1278 Opc == TargetOpcode::REG_SEQUENCE || 1279 Opc == TargetOpcode::IMPLICIT_DEF) 1280 return; 1281 } 1282 1283 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1284 I != E; ++I) { 1285 if (I->isCtrl()) 1286 continue; 1287 SUnit *PredSU = I->getSUnit(); 1288 if (PredSU->NumSuccsLeft != PredSU->NumSuccs) 1289 continue; 1290 const SDNode *PN = PredSU->getNode(); 1291 if (!PN->isMachineOpcode()) { 1292 if (PN->getOpcode() == ISD::CopyFromReg) { 1293 EVT VT = PN->getValueType(0); 1294 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1295 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1296 } 1297 continue; 1298 } 1299 unsigned POpc = PN->getMachineOpcode(); 1300 if (POpc == TargetOpcode::IMPLICIT_DEF) 1301 continue; 1302 if (POpc == TargetOpcode::EXTRACT_SUBREG) { 1303 EVT VT = PN->getOperand(0).getValueType(); 1304 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1305 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1306 continue; 1307 } else if (POpc == TargetOpcode::INSERT_SUBREG || 1308 POpc == TargetOpcode::SUBREG_TO_REG) { 1309 EVT VT = PN->getValueType(0); 1310 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1311 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1312 continue; 1313 } 1314 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 1315 for (unsigned i = 0; i != NumDefs; ++i) { 1316 EVT VT = PN->getValueType(i); 1317 if (!PN->hasAnyUseOfValue(i)) 1318 continue; 1319 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1320 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1321 } 1322 } 1323 1324 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 1325 // may transfer data dependencies to CopyToReg. 1326 if (SU->NumSuccs && N->isMachineOpcode()) { 1327 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1328 for (unsigned i = 0; i != NumDefs; ++i) { 1329 EVT VT = N->getValueType(i); 1330 if (!N->hasAnyUseOfValue(i)) 1331 continue; 1332 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1333 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 1334 // Register pressure tracking is imprecise. This can happen. 1335 RegPressure[RCId] = 0; 1336 else 1337 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 1338 } 1339 } 1340 1341 dumpRegPressure(); 1342 } 1343 1344 void UnscheduledNode(SUnit *SU) { 1345 if (!TracksRegPressure) 1346 return; 1347 1348 const SDNode *N = SU->getNode(); 1349 if (!N->isMachineOpcode()) { 1350 if (N->getOpcode() != ISD::CopyToReg) 1351 return; 1352 } else { 1353 unsigned Opc = N->getMachineOpcode(); 1354 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1355 Opc == TargetOpcode::INSERT_SUBREG || 1356 Opc == TargetOpcode::SUBREG_TO_REG || 1357 Opc == TargetOpcode::REG_SEQUENCE || 1358 Opc == TargetOpcode::IMPLICIT_DEF) 1359 return; 1360 } 1361 1362 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1363 I != E; ++I) { 1364 if (I->isCtrl()) 1365 continue; 1366 SUnit *PredSU = I->getSUnit(); 1367 if (PredSU->NumSuccsLeft != PredSU->NumSuccs) 1368 continue; 1369 const SDNode *PN = PredSU->getNode(); 1370 if (!PN->isMachineOpcode()) { 1371 if (PN->getOpcode() == ISD::CopyFromReg) { 1372 EVT VT = PN->getValueType(0); 1373 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1374 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1375 } 1376 continue; 1377 } 1378 unsigned POpc = PN->getMachineOpcode(); 1379 if (POpc == TargetOpcode::IMPLICIT_DEF) 1380 continue; 1381 if (POpc == TargetOpcode::EXTRACT_SUBREG) { 1382 EVT VT = PN->getOperand(0).getValueType(); 1383 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1384 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1385 continue; 1386 } else if (POpc == TargetOpcode::INSERT_SUBREG || 1387 POpc == TargetOpcode::SUBREG_TO_REG) { 1388 EVT VT = PN->getValueType(0); 1389 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1390 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1391 continue; 1392 } 1393 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 1394 for (unsigned i = 0; i != NumDefs; ++i) { 1395 EVT VT = PN->getValueType(i); 1396 if (!PN->hasAnyUseOfValue(i)) 1397 continue; 1398 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1399 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 1400 // Register pressure tracking is imprecise. This can happen. 1401 RegPressure[RCId] = 0; 1402 else 1403 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 1404 } 1405 } 1406 1407 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 1408 // may transfer data dependencies to CopyToReg. 1409 if (SU->NumSuccs && N->isMachineOpcode()) { 1410 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1411 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1412 EVT VT = N->getValueType(i); 1413 if (VT == MVT::Glue || VT == MVT::Other) 1414 continue; 1415 if (!N->hasAnyUseOfValue(i)) 1416 continue; 1417 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1418 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 1419 } 1420 } 1421 1422 dumpRegPressure(); 1423 } 1424 1425 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1426 scheduleDAG = scheduleDag; 1427 } 1428 1429 void dumpRegPressure() const { 1430 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1431 E = TRI->regclass_end(); I != E; ++I) { 1432 const TargetRegisterClass *RC = *I; 1433 unsigned Id = RC->getID(); 1434 unsigned RP = RegPressure[Id]; 1435 if (!RP) continue; 1436 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 1437 << '\n'); 1438 } 1439 } 1440 1441 protected: 1442 bool canClobber(const SUnit *SU, const SUnit *Op); 1443 void AddPseudoTwoAddrDeps(); 1444 void PrescheduleNodesWithMultipleUses(); 1445 void CalculateSethiUllmanNumbers(); 1446 }; 1447 1448 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1449 BURegReductionPriorityQueue; 1450 1451 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1452 TDRegReductionPriorityQueue; 1453 1454 typedef RegReductionPriorityQueue<src_ls_rr_sort> 1455 SrcRegReductionPriorityQueue; 1456 1457 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1458 HybridBURRPriorityQueue; 1459 1460 typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 1461 ILPBURRPriorityQueue; 1462} 1463 1464/// closestSucc - Returns the scheduled cycle of the successor which is 1465/// closest to the current cycle. 1466static unsigned closestSucc(const SUnit *SU) { 1467 unsigned MaxHeight = 0; 1468 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1469 I != E; ++I) { 1470 if (I->isCtrl()) continue; // ignore chain succs 1471 unsigned Height = I->getSUnit()->getHeight(); 1472 // If there are bunch of CopyToRegs stacked up, they should be considered 1473 // to be at the same position. 1474 if (I->getSUnit()->getNode() && 1475 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1476 Height = closestSucc(I->getSUnit())+1; 1477 if (Height > MaxHeight) 1478 MaxHeight = Height; 1479 } 1480 return MaxHeight; 1481} 1482 1483/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1484/// for scratch registers, i.e. number of data dependencies. 1485static unsigned calcMaxScratches(const SUnit *SU) { 1486 unsigned Scratches = 0; 1487 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1488 I != E; ++I) { 1489 if (I->isCtrl()) continue; // ignore chain preds 1490 Scratches++; 1491 } 1492 return Scratches; 1493} 1494 1495/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a 1496/// CopyToReg to a virtual register. This SU def is probably a liveout and 1497/// it has no other use. It should be scheduled closer to the terminator. 1498static bool hasOnlyLiveOutUses(const SUnit *SU) { 1499 bool RetVal = false; 1500 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1501 I != E; ++I) { 1502 if (I->isCtrl()) continue; 1503 const SUnit *SuccSU = I->getSUnit(); 1504 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 1505 unsigned Reg = 1506 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 1507 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1508 RetVal = true; 1509 continue; 1510 } 1511 } 1512 return false; 1513 } 1514 return RetVal; 1515} 1516 1517/// UnitsSharePred - Return true if the two scheduling units share a common 1518/// data predecessor. 1519static bool UnitsSharePred(const SUnit *left, const SUnit *right) { 1520 SmallSet<const SUnit*, 4> Preds; 1521 for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); 1522 I != E; ++I) { 1523 if (I->isCtrl()) continue; // ignore chain preds 1524 Preds.insert(I->getSUnit()); 1525 } 1526 for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); 1527 I != E; ++I) { 1528 if (I->isCtrl()) continue; // ignore chain preds 1529 if (Preds.count(I->getSUnit())) 1530 return true; 1531 } 1532 return false; 1533} 1534 1535template <typename RRSort> 1536static bool BURRSort(const SUnit *left, const SUnit *right, 1537 const RegReductionPriorityQueue<RRSort> *SPQ) { 1538 unsigned LPriority = SPQ->getNodePriority(left); 1539 unsigned RPriority = SPQ->getNodePriority(right); 1540 if (LPriority != RPriority) 1541 return LPriority > RPriority; 1542 1543 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1544 // e.g. 1545 // t1 = op t2, c1 1546 // t3 = op t4, c2 1547 // 1548 // and the following instructions are both ready. 1549 // t2 = op c3 1550 // t4 = op c4 1551 // 1552 // Then schedule t2 = op first. 1553 // i.e. 1554 // t4 = op c4 1555 // t2 = op c3 1556 // t1 = op t2, c1 1557 // t3 = op t4, c2 1558 // 1559 // This creates more short live intervals. 1560 unsigned LDist = closestSucc(left); 1561 unsigned RDist = closestSucc(right); 1562 if (LDist != RDist) 1563 return LDist < RDist; 1564 1565 // How many registers becomes live when the node is scheduled. 1566 unsigned LScratch = calcMaxScratches(left); 1567 unsigned RScratch = calcMaxScratches(right); 1568 if (LScratch != RScratch) 1569 return LScratch > RScratch; 1570 1571 if (left->getHeight() != right->getHeight()) 1572 return left->getHeight() > right->getHeight(); 1573 1574 if (left->getDepth() != right->getDepth()) 1575 return left->getDepth() < right->getDepth(); 1576 1577 assert(left->NodeQueueId && right->NodeQueueId && 1578 "NodeQueueId cannot be zero"); 1579 return (left->NodeQueueId > right->NodeQueueId); 1580} 1581 1582// Bottom up 1583bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1584 return BURRSort(left, right, SPQ); 1585} 1586 1587// Source order, otherwise bottom up. 1588bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1589 unsigned LOrder = SPQ->getNodeOrdering(left); 1590 unsigned ROrder = SPQ->getNodeOrdering(right); 1591 1592 // Prefer an ordering where the lower the non-zero order number, the higher 1593 // the preference. 1594 if ((LOrder || ROrder) && LOrder != ROrder) 1595 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 1596 1597 return BURRSort(left, right, SPQ); 1598} 1599 1600bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ 1601 if (left->isCall || right->isCall) 1602 // No way to compute latency of calls. 1603 return BURRSort(left, right, SPQ); 1604 1605 bool LHigh = SPQ->HighRegPressure(left); 1606 bool RHigh = SPQ->HighRegPressure(right); 1607 // Avoid causing spills. If register pressure is high, schedule for 1608 // register pressure reduction. 1609 if (LHigh && !RHigh) 1610 return true; 1611 else if (!LHigh && RHigh) 1612 return false; 1613 else if (!LHigh && !RHigh) { 1614 // If the two nodes share an operand and one of them has a single 1615 // use that is a live out copy, favor the one that is live out. Otherwise 1616 // it will be difficult to eliminate the copy if the instruction is a 1617 // loop induction variable update. e.g. 1618 // BB: 1619 // sub r1, r3, #1 1620 // str r0, [r2, r3] 1621 // mov r3, r1 1622 // cmp 1623 // bne BB 1624 bool SharePred = UnitsSharePred(left, right); 1625 // FIXME: Only adjust if BB is a loop back edge. 1626 // FIXME: What's the cost of a copy? 1627 int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; 1628 int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; 1629 int LHeight = (int)left->getHeight() - LBonus; 1630 int RHeight = (int)right->getHeight() - RBonus; 1631 1632 // Low register pressure situation, schedule for latency if possible. 1633 bool LStall = left->SchedulingPref == Sched::Latency && 1634 (int)SPQ->getCurCycle() < LHeight; 1635 bool RStall = right->SchedulingPref == Sched::Latency && 1636 (int)SPQ->getCurCycle() < RHeight; 1637 // If scheduling one of the node will cause a pipeline stall, delay it. 1638 // If scheduling either one of the node will cause a pipeline stall, sort 1639 // them according to their height. 1640 if (LStall) { 1641 if (!RStall) 1642 return true; 1643 if (LHeight != RHeight) 1644 return LHeight > RHeight; 1645 } else if (RStall) 1646 return false; 1647 1648 // If either node is scheduling for latency, sort them by height 1649 // and latency. 1650 if (left->SchedulingPref == Sched::Latency || 1651 right->SchedulingPref == Sched::Latency) { 1652 if (LHeight != RHeight) 1653 return LHeight > RHeight; 1654 if (left->Latency != right->Latency) 1655 return left->Latency > right->Latency; 1656 } 1657 } 1658 1659 return BURRSort(left, right, SPQ); 1660} 1661 1662bool ilp_ls_rr_sort::operator()(const SUnit *left, 1663 const SUnit *right) const { 1664 if (left->isCall || right->isCall) 1665 // No way to compute latency of calls. 1666 return BURRSort(left, right, SPQ); 1667 1668 bool LHigh = SPQ->HighRegPressure(left); 1669 bool RHigh = SPQ->HighRegPressure(right); 1670 // Avoid causing spills. If register pressure is high, schedule for 1671 // register pressure reduction. 1672 if (LHigh && !RHigh) 1673 return true; 1674 else if (!LHigh && RHigh) 1675 return false; 1676 else if (!LHigh && !RHigh) { 1677 // Low register pressure situation, schedule to maximize instruction level 1678 // parallelism. 1679 if (left->NumPreds > right->NumPreds) 1680 return false; 1681 else if (left->NumPreds < right->NumPreds) 1682 return false; 1683 } 1684 1685 return BURRSort(left, right, SPQ); 1686} 1687 1688template<class SF> 1689bool 1690RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1691 if (SU->isTwoAddress) { 1692 unsigned Opc = SU->getNode()->getMachineOpcode(); 1693 const TargetInstrDesc &TID = TII->get(Opc); 1694 unsigned NumRes = TID.getNumDefs(); 1695 unsigned NumOps = TID.getNumOperands() - NumRes; 1696 for (unsigned i = 0; i != NumOps; ++i) { 1697 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1698 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1699 if (DU->getNodeId() != -1 && 1700 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1701 return true; 1702 } 1703 } 1704 } 1705 return false; 1706} 1707 1708/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1709/// physical register defs. 1710static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1711 const TargetInstrInfo *TII, 1712 const TargetRegisterInfo *TRI) { 1713 SDNode *N = SuccSU->getNode(); 1714 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1715 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1716 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1717 for (const SDNode *SUNode = SU->getNode(); SUNode; 1718 SUNode = SUNode->getFlaggedNode()) { 1719 if (!SUNode->isMachineOpcode()) 1720 continue; 1721 const unsigned *SUImpDefs = 1722 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1723 if (!SUImpDefs) 1724 return false; 1725 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1726 EVT VT = N->getValueType(i); 1727 if (VT == MVT::Glue || VT == MVT::Other) 1728 continue; 1729 if (!N->hasAnyUseOfValue(i)) 1730 continue; 1731 unsigned Reg = ImpDefs[i - NumDefs]; 1732 for (;*SUImpDefs; ++SUImpDefs) { 1733 unsigned SUReg = *SUImpDefs; 1734 if (TRI->regsOverlap(Reg, SUReg)) 1735 return true; 1736 } 1737 } 1738 } 1739 return false; 1740} 1741 1742/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1743/// are not handled well by the general register pressure reduction 1744/// heuristics. When presented with code like this: 1745/// 1746/// N 1747/// / | 1748/// / | 1749/// U store 1750/// | 1751/// ... 1752/// 1753/// the heuristics tend to push the store up, but since the 1754/// operand of the store has another use (U), this would increase 1755/// the length of that other use (the U->N edge). 1756/// 1757/// This function transforms code like the above to route U's 1758/// dependence through the store when possible, like this: 1759/// 1760/// N 1761/// || 1762/// || 1763/// store 1764/// | 1765/// U 1766/// | 1767/// ... 1768/// 1769/// This results in the store being scheduled immediately 1770/// after N, which shortens the U->N live range, reducing 1771/// register pressure. 1772/// 1773template<class SF> 1774void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1775 // Visit all the nodes in topological order, working top-down. 1776 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1777 SUnit *SU = &(*SUnits)[i]; 1778 // For now, only look at nodes with no data successors, such as stores. 1779 // These are especially important, due to the heuristics in 1780 // getNodePriority for nodes with no data successors. 1781 if (SU->NumSuccs != 0) 1782 continue; 1783 // For now, only look at nodes with exactly one data predecessor. 1784 if (SU->NumPreds != 1) 1785 continue; 1786 // Avoid prescheduling copies to virtual registers, which don't behave 1787 // like other nodes from the perspective of scheduling heuristics. 1788 if (SDNode *N = SU->getNode()) 1789 if (N->getOpcode() == ISD::CopyToReg && 1790 TargetRegisterInfo::isVirtualRegister 1791 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1792 continue; 1793 1794 // Locate the single data predecessor. 1795 SUnit *PredSU = 0; 1796 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1797 EE = SU->Preds.end(); II != EE; ++II) 1798 if (!II->isCtrl()) { 1799 PredSU = II->getSUnit(); 1800 break; 1801 } 1802 assert(PredSU); 1803 1804 // Don't rewrite edges that carry physregs, because that requires additional 1805 // support infrastructure. 1806 if (PredSU->hasPhysRegDefs) 1807 continue; 1808 // Short-circuit the case where SU is PredSU's only data successor. 1809 if (PredSU->NumSuccs == 1) 1810 continue; 1811 // Avoid prescheduling to copies from virtual registers, which don't behave 1812 // like other nodes from the perspective of scheduling // heuristics. 1813 if (SDNode *N = SU->getNode()) 1814 if (N->getOpcode() == ISD::CopyFromReg && 1815 TargetRegisterInfo::isVirtualRegister 1816 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1817 continue; 1818 1819 // Perform checks on the successors of PredSU. 1820 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1821 EE = PredSU->Succs.end(); II != EE; ++II) { 1822 SUnit *PredSuccSU = II->getSUnit(); 1823 if (PredSuccSU == SU) continue; 1824 // If PredSU has another successor with no data successors, for 1825 // now don't attempt to choose either over the other. 1826 if (PredSuccSU->NumSuccs == 0) 1827 goto outer_loop_continue; 1828 // Don't break physical register dependencies. 1829 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1830 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1831 goto outer_loop_continue; 1832 // Don't introduce graph cycles. 1833 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1834 goto outer_loop_continue; 1835 } 1836 1837 // Ok, the transformation is safe and the heuristics suggest it is 1838 // profitable. Update the graph. 1839 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum 1840 << " next to PredSU #" << PredSU->NodeNum 1841 << " to guide scheduling in the presence of multiple uses\n"); 1842 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1843 SDep Edge = PredSU->Succs[i]; 1844 assert(!Edge.isAssignedRegDep()); 1845 SUnit *SuccSU = Edge.getSUnit(); 1846 if (SuccSU != SU) { 1847 Edge.setSUnit(PredSU); 1848 scheduleDAG->RemovePred(SuccSU, Edge); 1849 scheduleDAG->AddPred(SU, Edge); 1850 Edge.setSUnit(SU); 1851 scheduleDAG->AddPred(SuccSU, Edge); 1852 --i; 1853 } 1854 } 1855 outer_loop_continue:; 1856 } 1857} 1858 1859/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1860/// it as a def&use operand. Add a pseudo control edge from it to the other 1861/// node (if it won't create a cycle) so the two-address one will be scheduled 1862/// first (lower in the schedule). If both nodes are two-address, favor the 1863/// one that has a CopyToReg use (more likely to be a loop induction update). 1864/// If both are two-address, but one is commutable while the other is not 1865/// commutable, favor the one that's not commutable. 1866template<class SF> 1867void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1868 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1869 SUnit *SU = &(*SUnits)[i]; 1870 if (!SU->isTwoAddress) 1871 continue; 1872 1873 SDNode *Node = SU->getNode(); 1874 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1875 continue; 1876 1877 bool isLiveOut = hasOnlyLiveOutUses(SU); 1878 unsigned Opc = Node->getMachineOpcode(); 1879 const TargetInstrDesc &TID = TII->get(Opc); 1880 unsigned NumRes = TID.getNumDefs(); 1881 unsigned NumOps = TID.getNumOperands() - NumRes; 1882 for (unsigned j = 0; j != NumOps; ++j) { 1883 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1884 continue; 1885 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1886 if (DU->getNodeId() == -1) 1887 continue; 1888 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1889 if (!DUSU) continue; 1890 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1891 E = DUSU->Succs.end(); I != E; ++I) { 1892 if (I->isCtrl()) continue; 1893 SUnit *SuccSU = I->getSUnit(); 1894 if (SuccSU == SU) 1895 continue; 1896 // Be conservative. Ignore if nodes aren't at roughly the same 1897 // depth and height. 1898 if (SuccSU->getHeight() < SU->getHeight() && 1899 (SU->getHeight() - SuccSU->getHeight()) > 1) 1900 continue; 1901 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 1902 // constrains whatever is using the copy, instead of the copy 1903 // itself. In the case that the copy is coalesced, this 1904 // preserves the intent of the pseudo two-address heurietics. 1905 while (SuccSU->Succs.size() == 1 && 1906 SuccSU->getNode()->isMachineOpcode() && 1907 SuccSU->getNode()->getMachineOpcode() == 1908 TargetOpcode::COPY_TO_REGCLASS) 1909 SuccSU = SuccSU->Succs.front().getSUnit(); 1910 // Don't constrain non-instruction nodes. 1911 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1912 continue; 1913 // Don't constrain nodes with physical register defs if the 1914 // predecessor can clobber them. 1915 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1916 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1917 continue; 1918 } 1919 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1920 // these may be coalesced away. We want them close to their uses. 1921 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1922 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 1923 SuccOpc == TargetOpcode::INSERT_SUBREG || 1924 SuccOpc == TargetOpcode::SUBREG_TO_REG) 1925 continue; 1926 if ((!canClobber(SuccSU, DUSU) || 1927 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 1928 (!SU->isCommutable && SuccSU->isCommutable)) && 1929 !scheduleDAG->IsReachable(SuccSU, SU)) { 1930 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 1931 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 1932 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1933 /*Reg=*/0, /*isNormalMemory=*/false, 1934 /*isMustAlias=*/false, 1935 /*isArtificial=*/true)); 1936 } 1937 } 1938 } 1939 } 1940} 1941 1942/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1943/// scheduling units. 1944template<class SF> 1945void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1946 SethiUllmanNumbers.assign(SUnits->size(), 0); 1947 1948 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1949 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1950} 1951 1952/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1953/// predecessors of the successors of the SUnit SU. Stop when the provided 1954/// limit is exceeded. 1955static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1956 unsigned Limit) { 1957 unsigned Sum = 0; 1958 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1959 I != E; ++I) { 1960 const SUnit *SuccSU = I->getSUnit(); 1961 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1962 EE = SuccSU->Preds.end(); II != EE; ++II) { 1963 SUnit *PredSU = II->getSUnit(); 1964 if (!PredSU->isScheduled) 1965 if (++Sum > Limit) 1966 return Sum; 1967 } 1968 } 1969 return Sum; 1970} 1971 1972 1973// Top down 1974bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1975 unsigned LPriority = SPQ->getNodePriority(left); 1976 unsigned RPriority = SPQ->getNodePriority(right); 1977 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1978 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1979 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1980 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1981 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1982 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1983 1984 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1985 return false; 1986 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1987 return true; 1988 1989 if (LIsFloater) 1990 LBonus -= 2; 1991 if (RIsFloater) 1992 RBonus -= 2; 1993 if (left->NumSuccs == 1) 1994 LBonus += 2; 1995 if (right->NumSuccs == 1) 1996 RBonus += 2; 1997 1998 if (LPriority+LBonus != RPriority+RBonus) 1999 return LPriority+LBonus < RPriority+RBonus; 2000 2001 if (left->getDepth() != right->getDepth()) 2002 return left->getDepth() < right->getDepth(); 2003 2004 if (left->NumSuccsLeft != right->NumSuccsLeft) 2005 return left->NumSuccsLeft > right->NumSuccsLeft; 2006 2007 assert(left->NodeQueueId && right->NodeQueueId && 2008 "NodeQueueId cannot be zero"); 2009 return (left->NodeQueueId > right->NodeQueueId); 2010} 2011 2012//===----------------------------------------------------------------------===// 2013// Public Constructor Functions 2014//===----------------------------------------------------------------------===// 2015 2016llvm::ScheduleDAGSDNodes * 2017llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 2018 const TargetMachine &TM = IS->TM; 2019 const TargetInstrInfo *TII = TM.getInstrInfo(); 2020 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2021 2022 BURegReductionPriorityQueue *PQ = 2023 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2024 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); 2025 PQ->setScheduleDAG(SD); 2026 return SD; 2027} 2028 2029llvm::ScheduleDAGSDNodes * 2030llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 2031 const TargetMachine &TM = IS->TM; 2032 const TargetInstrInfo *TII = TM.getInstrInfo(); 2033 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2034 2035 TDRegReductionPriorityQueue *PQ = 2036 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2037 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); 2038 PQ->setScheduleDAG(SD); 2039 return SD; 2040} 2041 2042llvm::ScheduleDAGSDNodes * 2043llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 2044 const TargetMachine &TM = IS->TM; 2045 const TargetInstrInfo *TII = TM.getInstrInfo(); 2046 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2047 2048 SrcRegReductionPriorityQueue *PQ = 2049 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2050 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); 2051 PQ->setScheduleDAG(SD); 2052 return SD; 2053} 2054 2055llvm::ScheduleDAGSDNodes * 2056llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 2057 const TargetMachine &TM = IS->TM; 2058 const TargetInstrInfo *TII = TM.getInstrInfo(); 2059 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2060 const TargetLowering *TLI = &IS->getTargetLowering(); 2061 2062 HybridBURRPriorityQueue *PQ = 2063 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2064 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); 2065 PQ->setScheduleDAG(SD); 2066 return SD; 2067} 2068 2069llvm::ScheduleDAGSDNodes * 2070llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { 2071 const TargetMachine &TM = IS->TM; 2072 const TargetInstrInfo *TII = TM.getInstrInfo(); 2073 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2074 const TargetLowering *TLI = &IS->getTargetLowering(); 2075 2076 ILPBURRPriorityQueue *PQ = 2077 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); 2078 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); 2079 PQ->setScheduleDAG(SD); 2080 return SD; 2081} 2082