ScheduleDAGRRList.cpp revision 0b33cd55d1cfaa98fe4571d48caae4946a5e3a54
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SelectionDAGISel.h" 22#include "llvm/Target/TargetRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/ADT/PriorityQueue.h" 29#include "llvm/ADT/SmallSet.h" 30#include "llvm/ADT/Statistic.h" 31#include "llvm/ADT/STLExtras.h" 32#include <climits> 33using namespace llvm; 34 35STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 36STATISTIC(NumUnfolds, "Number of nodes unfolded"); 37STATISTIC(NumDups, "Number of duplicated nodes"); 38STATISTIC(NumPRCopies, "Number of physical register copies"); 39 40static RegisterScheduler 41 burrListDAGScheduler("list-burr", 42 "Bottom-up register reduction list scheduling", 43 createBURRListDAGScheduler); 44static RegisterScheduler 45 tdrListrDAGScheduler("list-tdrr", 46 "Top-down register reduction list scheduling", 47 createTDRRListDAGScheduler); 48 49namespace { 50//===----------------------------------------------------------------------===// 51/// ScheduleDAGRRList - The actual register reduction list scheduler 52/// implementation. This supports both top-down and bottom-up scheduling. 53/// 54class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 55private: 56 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 57 /// it is top-down. 58 bool isBottomUp; 59 60 /// AvailableQueue - The priority queue to use for the available SUnits. 61 SchedulingPriorityQueue *AvailableQueue; 62 63 /// LiveRegDefs - A set of physical registers and their definition 64 /// that are "live". These nodes must be scheduled before any other nodes that 65 /// modifies the registers can be scheduled. 66 unsigned NumLiveRegs; 67 std::vector<SUnit*> LiveRegDefs; 68 std::vector<unsigned> LiveRegCycles; 69 70 /// Topo - A topological ordering for SUnits which permits fast IsReachable 71 /// and similar queries. 72 ScheduleDAGTopologicalSort Topo; 73 74public: 75 ScheduleDAGRRList(MachineFunction &mf, 76 bool isbottomup, 77 SchedulingPriorityQueue *availqueue) 78 : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), 79 AvailableQueue(availqueue), Topo(SUnits) { 80 } 81 82 ~ScheduleDAGRRList() { 83 delete AvailableQueue; 84 } 85 86 void Schedule(); 87 88 /// IsReachable - Checks if SU is reachable from TargetSU. 89 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 90 return Topo.IsReachable(SU, TargetSU); 91 } 92 93 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will 94 /// create a cycle. 95 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 96 return Topo.WillCreateCycle(SU, TargetSU); 97 } 98 99 /// AddPred - adds a predecessor edge to SUnit SU. 100 /// This returns true if this is a new predecessor. 101 /// Updates the topological ordering if required. 102 void AddPred(SUnit *SU, const SDep &D) { 103 Topo.AddPred(SU, D.getSUnit()); 104 SU->addPred(D); 105 } 106 107 /// RemovePred - removes a predecessor edge from SUnit SU. 108 /// This returns true if an edge was removed. 109 /// Updates the topological ordering if required. 110 void RemovePred(SUnit *SU, const SDep &D) { 111 Topo.RemovePred(SU, D.getSUnit()); 112 SU->removePred(D); 113 } 114 115private: 116 void ReleasePred(SUnit *SU, const SDep *PredEdge); 117 void ReleasePredecessors(SUnit *SU, unsigned CurCycle); 118 void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); 119 void ReleaseSuccessors(SUnit *SU); 120 void CapturePred(SDep *PredEdge); 121 void ScheduleNodeBottomUp(SUnit*, unsigned); 122 void ScheduleNodeTopDown(SUnit*, unsigned); 123 void UnscheduleNodeBottomUp(SUnit*); 124 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 125 SUnit *CopyAndMoveSuccessors(SUnit*); 126 void InsertCopiesAndMoveSuccs(SUnit*, unsigned, 127 const TargetRegisterClass*, 128 const TargetRegisterClass*, 129 SmallVector<SUnit*, 2>&); 130 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 131 void ListScheduleTopDown(); 132 void ListScheduleBottomUp(); 133 134 135 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 136 /// Updates the topological ordering if required. 137 SUnit *CreateNewSUnit(SDNode *N) { 138 unsigned NumSUnits = SUnits.size(); 139 SUnit *NewNode = NewSUnit(N); 140 // Update the topological ordering. 141 if (NewNode->NodeNum >= NumSUnits) 142 Topo.InitDAGTopologicalSorting(); 143 return NewNode; 144 } 145 146 /// CreateClone - Creates a new SUnit from an existing one. 147 /// Updates the topological ordering if required. 148 SUnit *CreateClone(SUnit *N) { 149 unsigned NumSUnits = SUnits.size(); 150 SUnit *NewNode = Clone(N); 151 // Update the topological ordering. 152 if (NewNode->NodeNum >= NumSUnits) 153 Topo.InitDAGTopologicalSorting(); 154 return NewNode; 155 } 156 157 /// ForceUnitLatencies - Return true, since register-pressure-reducing 158 /// scheduling doesn't need actual latency information. 159 bool ForceUnitLatencies() const { return true; } 160}; 161} // end anonymous namespace 162 163 164/// Schedule - Schedule the DAG using list scheduling. 165void ScheduleDAGRRList::Schedule() { 166 DOUT << "********** List Scheduling **********\n"; 167 168 NumLiveRegs = 0; 169 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 170 LiveRegCycles.resize(TRI->getNumRegs(), 0); 171 172 // Build the scheduling graph. 173 BuildSchedGraph(); 174 175 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 176 SUnits[su].dumpAll(this)); 177 Topo.InitDAGTopologicalSorting(); 178 179 AvailableQueue->initNodes(SUnits); 180 181 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 182 if (isBottomUp) 183 ListScheduleBottomUp(); 184 else 185 ListScheduleTopDown(); 186 187 AvailableQueue->releaseState(); 188} 189 190//===----------------------------------------------------------------------===// 191// Bottom-Up Scheduling 192//===----------------------------------------------------------------------===// 193 194/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 195/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 196void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { 197 SUnit *PredSU = PredEdge->getSUnit(); 198 --PredSU->NumSuccsLeft; 199 200#ifndef NDEBUG 201 if (PredSU->NumSuccsLeft < 0) { 202 cerr << "*** Scheduling failed! ***\n"; 203 PredSU->dump(this); 204 cerr << " has been released too many times!\n"; 205 assert(0); 206 } 207#endif 208 209 // If all the node's successors are scheduled, this node is ready 210 // to be scheduled. Ignore the special EntrySU node. 211 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { 212 PredSU->isAvailable = true; 213 AvailableQueue->push(PredSU); 214 } 215} 216 217void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { 218 // Bottom up: release predecessors 219 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 220 I != E; ++I) { 221 ReleasePred(SU, &*I); 222 if (I->isAssignedRegDep()) { 223 // This is a physical register dependency and it's impossible or 224 // expensive to copy the register. Make sure nothing that can 225 // clobber the register is scheduled between the predecessor and 226 // this node. 227 if (!LiveRegDefs[I->getReg()]) { 228 ++NumLiveRegs; 229 LiveRegDefs[I->getReg()] = I->getSUnit(); 230 LiveRegCycles[I->getReg()] = CurCycle; 231 } 232 } 233 } 234} 235 236/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 237/// count of its predecessors. If a predecessor pending count is zero, add it to 238/// the Available queue. 239void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 240 DOUT << "*** Scheduling [" << CurCycle << "]: "; 241 DEBUG(SU->dump(this)); 242 243 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 244 SU->setHeightToAtLeast(CurCycle); 245 Sequence.push_back(SU); 246 247 ReleasePredecessors(SU, CurCycle); 248 249 // Release all the implicit physical register defs that are live. 250 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 251 I != E; ++I) { 252 if (I->isAssignedRegDep()) { 253 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 254 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 255 assert(LiveRegDefs[I->getReg()] == SU && 256 "Physical register dependency violated?"); 257 --NumLiveRegs; 258 LiveRegDefs[I->getReg()] = NULL; 259 LiveRegCycles[I->getReg()] = 0; 260 } 261 } 262 } 263 264 SU->isScheduled = true; 265 AvailableQueue->ScheduledNode(SU); 266} 267 268/// CapturePred - This does the opposite of ReleasePred. Since SU is being 269/// unscheduled, incrcease the succ left count of its predecessors. Remove 270/// them from AvailableQueue if necessary. 271void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 272 SUnit *PredSU = PredEdge->getSUnit(); 273 if (PredSU->isAvailable) { 274 PredSU->isAvailable = false; 275 if (!PredSU->isPending) 276 AvailableQueue->remove(PredSU); 277 } 278 279 ++PredSU->NumSuccsLeft; 280} 281 282/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 283/// its predecessor states to reflect the change. 284void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 285 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; 286 DEBUG(SU->dump(this)); 287 288 AvailableQueue->UnscheduledNode(SU); 289 290 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 291 I != E; ++I) { 292 CapturePred(&*I); 293 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 294 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 295 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 296 "Physical register dependency violated?"); 297 --NumLiveRegs; 298 LiveRegDefs[I->getReg()] = NULL; 299 LiveRegCycles[I->getReg()] = 0; 300 } 301 } 302 303 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 304 I != E; ++I) { 305 if (I->isAssignedRegDep()) { 306 if (!LiveRegDefs[I->getReg()]) { 307 LiveRegDefs[I->getReg()] = SU; 308 ++NumLiveRegs; 309 } 310 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 311 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 312 } 313 } 314 315 SU->setHeightDirty(); 316 SU->isScheduled = false; 317 SU->isAvailable = true; 318 AvailableQueue->push(SU); 319} 320 321/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 322/// BTCycle in order to schedule a specific node. 323void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 324 unsigned &CurCycle) { 325 SUnit *OldSU = NULL; 326 while (CurCycle > BtCycle) { 327 OldSU = Sequence.back(); 328 Sequence.pop_back(); 329 if (SU->isSucc(OldSU)) 330 // Don't try to remove SU from AvailableQueue. 331 SU->isAvailable = false; 332 UnscheduleNodeBottomUp(OldSU); 333 --CurCycle; 334 } 335 336 assert(!SU->isSucc(OldSU) && "Something is wrong!"); 337 338 ++NumBacktracks; 339} 340 341/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 342/// successors to the newly created node. 343SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 344 if (SU->getNode()->getFlaggedNode()) 345 return NULL; 346 347 SDNode *N = SU->getNode(); 348 if (!N) 349 return NULL; 350 351 SUnit *NewSU; 352 bool TryUnfold = false; 353 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 354 MVT VT = N->getValueType(i); 355 if (VT == MVT::Flag) 356 return NULL; 357 else if (VT == MVT::Other) 358 TryUnfold = true; 359 } 360 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 361 const SDValue &Op = N->getOperand(i); 362 MVT VT = Op.getNode()->getValueType(Op.getResNo()); 363 if (VT == MVT::Flag) 364 return NULL; 365 } 366 367 if (TryUnfold) { 368 SmallVector<SDNode*, 2> NewNodes; 369 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 370 return NULL; 371 372 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 373 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 374 375 N = NewNodes[1]; 376 SDNode *LoadNode = NewNodes[0]; 377 unsigned NumVals = N->getNumValues(); 378 unsigned OldNumVals = SU->getNode()->getNumValues(); 379 for (unsigned i = 0; i != NumVals; ++i) 380 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 381 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 382 SDValue(LoadNode, 1)); 383 384 // LoadNode may already exist. This can happen when there is another 385 // load from the same location and producing the same type of value 386 // but it has different alignment or volatileness. 387 bool isNewLoad = true; 388 SUnit *LoadSU; 389 if (LoadNode->getNodeId() != -1) { 390 LoadSU = &SUnits[LoadNode->getNodeId()]; 391 isNewLoad = false; 392 } else { 393 LoadSU = CreateNewSUnit(LoadNode); 394 LoadNode->setNodeId(LoadSU->NodeNum); 395 ComputeLatency(LoadSU); 396 } 397 398 SUnit *NewSU = CreateNewSUnit(N); 399 assert(N->getNodeId() == -1 && "Node already inserted!"); 400 N->setNodeId(NewSU->NodeNum); 401 402 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 403 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 404 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 405 NewSU->isTwoAddress = true; 406 break; 407 } 408 } 409 if (TID.isCommutable()) 410 NewSU->isCommutable = true; 411 ComputeLatency(NewSU); 412 413 // Record all the edges to and from the old SU, by category. 414 SmallVector<SDep, 4> ChainPreds; 415 SmallVector<SDep, 4> ChainSuccs; 416 SmallVector<SDep, 4> LoadPreds; 417 SmallVector<SDep, 4> NodePreds; 418 SmallVector<SDep, 4> NodeSuccs; 419 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 420 I != E; ++I) { 421 if (I->isCtrl()) 422 ChainPreds.push_back(*I); 423 else if (I->getSUnit()->getNode() && 424 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 425 LoadPreds.push_back(*I); 426 else 427 NodePreds.push_back(*I); 428 } 429 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 430 I != E; ++I) { 431 if (I->isCtrl()) 432 ChainSuccs.push_back(*I); 433 else 434 NodeSuccs.push_back(*I); 435 } 436 437 // Now assign edges to the newly-created nodes. 438 for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { 439 const SDep &Pred = ChainPreds[i]; 440 RemovePred(SU, Pred); 441 if (isNewLoad) 442 AddPred(LoadSU, Pred); 443 } 444 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 445 const SDep &Pred = LoadPreds[i]; 446 RemovePred(SU, Pred); 447 if (isNewLoad) 448 AddPred(LoadSU, Pred); 449 } 450 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 451 const SDep &Pred = NodePreds[i]; 452 RemovePred(SU, Pred); 453 AddPred(NewSU, Pred); 454 } 455 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 456 SDep D = NodeSuccs[i]; 457 SUnit *SuccDep = D.getSUnit(); 458 D.setSUnit(SU); 459 RemovePred(SuccDep, D); 460 D.setSUnit(NewSU); 461 AddPred(SuccDep, D); 462 } 463 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 464 SDep D = ChainSuccs[i]; 465 SUnit *SuccDep = D.getSUnit(); 466 D.setSUnit(SU); 467 RemovePred(SuccDep, D); 468 if (isNewLoad) { 469 D.setSUnit(LoadSU); 470 AddPred(SuccDep, D); 471 } 472 } 473 474 // Add a data dependency to reflect that NewSU reads the value defined 475 // by LoadSU. 476 AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); 477 478 if (isNewLoad) 479 AvailableQueue->addNode(LoadSU); 480 AvailableQueue->addNode(NewSU); 481 482 ++NumUnfolds; 483 484 if (NewSU->NumSuccsLeft == 0) { 485 NewSU->isAvailable = true; 486 return NewSU; 487 } 488 SU = NewSU; 489 } 490 491 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 492 NewSU = CreateClone(SU); 493 494 // New SUnit has the exact same predecessors. 495 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 496 I != E; ++I) 497 if (!I->isArtificial()) 498 AddPred(NewSU, *I); 499 500 // Only copy scheduled successors. Cut them from old node's successor 501 // list and move them over. 502 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 503 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 504 I != E; ++I) { 505 if (I->isArtificial()) 506 continue; 507 SUnit *SuccSU = I->getSUnit(); 508 if (SuccSU->isScheduled) { 509 SDep D = *I; 510 D.setSUnit(NewSU); 511 AddPred(SuccSU, D); 512 D.setSUnit(SU); 513 DelDeps.push_back(std::make_pair(SuccSU, D)); 514 } 515 } 516 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 517 RemovePred(DelDeps[i].first, DelDeps[i].second); 518 519 AvailableQueue->updateNode(SU); 520 AvailableQueue->addNode(NewSU); 521 522 ++NumDups; 523 return NewSU; 524} 525 526/// InsertCopiesAndMoveSuccs - Insert register copies and move all 527/// scheduled successors of the given SUnit to the last copy. 528void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 529 const TargetRegisterClass *DestRC, 530 const TargetRegisterClass *SrcRC, 531 SmallVector<SUnit*, 2> &Copies) { 532 SUnit *CopyFromSU = CreateNewSUnit(NULL); 533 CopyFromSU->CopySrcRC = SrcRC; 534 CopyFromSU->CopyDstRC = DestRC; 535 536 SUnit *CopyToSU = CreateNewSUnit(NULL); 537 CopyToSU->CopySrcRC = DestRC; 538 CopyToSU->CopyDstRC = SrcRC; 539 540 // Only copy scheduled successors. Cut them from old node's successor 541 // list and move them over. 542 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 543 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 544 I != E; ++I) { 545 if (I->isArtificial()) 546 continue; 547 SUnit *SuccSU = I->getSUnit(); 548 if (SuccSU->isScheduled) { 549 SDep D = *I; 550 D.setSUnit(CopyToSU); 551 AddPred(SuccSU, D); 552 DelDeps.push_back(std::make_pair(SuccSU, *I)); 553 } 554 } 555 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 556 RemovePred(DelDeps[i].first, DelDeps[i].second); 557 558 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 559 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 560 561 AvailableQueue->updateNode(SU); 562 AvailableQueue->addNode(CopyFromSU); 563 AvailableQueue->addNode(CopyToSU); 564 Copies.push_back(CopyFromSU); 565 Copies.push_back(CopyToSU); 566 567 ++NumPRCopies; 568} 569 570/// getPhysicalRegisterVT - Returns the ValueType of the physical register 571/// definition of the specified node. 572/// FIXME: Move to SelectionDAG? 573static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 574 const TargetInstrInfo *TII) { 575 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 576 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 577 unsigned NumRes = TID.getNumDefs(); 578 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 579 if (Reg == *ImpDef) 580 break; 581 ++NumRes; 582 } 583 return N->getValueType(NumRes); 584} 585 586/// CheckForLiveRegDef - Return true and update live register vector if the 587/// specified register def of the specified SUnit clobbers any "live" registers. 588static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, 589 std::vector<SUnit*> &LiveRegDefs, 590 SmallSet<unsigned, 4> &RegAdded, 591 SmallVector<unsigned, 4> &LRegs, 592 const TargetRegisterInfo *TRI) { 593 bool Added = false; 594 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { 595 if (RegAdded.insert(Reg)) { 596 LRegs.push_back(Reg); 597 Added = true; 598 } 599 } 600 for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) 601 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 602 if (RegAdded.insert(*Alias)) { 603 LRegs.push_back(*Alias); 604 Added = true; 605 } 606 } 607 return Added; 608} 609 610/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 611/// scheduling of the given node to satisfy live physical register dependencies. 612/// If the specific node is the last one that's available to schedule, do 613/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 614bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 615 SmallVector<unsigned, 4> &LRegs){ 616 if (NumLiveRegs == 0) 617 return false; 618 619 SmallSet<unsigned, 4> RegAdded; 620 // If this node would clobber any "live" register, then it's not ready. 621 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 622 I != E; ++I) { 623 if (I->isAssignedRegDep()) 624 CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, 625 RegAdded, LRegs, TRI); 626 } 627 628 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 629 if (Node->getOpcode() == ISD::INLINEASM) { 630 // Inline asm can clobber physical defs. 631 unsigned NumOps = Node->getNumOperands(); 632 if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag) 633 --NumOps; // Ignore the flag operand. 634 635 for (unsigned i = 2; i != NumOps;) { 636 unsigned Flags = 637 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); 638 unsigned NumVals = (Flags & 0xffff) >> 3; 639 640 ++i; // Skip the ID value. 641 if ((Flags & 7) == 2 || (Flags & 7) == 6) { 642 // Check for def of register or earlyclobber register. 643 for (; NumVals; --NumVals, ++i) { 644 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); 645 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 646 CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); 647 } 648 } else 649 i += NumVals; 650 } 651 continue; 652 } 653 654 if (!Node->isMachineOpcode()) 655 continue; 656 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 657 if (!TID.ImplicitDefs) 658 continue; 659 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) 660 CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); 661 } 662 return !LRegs.empty(); 663} 664 665 666/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 667/// schedulers. 668void ScheduleDAGRRList::ListScheduleBottomUp() { 669 unsigned CurCycle = 0; 670 671 // Release any predecessors of the special Exit node. 672 ReleasePredecessors(&ExitSU, CurCycle); 673 674 // Add root to Available queue. 675 if (!SUnits.empty()) { 676 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 677 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 678 RootSU->isAvailable = true; 679 AvailableQueue->push(RootSU); 680 } 681 682 // While Available queue is not empty, grab the node with the highest 683 // priority. If it is not ready put it back. Schedule the node. 684 SmallVector<SUnit*, 4> NotReady; 685 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 686 Sequence.reserve(SUnits.size()); 687 while (!AvailableQueue->empty()) { 688 bool Delayed = false; 689 LRegsMap.clear(); 690 SUnit *CurSU = AvailableQueue->pop(); 691 while (CurSU) { 692 SmallVector<unsigned, 4> LRegs; 693 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 694 break; 695 Delayed = true; 696 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 697 698 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 699 NotReady.push_back(CurSU); 700 CurSU = AvailableQueue->pop(); 701 } 702 703 // All candidates are delayed due to live physical reg dependencies. 704 // Try backtracking, code duplication, or inserting cross class copies 705 // to resolve it. 706 if (Delayed && !CurSU) { 707 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 708 SUnit *TrySU = NotReady[i]; 709 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 710 711 // Try unscheduling up to the point where it's safe to schedule 712 // this node. 713 unsigned LiveCycle = CurCycle; 714 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 715 unsigned Reg = LRegs[j]; 716 unsigned LCycle = LiveRegCycles[Reg]; 717 LiveCycle = std::min(LiveCycle, LCycle); 718 } 719 SUnit *OldSU = Sequence[LiveCycle]; 720 if (!WillCreateCycle(TrySU, OldSU)) { 721 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 722 // Force the current node to be scheduled before the node that 723 // requires the physical reg dep. 724 if (OldSU->isAvailable) { 725 OldSU->isAvailable = false; 726 AvailableQueue->remove(OldSU); 727 } 728 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 729 /*Reg=*/0, /*isNormalMemory=*/false, 730 /*isMustAlias=*/false, /*isArtificial=*/true)); 731 // If one or more successors has been unscheduled, then the current 732 // node is no longer avaialable. Schedule a successor that's now 733 // available instead. 734 if (!TrySU->isAvailable) 735 CurSU = AvailableQueue->pop(); 736 else { 737 CurSU = TrySU; 738 TrySU->isPending = false; 739 NotReady.erase(NotReady.begin()+i); 740 } 741 break; 742 } 743 } 744 745 if (!CurSU) { 746 // Can't backtrack. If it's too expensive to copy the value, then try 747 // duplicate the nodes that produces these "too expensive to copy" 748 // values to break the dependency. In case even that doesn't work, 749 // insert cross class copies. 750 // If it's not too expensive, i.e. cost != -1, issue copies. 751 SUnit *TrySU = NotReady[0]; 752 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 753 assert(LRegs.size() == 1 && "Can't handle this yet!"); 754 unsigned Reg = LRegs[0]; 755 SUnit *LRDef = LiveRegDefs[Reg]; 756 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 757 const TargetRegisterClass *RC = 758 TRI->getPhysicalRegisterRegClass(Reg, VT); 759 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 760 761 // If cross copy register class is null, then it must be possible copy 762 // the value directly. Do not try duplicate the def. 763 SUnit *NewDef = 0; 764 if (DestRC) 765 NewDef = CopyAndMoveSuccessors(LRDef); 766 else 767 DestRC = RC; 768 if (!NewDef) { 769 // Issue copies, these can be expensive cross register class copies. 770 SmallVector<SUnit*, 2> Copies; 771 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 772 DOUT << "Adding an edge from SU #" << TrySU->NodeNum 773 << " to SU #" << Copies.front()->NodeNum << "\n"; 774 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 775 /*Reg=*/0, /*isNormalMemory=*/false, 776 /*isMustAlias=*/false, 777 /*isArtificial=*/true)); 778 NewDef = Copies.back(); 779 } 780 781 DOUT << "Adding an edge from SU #" << NewDef->NodeNum 782 << " to SU #" << TrySU->NodeNum << "\n"; 783 LiveRegDefs[Reg] = NewDef; 784 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 785 /*Reg=*/0, /*isNormalMemory=*/false, 786 /*isMustAlias=*/false, 787 /*isArtificial=*/true)); 788 TrySU->isAvailable = false; 789 CurSU = NewDef; 790 } 791 792 assert(CurSU && "Unable to resolve live physical register dependencies!"); 793 } 794 795 // Add the nodes that aren't ready back onto the available list. 796 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 797 NotReady[i]->isPending = false; 798 // May no longer be available due to backtracking. 799 if (NotReady[i]->isAvailable) 800 AvailableQueue->push(NotReady[i]); 801 } 802 NotReady.clear(); 803 804 if (CurSU) 805 ScheduleNodeBottomUp(CurSU, CurCycle); 806 ++CurCycle; 807 } 808 809 // Reverse the order if it is bottom up. 810 std::reverse(Sequence.begin(), Sequence.end()); 811 812#ifndef NDEBUG 813 VerifySchedule(isBottomUp); 814#endif 815} 816 817//===----------------------------------------------------------------------===// 818// Top-Down Scheduling 819//===----------------------------------------------------------------------===// 820 821/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 822/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 823void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { 824 SUnit *SuccSU = SuccEdge->getSUnit(); 825 --SuccSU->NumPredsLeft; 826 827#ifndef NDEBUG 828 if (SuccSU->NumPredsLeft < 0) { 829 cerr << "*** Scheduling failed! ***\n"; 830 SuccSU->dump(this); 831 cerr << " has been released too many times!\n"; 832 assert(0); 833 } 834#endif 835 836 // If all the node's predecessors are scheduled, this node is ready 837 // to be scheduled. Ignore the special ExitSU node. 838 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { 839 SuccSU->isAvailable = true; 840 AvailableQueue->push(SuccSU); 841 } 842} 843 844void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { 845 // Top down: release successors 846 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 847 I != E; ++I) { 848 assert(!I->isAssignedRegDep() && 849 "The list-tdrr scheduler doesn't yet support physreg dependencies!"); 850 851 ReleaseSucc(SU, &*I); 852 } 853} 854 855/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 856/// count of its successors. If a successor pending count is zero, add it to 857/// the Available queue. 858void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 859 DOUT << "*** Scheduling [" << CurCycle << "]: "; 860 DEBUG(SU->dump(this)); 861 862 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 863 SU->setDepthToAtLeast(CurCycle); 864 Sequence.push_back(SU); 865 866 ReleaseSuccessors(SU); 867 SU->isScheduled = true; 868 AvailableQueue->ScheduledNode(SU); 869} 870 871/// ListScheduleTopDown - The main loop of list scheduling for top-down 872/// schedulers. 873void ScheduleDAGRRList::ListScheduleTopDown() { 874 unsigned CurCycle = 0; 875 876 // Release any successors of the special Entry node. 877 ReleaseSuccessors(&EntrySU); 878 879 // All leaves to Available queue. 880 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 881 // It is available if it has no predecessors. 882 if (SUnits[i].Preds.empty()) { 883 AvailableQueue->push(&SUnits[i]); 884 SUnits[i].isAvailable = true; 885 } 886 } 887 888 // While Available queue is not empty, grab the node with the highest 889 // priority. If it is not ready put it back. Schedule the node. 890 Sequence.reserve(SUnits.size()); 891 while (!AvailableQueue->empty()) { 892 SUnit *CurSU = AvailableQueue->pop(); 893 894 if (CurSU) 895 ScheduleNodeTopDown(CurSU, CurCycle); 896 ++CurCycle; 897 } 898 899#ifndef NDEBUG 900 VerifySchedule(isBottomUp); 901#endif 902} 903 904 905//===----------------------------------------------------------------------===// 906// RegReductionPriorityQueue Implementation 907//===----------------------------------------------------------------------===// 908// 909// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 910// to reduce register pressure. 911// 912namespace { 913 template<class SF> 914 class RegReductionPriorityQueue; 915 916 /// Sorting functions for the Available queue. 917 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 918 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 919 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 920 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 921 922 bool operator()(const SUnit* left, const SUnit* right) const; 923 }; 924 925 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 926 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 927 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 928 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 929 930 bool operator()(const SUnit* left, const SUnit* right) const; 931 }; 932} // end anonymous namespace 933 934/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 935/// Smaller number is the higher priority. 936static unsigned 937CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 938 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 939 if (SethiUllmanNumber != 0) 940 return SethiUllmanNumber; 941 942 unsigned Extra = 0; 943 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 944 I != E; ++I) { 945 if (I->isCtrl()) continue; // ignore chain preds 946 SUnit *PredSU = I->getSUnit(); 947 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 948 if (PredSethiUllman > SethiUllmanNumber) { 949 SethiUllmanNumber = PredSethiUllman; 950 Extra = 0; 951 } else if (PredSethiUllman == SethiUllmanNumber) 952 ++Extra; 953 } 954 955 SethiUllmanNumber += Extra; 956 957 if (SethiUllmanNumber == 0) 958 SethiUllmanNumber = 1; 959 960 return SethiUllmanNumber; 961} 962 963namespace { 964 template<class SF> 965 class VISIBILITY_HIDDEN RegReductionPriorityQueue 966 : public SchedulingPriorityQueue { 967 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 968 unsigned currentQueueId; 969 970 protected: 971 // SUnits - The SUnits for the current graph. 972 std::vector<SUnit> *SUnits; 973 974 const TargetInstrInfo *TII; 975 const TargetRegisterInfo *TRI; 976 ScheduleDAGRRList *scheduleDAG; 977 978 // SethiUllmanNumbers - The SethiUllman number for each node. 979 std::vector<unsigned> SethiUllmanNumbers; 980 981 public: 982 RegReductionPriorityQueue(const TargetInstrInfo *tii, 983 const TargetRegisterInfo *tri) : 984 Queue(SF(this)), currentQueueId(0), 985 TII(tii), TRI(tri), scheduleDAG(NULL) {} 986 987 void initNodes(std::vector<SUnit> &sunits) { 988 SUnits = &sunits; 989 // Add pseudo dependency edges for two-address nodes. 990 AddPseudoTwoAddrDeps(); 991 // Reroute edges to nodes with multiple uses. 992 PrescheduleNodesWithMultipleUses(); 993 // Calculate node priorities. 994 CalculateSethiUllmanNumbers(); 995 } 996 997 void addNode(const SUnit *SU) { 998 unsigned SUSize = SethiUllmanNumbers.size(); 999 if (SUnits->size() > SUSize) 1000 SethiUllmanNumbers.resize(SUSize*2, 0); 1001 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1002 } 1003 1004 void updateNode(const SUnit *SU) { 1005 SethiUllmanNumbers[SU->NodeNum] = 0; 1006 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1007 } 1008 1009 void releaseState() { 1010 SUnits = 0; 1011 SethiUllmanNumbers.clear(); 1012 } 1013 1014 unsigned getNodePriority(const SUnit *SU) const { 1015 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1016 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1017 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1018 // CopyToReg should be close to its uses to facilitate coalescing and 1019 // avoid spilling. 1020 return 0; 1021 if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1022 Opc == TargetInstrInfo::SUBREG_TO_REG || 1023 Opc == TargetInstrInfo::INSERT_SUBREG) 1024 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1025 // close to their uses to facilitate coalescing. 1026 return 0; 1027 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1028 // If SU does not have a register use, i.e. it doesn't produce a value 1029 // that would be consumed (e.g. store), then it terminates a chain of 1030 // computation. Give it a large SethiUllman number so it will be 1031 // scheduled right before its predecessors that it doesn't lengthen 1032 // their live ranges. 1033 return 0xffff; 1034 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1035 // If SU does not have a register def, schedule it close to its uses 1036 // because it does not lengthen any live ranges. 1037 return 0; 1038 return SethiUllmanNumbers[SU->NodeNum]; 1039 } 1040 1041 unsigned size() const { return Queue.size(); } 1042 1043 bool empty() const { return Queue.empty(); } 1044 1045 void push(SUnit *U) { 1046 assert(!U->NodeQueueId && "Node in the queue already"); 1047 U->NodeQueueId = ++currentQueueId; 1048 Queue.push(U); 1049 } 1050 1051 void push_all(const std::vector<SUnit *> &Nodes) { 1052 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1053 push(Nodes[i]); 1054 } 1055 1056 SUnit *pop() { 1057 if (empty()) return NULL; 1058 SUnit *V = Queue.top(); 1059 Queue.pop(); 1060 V->NodeQueueId = 0; 1061 return V; 1062 } 1063 1064 void remove(SUnit *SU) { 1065 assert(!Queue.empty() && "Queue is empty!"); 1066 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1067 Queue.erase_one(SU); 1068 SU->NodeQueueId = 0; 1069 } 1070 1071 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1072 scheduleDAG = scheduleDag; 1073 } 1074 1075 protected: 1076 bool canClobber(const SUnit *SU, const SUnit *Op); 1077 void AddPseudoTwoAddrDeps(); 1078 void PrescheduleNodesWithMultipleUses(); 1079 void CalculateSethiUllmanNumbers(); 1080 }; 1081 1082 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1083 BURegReductionPriorityQueue; 1084 1085 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1086 TDRegReductionPriorityQueue; 1087} 1088 1089/// closestSucc - Returns the scheduled cycle of the successor which is 1090/// closest to the current cycle. 1091static unsigned closestSucc(const SUnit *SU) { 1092 unsigned MaxHeight = 0; 1093 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1094 I != E; ++I) { 1095 if (I->isCtrl()) continue; // ignore chain succs 1096 unsigned Height = I->getSUnit()->getHeight(); 1097 // If there are bunch of CopyToRegs stacked up, they should be considered 1098 // to be at the same position. 1099 if (I->getSUnit()->getNode() && 1100 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1101 Height = closestSucc(I->getSUnit())+1; 1102 if (Height > MaxHeight) 1103 MaxHeight = Height; 1104 } 1105 return MaxHeight; 1106} 1107 1108/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1109/// for scratch registers, i.e. number of data dependencies. 1110static unsigned calcMaxScratches(const SUnit *SU) { 1111 unsigned Scratches = 0; 1112 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1113 I != E; ++I) { 1114 if (I->isCtrl()) continue; // ignore chain preds 1115 Scratches++; 1116 } 1117 return Scratches; 1118} 1119 1120// Bottom up 1121bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1122 unsigned LPriority = SPQ->getNodePriority(left); 1123 unsigned RPriority = SPQ->getNodePriority(right); 1124 if (LPriority != RPriority) 1125 return LPriority > RPriority; 1126 1127 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1128 // e.g. 1129 // t1 = op t2, c1 1130 // t3 = op t4, c2 1131 // 1132 // and the following instructions are both ready. 1133 // t2 = op c3 1134 // t4 = op c4 1135 // 1136 // Then schedule t2 = op first. 1137 // i.e. 1138 // t4 = op c4 1139 // t2 = op c3 1140 // t1 = op t2, c1 1141 // t3 = op t4, c2 1142 // 1143 // This creates more short live intervals. 1144 unsigned LDist = closestSucc(left); 1145 unsigned RDist = closestSucc(right); 1146 if (LDist != RDist) 1147 return LDist < RDist; 1148 1149 // How many registers becomes live when the node is scheduled. 1150 unsigned LScratch = calcMaxScratches(left); 1151 unsigned RScratch = calcMaxScratches(right); 1152 if (LScratch != RScratch) 1153 return LScratch > RScratch; 1154 1155 if (left->getHeight() != right->getHeight()) 1156 return left->getHeight() > right->getHeight(); 1157 1158 if (left->getDepth() != right->getDepth()) 1159 return left->getDepth() < right->getDepth(); 1160 1161 assert(left->NodeQueueId && right->NodeQueueId && 1162 "NodeQueueId cannot be zero"); 1163 return (left->NodeQueueId > right->NodeQueueId); 1164} 1165 1166template<class SF> 1167bool 1168RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1169 if (SU->isTwoAddress) { 1170 unsigned Opc = SU->getNode()->getMachineOpcode(); 1171 const TargetInstrDesc &TID = TII->get(Opc); 1172 unsigned NumRes = TID.getNumDefs(); 1173 unsigned NumOps = TID.getNumOperands() - NumRes; 1174 for (unsigned i = 0; i != NumOps; ++i) { 1175 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1176 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1177 if (DU->getNodeId() != -1 && 1178 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1179 return true; 1180 } 1181 } 1182 } 1183 return false; 1184} 1185 1186 1187/// hasCopyToRegUse - Return true if SU has a value successor that is a 1188/// CopyToReg node. 1189static bool hasCopyToRegUse(const SUnit *SU) { 1190 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1191 I != E; ++I) { 1192 if (I->isCtrl()) continue; 1193 const SUnit *SuccSU = I->getSUnit(); 1194 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1195 return true; 1196 } 1197 return false; 1198} 1199 1200/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1201/// physical register defs. 1202static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1203 const TargetInstrInfo *TII, 1204 const TargetRegisterInfo *TRI) { 1205 SDNode *N = SuccSU->getNode(); 1206 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1207 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1208 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1209 for (const SDNode *SUNode = SU->getNode(); SUNode; 1210 SUNode = SUNode->getFlaggedNode()) { 1211 if (!SUNode->isMachineOpcode()) 1212 continue; 1213 const unsigned *SUImpDefs = 1214 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 1215 if (!SUImpDefs) 1216 return false; 1217 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1218 MVT VT = N->getValueType(i); 1219 if (VT == MVT::Flag || VT == MVT::Other) 1220 continue; 1221 if (!N->hasAnyUseOfValue(i)) 1222 continue; 1223 unsigned Reg = ImpDefs[i - NumDefs]; 1224 for (;*SUImpDefs; ++SUImpDefs) { 1225 unsigned SUReg = *SUImpDefs; 1226 if (TRI->regsOverlap(Reg, SUReg)) 1227 return true; 1228 } 1229 } 1230 } 1231 return false; 1232} 1233 1234/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 1235/// are not handled well by the general register pressure reduction 1236/// heuristics. When presented with code like this: 1237/// 1238/// N 1239/// / | 1240/// / | 1241/// U store 1242/// | 1243/// ... 1244/// 1245/// the heuristics tend to push the store up, but since the 1246/// operand of the store has another use (U), this would increase 1247/// the length of that other use (the U->N edge). 1248/// 1249/// This function transforms code like the above to route U's 1250/// dependence through the store when possible, like this: 1251/// 1252/// N 1253/// || 1254/// || 1255/// store 1256/// | 1257/// U 1258/// | 1259/// ... 1260/// 1261/// This results in the store being scheduled immediately 1262/// after N, which shortens the U->N live range, reducing 1263/// register pressure. 1264/// 1265template<class SF> 1266void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() { 1267 // Visit all the nodes in topological order, working top-down. 1268 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1269 SUnit *SU = &(*SUnits)[i]; 1270 // For now, only look at nodes with no data successors, such as stores. 1271 // These are especially important, due to the heuristics in 1272 // getNodePriority for nodes with no data successors. 1273 if (SU->NumSuccs != 0) 1274 continue; 1275 // For now, only look at nodes with exactly one data predecessor. 1276 if (SU->NumPreds != 1) 1277 continue; 1278 // Avoid prescheduling copies to virtual registers, which don't behave 1279 // like other nodes from the perspective of scheduling heuristics. 1280 if (SDNode *N = SU->getNode()) 1281 if (N->getOpcode() == ISD::CopyToReg && 1282 TargetRegisterInfo::isVirtualRegister 1283 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1284 continue; 1285 1286 // Locate the single data predecessor. 1287 SUnit *PredSU = 0; 1288 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 1289 EE = SU->Preds.end(); II != EE; ++II) 1290 if (!II->isCtrl()) { 1291 PredSU = II->getSUnit(); 1292 break; 1293 } 1294 assert(PredSU); 1295 1296 // Don't rewrite edges that carry physregs, because that requires additional 1297 // support infrastructure. 1298 if (PredSU->hasPhysRegDefs) 1299 continue; 1300 // Short-circuit the case where SU is PredSU's only data successor. 1301 if (PredSU->NumSuccs == 1) 1302 continue; 1303 // Avoid prescheduling to copies from virtual registers, which don't behave 1304 // like other nodes from the perspective of scheduling // heuristics. 1305 if (SDNode *N = SU->getNode()) 1306 if (N->getOpcode() == ISD::CopyFromReg && 1307 TargetRegisterInfo::isVirtualRegister 1308 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 1309 continue; 1310 1311 // Perform checks on the successors of PredSU. 1312 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), 1313 EE = PredSU->Succs.end(); II != EE; ++II) { 1314 SUnit *PredSuccSU = II->getSUnit(); 1315 if (PredSuccSU == SU) continue; 1316 // If PredSU has another successor with no data successors, for 1317 // now don't attempt to choose either over the other. 1318 if (PredSuccSU->NumSuccs == 0) 1319 goto outer_loop_continue; 1320 // Don't break physical register dependencies. 1321 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) 1322 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) 1323 goto outer_loop_continue; 1324 // Don't introduce graph cycles. 1325 if (scheduleDAG->IsReachable(SU, PredSuccSU)) 1326 goto outer_loop_continue; 1327 } 1328 1329 // Ok, the transformation is safe and the heuristics suggest it is 1330 // profitable. Update the graph. 1331 DOUT << "Prescheduling SU # " << SU->NodeNum 1332 << " next to PredSU # " << PredSU->NodeNum 1333 << " to guide scheduling in the presence of multiple uses\n"; 1334 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { 1335 SDep Edge = PredSU->Succs[i]; 1336 assert(!Edge.isAssignedRegDep()); 1337 SUnit *SuccSU = Edge.getSUnit(); 1338 if (SuccSU != SU) { 1339 Edge.setSUnit(PredSU); 1340 scheduleDAG->RemovePred(SuccSU, Edge); 1341 scheduleDAG->AddPred(SU, Edge); 1342 Edge.setSUnit(SU); 1343 scheduleDAG->AddPred(SuccSU, Edge); 1344 --i; 1345 } 1346 } 1347 outer_loop_continue:; 1348 } 1349} 1350 1351/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1352/// it as a def&use operand. Add a pseudo control edge from it to the other 1353/// node (if it won't create a cycle) so the two-address one will be scheduled 1354/// first (lower in the schedule). If both nodes are two-address, favor the 1355/// one that has a CopyToReg use (more likely to be a loop induction update). 1356/// If both are two-address, but one is commutable while the other is not 1357/// commutable, favor the one that's not commutable. 1358template<class SF> 1359void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1360 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1361 SUnit *SU = &(*SUnits)[i]; 1362 if (!SU->isTwoAddress) 1363 continue; 1364 1365 SDNode *Node = SU->getNode(); 1366 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1367 continue; 1368 1369 unsigned Opc = Node->getMachineOpcode(); 1370 const TargetInstrDesc &TID = TII->get(Opc); 1371 unsigned NumRes = TID.getNumDefs(); 1372 unsigned NumOps = TID.getNumOperands() - NumRes; 1373 for (unsigned j = 0; j != NumOps; ++j) { 1374 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1375 continue; 1376 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1377 if (DU->getNodeId() == -1) 1378 continue; 1379 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1380 if (!DUSU) continue; 1381 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1382 E = DUSU->Succs.end(); I != E; ++I) { 1383 if (I->isCtrl()) continue; 1384 SUnit *SuccSU = I->getSUnit(); 1385 if (SuccSU == SU) 1386 continue; 1387 // Be conservative. Ignore if nodes aren't at roughly the same 1388 // depth and height. 1389 if (SuccSU->getHeight() < SU->getHeight() && 1390 (SU->getHeight() - SuccSU->getHeight()) > 1) 1391 continue; 1392 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1393 continue; 1394 // Don't constrain nodes with physical register defs if the 1395 // predecessor can clobber them. 1396 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 1397 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1398 continue; 1399 } 1400 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 1401 // these may be coalesced away. We want them close to their uses. 1402 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1403 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1404 SuccOpc == TargetInstrInfo::INSERT_SUBREG || 1405 SuccOpc == TargetInstrInfo::SUBREG_TO_REG) 1406 continue; 1407 if ((!canClobber(SuccSU, DUSU) || 1408 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1409 (!SU->isCommutable && SuccSU->isCommutable)) && 1410 !scheduleDAG->IsReachable(SuccSU, SU)) { 1411 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum 1412 << " to SU #" << SuccSU->NodeNum << "\n"; 1413 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1414 /*Reg=*/0, /*isNormalMemory=*/false, 1415 /*isMustAlias=*/false, 1416 /*isArtificial=*/true)); 1417 } 1418 } 1419 } 1420 } 1421} 1422 1423/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1424/// scheduling units. 1425template<class SF> 1426void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1427 SethiUllmanNumbers.assign(SUnits->size(), 0); 1428 1429 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1430 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1431} 1432 1433/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1434/// predecessors of the successors of the SUnit SU. Stop when the provided 1435/// limit is exceeded. 1436static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1437 unsigned Limit) { 1438 unsigned Sum = 0; 1439 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1440 I != E; ++I) { 1441 const SUnit *SuccSU = I->getSUnit(); 1442 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1443 EE = SuccSU->Preds.end(); II != EE; ++II) { 1444 SUnit *PredSU = II->getSUnit(); 1445 if (!PredSU->isScheduled) 1446 if (++Sum > Limit) 1447 return Sum; 1448 } 1449 } 1450 return Sum; 1451} 1452 1453 1454// Top down 1455bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1456 unsigned LPriority = SPQ->getNodePriority(left); 1457 unsigned RPriority = SPQ->getNodePriority(right); 1458 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1459 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1460 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1461 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1462 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1463 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1464 1465 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1466 return false; 1467 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1468 return true; 1469 1470 if (LIsFloater) 1471 LBonus -= 2; 1472 if (RIsFloater) 1473 RBonus -= 2; 1474 if (left->NumSuccs == 1) 1475 LBonus += 2; 1476 if (right->NumSuccs == 1) 1477 RBonus += 2; 1478 1479 if (LPriority+LBonus != RPriority+RBonus) 1480 return LPriority+LBonus < RPriority+RBonus; 1481 1482 if (left->getDepth() != right->getDepth()) 1483 return left->getDepth() < right->getDepth(); 1484 1485 if (left->NumSuccsLeft != right->NumSuccsLeft) 1486 return left->NumSuccsLeft > right->NumSuccsLeft; 1487 1488 assert(left->NodeQueueId && right->NodeQueueId && 1489 "NodeQueueId cannot be zero"); 1490 return (left->NodeQueueId > right->NodeQueueId); 1491} 1492 1493//===----------------------------------------------------------------------===// 1494// Public Constructor Functions 1495//===----------------------------------------------------------------------===// 1496 1497llvm::ScheduleDAGSDNodes * 1498llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, bool) { 1499 const TargetMachine &TM = IS->TM; 1500 const TargetInstrInfo *TII = TM.getInstrInfo(); 1501 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1502 1503 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1504 1505 ScheduleDAGRRList *SD = 1506 new ScheduleDAGRRList(*IS->MF, true, PQ); 1507 PQ->setScheduleDAG(SD); 1508 return SD; 1509} 1510 1511llvm::ScheduleDAGSDNodes * 1512llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, bool) { 1513 const TargetMachine &TM = IS->TM; 1514 const TargetInstrInfo *TII = TM.getInstrInfo(); 1515 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 1516 1517 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1518 1519 ScheduleDAGRRList *SD = 1520 new ScheduleDAGRRList(*IS->MF, false, PQ); 1521 PQ->setScheduleDAG(SD); 1522 return SD; 1523} 1524