ScheduleDAGRRList.cpp revision fd2163bcf77df6b3e58868483c089bd3869b01d6
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/Target/TargetRegisterInfo.h" 22#include "llvm/Target/TargetData.h" 23#include "llvm/Target/TargetMachine.h" 24#include "llvm/Target/TargetInstrInfo.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/Compiler.h" 27#include "llvm/ADT/PriorityQueue.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/Statistic.h" 30#include "llvm/ADT/STLExtras.h" 31#include <climits> 32#include "llvm/Support/CommandLine.h" 33using namespace llvm; 34 35STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 36STATISTIC(NumUnfolds, "Number of nodes unfolded"); 37STATISTIC(NumDups, "Number of duplicated nodes"); 38STATISTIC(NumCCCopies, "Number of cross class copies"); 39 40static RegisterScheduler 41 burrListDAGScheduler("list-burr", 42 "Bottom-up register reduction list scheduling", 43 createBURRListDAGScheduler); 44static RegisterScheduler 45 tdrListrDAGScheduler("list-tdrr", 46 "Top-down register reduction list scheduling", 47 createTDRRListDAGScheduler); 48 49namespace { 50//===----------------------------------------------------------------------===// 51/// ScheduleDAGRRList - The actual register reduction list scheduler 52/// implementation. This supports both top-down and bottom-up scheduling. 53/// 54class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 55private: 56 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 57 /// it is top-down. 58 bool isBottomUp; 59 60 /// AvailableQueue - The priority queue to use for the available SUnits. 61 SchedulingPriorityQueue *AvailableQueue; 62 63 /// LiveRegDefs - A set of physical registers and their definition 64 /// that are "live". These nodes must be scheduled before any other nodes that 65 /// modifies the registers can be scheduled. 66 unsigned NumLiveRegs; 67 std::vector<SUnit*> LiveRegDefs; 68 std::vector<unsigned> LiveRegCycles; 69 70 /// Topo - A topological ordering for SUnits which permits fast IsReachable 71 /// and similar queries. 72 ScheduleDAGTopologicalSort Topo; 73 74public: 75 ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb, 76 const TargetMachine &tm, bool isbottomup, 77 SchedulingPriorityQueue *availqueue) 78 : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup), 79 AvailableQueue(availqueue), Topo(SUnits) { 80 } 81 82 ~ScheduleDAGRRList() { 83 delete AvailableQueue; 84 } 85 86 void Schedule(); 87 88 /// IsReachable - Checks if SU is reachable from TargetSU. 89 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 90 return Topo.IsReachable(SU, TargetSU); 91 } 92 93 /// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 94 /// create a cycle. 95 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 96 return Topo.WillCreateCycle(SU, TargetSU); 97 } 98 99 /// AddPred - adds a predecessor edge to SUnit SU. 100 /// This returns true if this is a new predecessor. 101 /// Updates the topological ordering if required. 102 void AddPred(SUnit *SU, const SDep &D) { 103 Topo.AddPred(SU, D.getSUnit()); 104 SU->addPred(D); 105 } 106 107 /// RemovePred - removes a predecessor edge from SUnit SU. 108 /// This returns true if an edge was removed. 109 /// Updates the topological ordering if required. 110 void RemovePred(SUnit *SU, const SDep &D) { 111 Topo.RemovePred(SU, D.getSUnit()); 112 SU->removePred(D); 113 } 114 115private: 116 void ReleasePred(SUnit *SU, SDep *PredEdge); 117 void ReleaseSucc(SUnit *SU, SDep *SuccEdge); 118 void CapturePred(SDep *PredEdge); 119 void ScheduleNodeBottomUp(SUnit*, unsigned); 120 void ScheduleNodeTopDown(SUnit*, unsigned); 121 void UnscheduleNodeBottomUp(SUnit*); 122 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 123 SUnit *CopyAndMoveSuccessors(SUnit*); 124 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, 125 const TargetRegisterClass*, 126 const TargetRegisterClass*, 127 SmallVector<SUnit*, 2>&); 128 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 129 void ListScheduleTopDown(); 130 void ListScheduleBottomUp(); 131 132 133 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 134 /// Updates the topological ordering if required. 135 SUnit *CreateNewSUnit(SDNode *N) { 136 unsigned NumSUnits = SUnits.size(); 137 SUnit *NewNode = NewSUnit(N); 138 // Update the topological ordering. 139 if (NewNode->NodeNum >= NumSUnits) 140 Topo.InitDAGTopologicalSorting(); 141 return NewNode; 142 } 143 144 /// CreateClone - Creates a new SUnit from an existing one. 145 /// Updates the topological ordering if required. 146 SUnit *CreateClone(SUnit *N) { 147 unsigned NumSUnits = SUnits.size(); 148 SUnit *NewNode = Clone(N); 149 // Update the topological ordering. 150 if (NewNode->NodeNum >= NumSUnits) 151 Topo.InitDAGTopologicalSorting(); 152 return NewNode; 153 } 154 155 /// ForceUnitLatencies - Return true, since register-pressure-reducing 156 /// scheduling doesn't need actual latency information. 157 bool ForceUnitLatencies() const { return true; } 158}; 159} // end anonymous namespace 160 161 162/// Schedule - Schedule the DAG using list scheduling. 163void ScheduleDAGRRList::Schedule() { 164 DOUT << "********** List Scheduling **********\n"; 165 166 NumLiveRegs = 0; 167 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 168 LiveRegCycles.resize(TRI->getNumRegs(), 0); 169 170 // Build the scheduling graph. 171 BuildSchedGraph(); 172 173 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 174 SUnits[su].dumpAll(this)); 175 Topo.InitDAGTopologicalSorting(); 176 177 AvailableQueue->initNodes(SUnits); 178 179 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 180 if (isBottomUp) 181 ListScheduleBottomUp(); 182 else 183 ListScheduleTopDown(); 184 185 AvailableQueue->releaseState(); 186} 187 188//===----------------------------------------------------------------------===// 189// Bottom-Up Scheduling 190//===----------------------------------------------------------------------===// 191 192/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 193/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 194void ScheduleDAGRRList::ReleasePred(SUnit *SU, SDep *PredEdge) { 195 SUnit *PredSU = PredEdge->getSUnit(); 196 --PredSU->NumSuccsLeft; 197 198#ifndef NDEBUG 199 if (PredSU->NumSuccsLeft < 0) { 200 cerr << "*** Scheduling failed! ***\n"; 201 PredSU->dump(this); 202 cerr << " has been released too many times!\n"; 203 assert(0); 204 } 205#endif 206 207 if (PredSU->NumSuccsLeft == 0) { 208 PredSU->isAvailable = true; 209 AvailableQueue->push(PredSU); 210 } 211} 212 213/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 214/// count of its predecessors. If a predecessor pending count is zero, add it to 215/// the Available queue. 216void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 217 DOUT << "*** Scheduling [" << CurCycle << "]: "; 218 DEBUG(SU->dump(this)); 219 220 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 221 SU->setHeightToAtLeast(CurCycle); 222 Sequence.push_back(SU); 223 224 // Bottom up: release predecessors 225 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 226 I != E; ++I) { 227 ReleasePred(SU, &*I); 228 if (I->isAssignedRegDep()) { 229 // This is a physical register dependency and it's impossible or 230 // expensive to copy the register. Make sure nothing that can 231 // clobber the register is scheduled between the predecessor and 232 // this node. 233 if (!LiveRegDefs[I->getReg()]) { 234 ++NumLiveRegs; 235 LiveRegDefs[I->getReg()] = I->getSUnit(); 236 LiveRegCycles[I->getReg()] = CurCycle; 237 } 238 } 239 } 240 241 // Release all the implicit physical register defs that are live. 242 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 243 I != E; ++I) { 244 if (I->isAssignedRegDep()) { 245 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 246 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 247 assert(LiveRegDefs[I->getReg()] == SU && 248 "Physical register dependency violated?"); 249 --NumLiveRegs; 250 LiveRegDefs[I->getReg()] = NULL; 251 LiveRegCycles[I->getReg()] = 0; 252 } 253 } 254 } 255 256 SU->isScheduled = true; 257 AvailableQueue->ScheduledNode(SU); 258} 259 260/// CapturePred - This does the opposite of ReleasePred. Since SU is being 261/// unscheduled, incrcease the succ left count of its predecessors. Remove 262/// them from AvailableQueue if necessary. 263void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 264 SUnit *PredSU = PredEdge->getSUnit(); 265 if (PredSU->isAvailable) { 266 PredSU->isAvailable = false; 267 if (!PredSU->isPending) 268 AvailableQueue->remove(PredSU); 269 } 270 271 ++PredSU->NumSuccsLeft; 272} 273 274/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 275/// its predecessor states to reflect the change. 276void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 277 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; 278 DEBUG(SU->dump(this)); 279 280 AvailableQueue->UnscheduledNode(SU); 281 282 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 283 I != E; ++I) { 284 CapturePred(&*I); 285 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 286 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 287 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 288 "Physical register dependency violated?"); 289 --NumLiveRegs; 290 LiveRegDefs[I->getReg()] = NULL; 291 LiveRegCycles[I->getReg()] = 0; 292 } 293 } 294 295 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 296 I != E; ++I) { 297 if (I->isAssignedRegDep()) { 298 if (!LiveRegDefs[I->getReg()]) { 299 LiveRegDefs[I->getReg()] = SU; 300 ++NumLiveRegs; 301 } 302 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 303 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 304 } 305 } 306 307 SU->setHeightDirty(); 308 SU->isScheduled = false; 309 SU->isAvailable = true; 310 AvailableQueue->push(SU); 311} 312 313/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 314/// BTCycle in order to schedule a specific node. Returns the last unscheduled 315/// SUnit. Also returns if a successor is unscheduled in the process. 316void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 317 unsigned &CurCycle) { 318 SUnit *OldSU = NULL; 319 while (CurCycle > BtCycle) { 320 OldSU = Sequence.back(); 321 Sequence.pop_back(); 322 if (SU->isSucc(OldSU)) 323 // Don't try to remove SU from AvailableQueue. 324 SU->isAvailable = false; 325 UnscheduleNodeBottomUp(OldSU); 326 --CurCycle; 327 } 328 329 330 if (SU->isSucc(OldSU)) { 331 assert(false && "Something is wrong!"); 332 abort(); 333 } 334 335 ++NumBacktracks; 336} 337 338/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 339/// successors to the newly created node. 340SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 341 if (SU->getNode()->getFlaggedNode()) 342 return NULL; 343 344 SDNode *N = SU->getNode(); 345 if (!N) 346 return NULL; 347 348 SUnit *NewSU; 349 bool TryUnfold = false; 350 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 351 MVT VT = N->getValueType(i); 352 if (VT == MVT::Flag) 353 return NULL; 354 else if (VT == MVT::Other) 355 TryUnfold = true; 356 } 357 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 358 const SDValue &Op = N->getOperand(i); 359 MVT VT = Op.getNode()->getValueType(Op.getResNo()); 360 if (VT == MVT::Flag) 361 return NULL; 362 } 363 364 if (TryUnfold) { 365 SmallVector<SDNode*, 2> NewNodes; 366 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 367 return NULL; 368 369 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 370 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 371 372 N = NewNodes[1]; 373 SDNode *LoadNode = NewNodes[0]; 374 unsigned NumVals = N->getNumValues(); 375 unsigned OldNumVals = SU->getNode()->getNumValues(); 376 for (unsigned i = 0; i != NumVals; ++i) 377 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 378 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 379 SDValue(LoadNode, 1)); 380 381 // LoadNode may already exist. This can happen when there is another 382 // load from the same location and producing the same type of value 383 // but it has different alignment or volatileness. 384 bool isNewLoad = true; 385 SUnit *LoadSU; 386 if (LoadNode->getNodeId() != -1) { 387 LoadSU = &SUnits[LoadNode->getNodeId()]; 388 isNewLoad = false; 389 } else { 390 LoadSU = CreateNewSUnit(LoadNode); 391 LoadNode->setNodeId(LoadSU->NodeNum); 392 ComputeLatency(LoadSU); 393 } 394 395 SUnit *NewSU = CreateNewSUnit(N); 396 assert(N->getNodeId() == -1 && "Node already inserted!"); 397 N->setNodeId(NewSU->NodeNum); 398 399 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 400 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 401 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 402 NewSU->isTwoAddress = true; 403 break; 404 } 405 } 406 if (TID.isCommutable()) 407 NewSU->isCommutable = true; 408 ComputeLatency(NewSU); 409 410 SDep ChainPred; 411 SmallVector<SDep, 4> ChainSuccs; 412 SmallVector<SDep, 4> LoadPreds; 413 SmallVector<SDep, 4> NodePreds; 414 SmallVector<SDep, 4> NodeSuccs; 415 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 416 I != E; ++I) { 417 if (I->isCtrl()) 418 ChainPred = *I; 419 else if (I->getSUnit()->getNode() && 420 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 421 LoadPreds.push_back(*I); 422 else 423 NodePreds.push_back(*I); 424 } 425 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 426 I != E; ++I) { 427 if (I->isCtrl()) 428 ChainSuccs.push_back(*I); 429 else 430 NodeSuccs.push_back(*I); 431 } 432 433 if (ChainPred.getSUnit()) { 434 RemovePred(SU, ChainPred); 435 if (isNewLoad) 436 AddPred(LoadSU, ChainPred); 437 } 438 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 439 const SDep &Pred = LoadPreds[i]; 440 RemovePred(SU, Pred); 441 if (isNewLoad) { 442 AddPred(LoadSU, Pred); 443 } 444 } 445 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 446 const SDep &Pred = NodePreds[i]; 447 RemovePred(SU, Pred); 448 AddPred(NewSU, Pred); 449 } 450 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 451 SDep D = NodeSuccs[i]; 452 SUnit *SuccDep = D.getSUnit(); 453 D.setSUnit(SU); 454 RemovePred(SuccDep, D); 455 D.setSUnit(NewSU); 456 AddPred(SuccDep, D); 457 } 458 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 459 SDep D = ChainSuccs[i]; 460 SUnit *SuccDep = D.getSUnit(); 461 D.setSUnit(SU); 462 RemovePred(SuccDep, D); 463 if (isNewLoad) { 464 D.setSUnit(LoadSU); 465 AddPred(SuccDep, D); 466 } 467 } 468 if (isNewLoad) { 469 AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); 470 } 471 472 if (isNewLoad) 473 AvailableQueue->addNode(LoadSU); 474 AvailableQueue->addNode(NewSU); 475 476 ++NumUnfolds; 477 478 if (NewSU->NumSuccsLeft == 0) { 479 NewSU->isAvailable = true; 480 return NewSU; 481 } 482 SU = NewSU; 483 } 484 485 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 486 NewSU = CreateClone(SU); 487 488 // New SUnit has the exact same predecessors. 489 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 490 I != E; ++I) 491 if (!I->isArtificial()) 492 AddPred(NewSU, *I); 493 494 // Only copy scheduled successors. Cut them from old node's successor 495 // list and move them over. 496 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 497 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 498 I != E; ++I) { 499 if (I->isArtificial()) 500 continue; 501 SUnit *SuccSU = I->getSUnit(); 502 if (SuccSU->isScheduled) { 503 SDep D = *I; 504 D.setSUnit(NewSU); 505 AddPred(SuccSU, D); 506 D.setSUnit(SU); 507 DelDeps.push_back(std::make_pair(SuccSU, D)); 508 } 509 } 510 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 511 RemovePred(DelDeps[i].first, DelDeps[i].second); 512 513 AvailableQueue->updateNode(SU); 514 AvailableQueue->addNode(NewSU); 515 516 ++NumDups; 517 return NewSU; 518} 519 520/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 521/// and move all scheduled successors of the given SUnit to the last copy. 522void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 523 const TargetRegisterClass *DestRC, 524 const TargetRegisterClass *SrcRC, 525 SmallVector<SUnit*, 2> &Copies) { 526 SUnit *CopyFromSU = CreateNewSUnit(NULL); 527 CopyFromSU->CopySrcRC = SrcRC; 528 CopyFromSU->CopyDstRC = DestRC; 529 530 SUnit *CopyToSU = CreateNewSUnit(NULL); 531 CopyToSU->CopySrcRC = DestRC; 532 CopyToSU->CopyDstRC = SrcRC; 533 534 // Only copy scheduled successors. Cut them from old node's successor 535 // list and move them over. 536 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 537 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 538 I != E; ++I) { 539 if (I->isArtificial()) 540 continue; 541 SUnit *SuccSU = I->getSUnit(); 542 if (SuccSU->isScheduled) { 543 SDep D = *I; 544 D.setSUnit(CopyToSU); 545 AddPred(SuccSU, D); 546 DelDeps.push_back(std::make_pair(SuccSU, *I)); 547 } 548 } 549 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 550 RemovePred(DelDeps[i].first, DelDeps[i].second); 551 } 552 553 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 554 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 555 556 AvailableQueue->updateNode(SU); 557 AvailableQueue->addNode(CopyFromSU); 558 AvailableQueue->addNode(CopyToSU); 559 Copies.push_back(CopyFromSU); 560 Copies.push_back(CopyToSU); 561 562 ++NumCCCopies; 563} 564 565/// getPhysicalRegisterVT - Returns the ValueType of the physical register 566/// definition of the specified node. 567/// FIXME: Move to SelectionDAG? 568static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 569 const TargetInstrInfo *TII) { 570 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 571 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 572 unsigned NumRes = TID.getNumDefs(); 573 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 574 if (Reg == *ImpDef) 575 break; 576 ++NumRes; 577 } 578 return N->getValueType(NumRes); 579} 580 581/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 582/// scheduling of the given node to satisfy live physical register dependencies. 583/// If the specific node is the last one that's available to schedule, do 584/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 585bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 586 SmallVector<unsigned, 4> &LRegs){ 587 if (NumLiveRegs == 0) 588 return false; 589 590 SmallSet<unsigned, 4> RegAdded; 591 // If this node would clobber any "live" register, then it's not ready. 592 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 593 I != E; ++I) { 594 if (I->isAssignedRegDep()) { 595 unsigned Reg = I->getReg(); 596 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { 597 if (RegAdded.insert(Reg)) 598 LRegs.push_back(Reg); 599 } 600 for (const unsigned *Alias = TRI->getAliasSet(Reg); 601 *Alias; ++Alias) 602 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { 603 if (RegAdded.insert(*Alias)) 604 LRegs.push_back(*Alias); 605 } 606 } 607 } 608 609 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 610 if (!Node->isMachineOpcode()) 611 continue; 612 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 613 if (!TID.ImplicitDefs) 614 continue; 615 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 616 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { 617 if (RegAdded.insert(*Reg)) 618 LRegs.push_back(*Reg); 619 } 620 for (const unsigned *Alias = TRI->getAliasSet(*Reg); 621 *Alias; ++Alias) 622 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 623 if (RegAdded.insert(*Alias)) 624 LRegs.push_back(*Alias); 625 } 626 } 627 } 628 return !LRegs.empty(); 629} 630 631 632/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 633/// schedulers. 634void ScheduleDAGRRList::ListScheduleBottomUp() { 635 unsigned CurCycle = 0; 636 // Add root to Available queue. 637 if (!SUnits.empty()) { 638 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 639 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 640 RootSU->isAvailable = true; 641 AvailableQueue->push(RootSU); 642 } 643 644 // While Available queue is not empty, grab the node with the highest 645 // priority. If it is not ready put it back. Schedule the node. 646 SmallVector<SUnit*, 4> NotReady; 647 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 648 Sequence.reserve(SUnits.size()); 649 while (!AvailableQueue->empty()) { 650 bool Delayed = false; 651 LRegsMap.clear(); 652 SUnit *CurSU = AvailableQueue->pop(); 653 while (CurSU) { 654 SmallVector<unsigned, 4> LRegs; 655 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 656 break; 657 Delayed = true; 658 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 659 660 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 661 NotReady.push_back(CurSU); 662 CurSU = AvailableQueue->pop(); 663 } 664 665 // All candidates are delayed due to live physical reg dependencies. 666 // Try backtracking, code duplication, or inserting cross class copies 667 // to resolve it. 668 if (Delayed && !CurSU) { 669 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 670 SUnit *TrySU = NotReady[i]; 671 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 672 673 // Try unscheduling up to the point where it's safe to schedule 674 // this node. 675 unsigned LiveCycle = CurCycle; 676 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 677 unsigned Reg = LRegs[j]; 678 unsigned LCycle = LiveRegCycles[Reg]; 679 LiveCycle = std::min(LiveCycle, LCycle); 680 } 681 SUnit *OldSU = Sequence[LiveCycle]; 682 if (!WillCreateCycle(TrySU, OldSU)) { 683 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 684 // Force the current node to be scheduled before the node that 685 // requires the physical reg dep. 686 if (OldSU->isAvailable) { 687 OldSU->isAvailable = false; 688 AvailableQueue->remove(OldSU); 689 } 690 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 691 /*Reg=*/0, /*isNormalMemory=*/false, 692 /*isMustAlias=*/false, /*isArtificial=*/true)); 693 // If one or more successors has been unscheduled, then the current 694 // node is no longer avaialable. Schedule a successor that's now 695 // available instead. 696 if (!TrySU->isAvailable) 697 CurSU = AvailableQueue->pop(); 698 else { 699 CurSU = TrySU; 700 TrySU->isPending = false; 701 NotReady.erase(NotReady.begin()+i); 702 } 703 break; 704 } 705 } 706 707 if (!CurSU) { 708 // Can't backtrack. Try duplicating the nodes that produces these 709 // "expensive to copy" values to break the dependency. In case even 710 // that doesn't work, insert cross class copies. 711 SUnit *TrySU = NotReady[0]; 712 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 713 assert(LRegs.size() == 1 && "Can't handle this yet!"); 714 unsigned Reg = LRegs[0]; 715 SUnit *LRDef = LiveRegDefs[Reg]; 716 SUnit *NewDef = CopyAndMoveSuccessors(LRDef); 717 if (!NewDef) { 718 // Issue expensive cross register class copies. 719 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 720 const TargetRegisterClass *RC = 721 TRI->getPhysicalRegisterRegClass(Reg, VT); 722 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 723 if (!DestRC) { 724 assert(false && "Don't know how to copy this physical register!"); 725 abort(); 726 } 727 SmallVector<SUnit*, 2> Copies; 728 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 729 DOUT << "Adding an edge from SU # " << TrySU->NodeNum 730 << " to SU #" << Copies.front()->NodeNum << "\n"; 731 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 732 /*Reg=*/0, /*isMustAlias=*/false, 733 /*isArtificial=*/true)); 734 NewDef = Copies.back(); 735 } 736 737 DOUT << "Adding an edge from SU # " << NewDef->NodeNum 738 << " to SU #" << TrySU->NodeNum << "\n"; 739 LiveRegDefs[Reg] = NewDef; 740 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 741 /*Reg=*/0, /*isMustAlias=*/false, 742 /*isArtificial=*/true)); 743 TrySU->isAvailable = false; 744 CurSU = NewDef; 745 } 746 747 if (!CurSU) { 748 assert(false && "Unable to resolve live physical register dependencies!"); 749 abort(); 750 } 751 } 752 753 // Add the nodes that aren't ready back onto the available list. 754 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 755 NotReady[i]->isPending = false; 756 // May no longer be available due to backtracking. 757 if (NotReady[i]->isAvailable) 758 AvailableQueue->push(NotReady[i]); 759 } 760 NotReady.clear(); 761 762 if (CurSU) 763 ScheduleNodeBottomUp(CurSU, CurCycle); 764 ++CurCycle; 765 } 766 767 // Reverse the order if it is bottom up. 768 std::reverse(Sequence.begin(), Sequence.end()); 769 770#ifndef NDEBUG 771 VerifySchedule(isBottomUp); 772#endif 773} 774 775//===----------------------------------------------------------------------===// 776// Top-Down Scheduling 777//===----------------------------------------------------------------------===// 778 779/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 780/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 781void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { 782 SUnit *SuccSU = SuccEdge->getSUnit(); 783 --SuccSU->NumPredsLeft; 784 785#ifndef NDEBUG 786 if (SuccSU->NumPredsLeft < 0) { 787 cerr << "*** Scheduling failed! ***\n"; 788 SuccSU->dump(this); 789 cerr << " has been released too many times!\n"; 790 assert(0); 791 } 792#endif 793 794 if (SuccSU->NumPredsLeft == 0) { 795 SuccSU->isAvailable = true; 796 AvailableQueue->push(SuccSU); 797 } 798} 799 800/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 801/// count of its successors. If a successor pending count is zero, add it to 802/// the Available queue. 803void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 804 DOUT << "*** Scheduling [" << CurCycle << "]: "; 805 DEBUG(SU->dump(this)); 806 807 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 808 SU->setDepthToAtLeast(CurCycle); 809 Sequence.push_back(SU); 810 811 // Top down: release successors 812 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 813 I != E; ++I) 814 ReleaseSucc(SU, &*I); 815 816 SU->isScheduled = true; 817 AvailableQueue->ScheduledNode(SU); 818} 819 820/// ListScheduleTopDown - The main loop of list scheduling for top-down 821/// schedulers. 822void ScheduleDAGRRList::ListScheduleTopDown() { 823 unsigned CurCycle = 0; 824 825 // All leaves to Available queue. 826 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 827 // It is available if it has no predecessors. 828 if (SUnits[i].Preds.empty()) { 829 AvailableQueue->push(&SUnits[i]); 830 SUnits[i].isAvailable = true; 831 } 832 } 833 834 // While Available queue is not empty, grab the node with the highest 835 // priority. If it is not ready put it back. Schedule the node. 836 Sequence.reserve(SUnits.size()); 837 while (!AvailableQueue->empty()) { 838 SUnit *CurSU = AvailableQueue->pop(); 839 840 if (CurSU) 841 ScheduleNodeTopDown(CurSU, CurCycle); 842 ++CurCycle; 843 } 844 845#ifndef NDEBUG 846 VerifySchedule(isBottomUp); 847#endif 848} 849 850 851//===----------------------------------------------------------------------===// 852// RegReductionPriorityQueue Implementation 853//===----------------------------------------------------------------------===// 854// 855// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 856// to reduce register pressure. 857// 858namespace { 859 template<class SF> 860 class RegReductionPriorityQueue; 861 862 /// Sorting functions for the Available queue. 863 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 864 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 865 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 866 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 867 868 bool operator()(const SUnit* left, const SUnit* right) const; 869 }; 870 871 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 872 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 873 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 874 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 875 876 bool operator()(const SUnit* left, const SUnit* right) const; 877 }; 878} // end anonymous namespace 879 880static inline bool isCopyFromLiveIn(const SUnit *SU) { 881 SDNode *N = SU->getNode(); 882 return N && N->getOpcode() == ISD::CopyFromReg && 883 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 884} 885 886/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 887/// Smaller number is the higher priority. 888static unsigned 889CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 890 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 891 if (SethiUllmanNumber != 0) 892 return SethiUllmanNumber; 893 894 unsigned Extra = 0; 895 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 896 I != E; ++I) { 897 if (I->isCtrl()) continue; // ignore chain preds 898 SUnit *PredSU = I->getSUnit(); 899 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 900 if (PredSethiUllman > SethiUllmanNumber) { 901 SethiUllmanNumber = PredSethiUllman; 902 Extra = 0; 903 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl()) 904 ++Extra; 905 } 906 907 SethiUllmanNumber += Extra; 908 909 if (SethiUllmanNumber == 0) 910 SethiUllmanNumber = 1; 911 912 return SethiUllmanNumber; 913} 914 915namespace { 916 template<class SF> 917 class VISIBILITY_HIDDEN RegReductionPriorityQueue 918 : public SchedulingPriorityQueue { 919 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 920 unsigned currentQueueId; 921 922 protected: 923 // SUnits - The SUnits for the current graph. 924 std::vector<SUnit> *SUnits; 925 926 const TargetInstrInfo *TII; 927 const TargetRegisterInfo *TRI; 928 ScheduleDAGRRList *scheduleDAG; 929 930 // SethiUllmanNumbers - The SethiUllman number for each node. 931 std::vector<unsigned> SethiUllmanNumbers; 932 933 public: 934 RegReductionPriorityQueue(const TargetInstrInfo *tii, 935 const TargetRegisterInfo *tri) : 936 Queue(SF(this)), currentQueueId(0), 937 TII(tii), TRI(tri), scheduleDAG(NULL) {} 938 939 void initNodes(std::vector<SUnit> &sunits) { 940 SUnits = &sunits; 941 // Add pseudo dependency edges for two-address nodes. 942 AddPseudoTwoAddrDeps(); 943 // Calculate node priorities. 944 CalculateSethiUllmanNumbers(); 945 } 946 947 void addNode(const SUnit *SU) { 948 unsigned SUSize = SethiUllmanNumbers.size(); 949 if (SUnits->size() > SUSize) 950 SethiUllmanNumbers.resize(SUSize*2, 0); 951 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 952 } 953 954 void updateNode(const SUnit *SU) { 955 SethiUllmanNumbers[SU->NodeNum] = 0; 956 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 957 } 958 959 void releaseState() { 960 SUnits = 0; 961 SethiUllmanNumbers.clear(); 962 } 963 964 unsigned getNodePriority(const SUnit *SU) const { 965 assert(SU->NodeNum < SethiUllmanNumbers.size()); 966 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 967 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 968 // CopyFromReg should be close to its def because it restricts 969 // allocation choices. But if it is a livein then perhaps we want it 970 // closer to its uses so it can be coalesced. 971 return 0xffff; 972 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 973 // CopyToReg should be close to its uses to facilitate coalescing and 974 // avoid spilling. 975 return 0; 976 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 977 Opc == TargetInstrInfo::INSERT_SUBREG) 978 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 979 // facilitate coalescing. 980 return 0; 981 else if (SU->NumSuccs == 0) 982 // If SU does not have a use, i.e. it doesn't produce a value that would 983 // be consumed (e.g. store), then it terminates a chain of computation. 984 // Give it a large SethiUllman number so it will be scheduled right 985 // before its predecessors that it doesn't lengthen their live ranges. 986 return 0xffff; 987 else if (SU->NumPreds == 0) 988 // If SU does not have a def, schedule it close to its uses because it 989 // does not lengthen any live ranges. 990 return 0; 991 else 992 return SethiUllmanNumbers[SU->NodeNum]; 993 } 994 995 unsigned size() const { return Queue.size(); } 996 997 bool empty() const { return Queue.empty(); } 998 999 void push(SUnit *U) { 1000 assert(!U->NodeQueueId && "Node in the queue already"); 1001 U->NodeQueueId = ++currentQueueId; 1002 Queue.push(U); 1003 } 1004 1005 void push_all(const std::vector<SUnit *> &Nodes) { 1006 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1007 push(Nodes[i]); 1008 } 1009 1010 SUnit *pop() { 1011 if (empty()) return NULL; 1012 SUnit *V = Queue.top(); 1013 Queue.pop(); 1014 V->NodeQueueId = 0; 1015 return V; 1016 } 1017 1018 void remove(SUnit *SU) { 1019 assert(!Queue.empty() && "Queue is empty!"); 1020 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1021 Queue.erase_one(SU); 1022 SU->NodeQueueId = 0; 1023 } 1024 1025 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1026 scheduleDAG = scheduleDag; 1027 } 1028 1029 protected: 1030 bool canClobber(const SUnit *SU, const SUnit *Op); 1031 void AddPseudoTwoAddrDeps(); 1032 void CalculateSethiUllmanNumbers(); 1033 }; 1034 1035 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1036 BURegReductionPriorityQueue; 1037 1038 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1039 TDRegReductionPriorityQueue; 1040} 1041 1042/// closestSucc - Returns the scheduled cycle of the successor which is 1043/// closet to the current cycle. 1044static unsigned closestSucc(const SUnit *SU) { 1045 unsigned MaxHeight = 0; 1046 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1047 I != E; ++I) { 1048 unsigned Height = I->getSUnit()->getHeight(); 1049 // If there are bunch of CopyToRegs stacked up, they should be considered 1050 // to be at the same position. 1051 if (I->getSUnit()->getNode() && 1052 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1053 Height = closestSucc(I->getSUnit())+1; 1054 if (Height > MaxHeight) 1055 MaxHeight = Height; 1056 } 1057 return MaxHeight; 1058} 1059 1060/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1061/// for scratch registers. Live-in operands and live-out results don't count 1062/// since they are "fixed". 1063static unsigned calcMaxScratches(const SUnit *SU) { 1064 unsigned Scratches = 0; 1065 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1066 I != E; ++I) { 1067 if (I->isCtrl()) continue; // ignore chain preds 1068 if (!I->getSUnit()->getNode() || 1069 I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg) 1070 Scratches++; 1071 } 1072 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1073 I != E; ++I) { 1074 if (I->isCtrl()) continue; // ignore chain succs 1075 if (!I->getSUnit()->getNode() || 1076 I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg) 1077 Scratches += 10; 1078 } 1079 return Scratches; 1080} 1081 1082// Bottom up 1083bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1084 unsigned LPriority = SPQ->getNodePriority(left); 1085 unsigned RPriority = SPQ->getNodePriority(right); 1086 if (LPriority != RPriority) 1087 return LPriority > RPriority; 1088 1089 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1090 // e.g. 1091 // t1 = op t2, c1 1092 // t3 = op t4, c2 1093 // 1094 // and the following instructions are both ready. 1095 // t2 = op c3 1096 // t4 = op c4 1097 // 1098 // Then schedule t2 = op first. 1099 // i.e. 1100 // t4 = op c4 1101 // t2 = op c3 1102 // t1 = op t2, c1 1103 // t3 = op t4, c2 1104 // 1105 // This creates more short live intervals. 1106 unsigned LDist = closestSucc(left); 1107 unsigned RDist = closestSucc(right); 1108 if (LDist != RDist) 1109 return LDist < RDist; 1110 1111 // Intuitively, it's good to push down instructions whose results are 1112 // liveout so their long live ranges won't conflict with other values 1113 // which are needed inside the BB. Further prioritize liveout instructions 1114 // by the number of operands which are calculated within the BB. 1115 unsigned LScratch = calcMaxScratches(left); 1116 unsigned RScratch = calcMaxScratches(right); 1117 if (LScratch != RScratch) 1118 return LScratch > RScratch; 1119 1120 if (left->getHeight() != right->getHeight()) 1121 return left->getHeight() > right->getHeight(); 1122 1123 if (left->getDepth() != right->getDepth()) 1124 return left->getDepth() < right->getDepth(); 1125 1126 assert(left->NodeQueueId && right->NodeQueueId && 1127 "NodeQueueId cannot be zero"); 1128 return (left->NodeQueueId > right->NodeQueueId); 1129} 1130 1131template<class SF> 1132bool 1133RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1134 if (SU->isTwoAddress) { 1135 unsigned Opc = SU->getNode()->getMachineOpcode(); 1136 const TargetInstrDesc &TID = TII->get(Opc); 1137 unsigned NumRes = TID.getNumDefs(); 1138 unsigned NumOps = TID.getNumOperands() - NumRes; 1139 for (unsigned i = 0; i != NumOps; ++i) { 1140 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1141 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1142 if (DU->getNodeId() != -1 && 1143 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1144 return true; 1145 } 1146 } 1147 } 1148 return false; 1149} 1150 1151 1152/// hasCopyToRegUse - Return true if SU has a value successor that is a 1153/// CopyToReg node. 1154static bool hasCopyToRegUse(const SUnit *SU) { 1155 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1156 I != E; ++I) { 1157 if (I->isCtrl()) continue; 1158 const SUnit *SuccSU = I->getSUnit(); 1159 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1160 return true; 1161 } 1162 return false; 1163} 1164 1165/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1166/// physical register defs. 1167static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1168 const TargetInstrInfo *TII, 1169 const TargetRegisterInfo *TRI) { 1170 SDNode *N = SuccSU->getNode(); 1171 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1172 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1173 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1174 const unsigned *SUImpDefs = 1175 TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 1176 if (!SUImpDefs) 1177 return false; 1178 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1179 MVT VT = N->getValueType(i); 1180 if (VT == MVT::Flag || VT == MVT::Other) 1181 continue; 1182 if (!N->hasAnyUseOfValue(i)) 1183 continue; 1184 unsigned Reg = ImpDefs[i - NumDefs]; 1185 for (;*SUImpDefs; ++SUImpDefs) { 1186 unsigned SUReg = *SUImpDefs; 1187 if (TRI->regsOverlap(Reg, SUReg)) 1188 return true; 1189 } 1190 } 1191 return false; 1192} 1193 1194/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1195/// it as a def&use operand. Add a pseudo control edge from it to the other 1196/// node (if it won't create a cycle) so the two-address one will be scheduled 1197/// first (lower in the schedule). If both nodes are two-address, favor the 1198/// one that has a CopyToReg use (more likely to be a loop induction update). 1199/// If both are two-address, but one is commutable while the other is not 1200/// commutable, favor the one that's not commutable. 1201template<class SF> 1202void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1203 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1204 SUnit *SU = &(*SUnits)[i]; 1205 if (!SU->isTwoAddress) 1206 continue; 1207 1208 SDNode *Node = SU->getNode(); 1209 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1210 continue; 1211 1212 unsigned Opc = Node->getMachineOpcode(); 1213 const TargetInstrDesc &TID = TII->get(Opc); 1214 unsigned NumRes = TID.getNumDefs(); 1215 unsigned NumOps = TID.getNumOperands() - NumRes; 1216 for (unsigned j = 0; j != NumOps; ++j) { 1217 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1218 continue; 1219 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1220 if (DU->getNodeId() == -1) 1221 continue; 1222 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1223 if (!DUSU) continue; 1224 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1225 E = DUSU->Succs.end(); I != E; ++I) { 1226 if (I->isCtrl()) continue; 1227 SUnit *SuccSU = I->getSUnit(); 1228 if (SuccSU == SU) 1229 continue; 1230 // Be conservative. Ignore if nodes aren't at roughly the same 1231 // depth and height. 1232 if (SuccSU->getHeight() < SU->getHeight() && 1233 (SU->getHeight() - SuccSU->getHeight()) > 1) 1234 continue; 1235 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1236 continue; 1237 // Don't constrain nodes with physical register defs if the 1238 // predecessor can clobber them. 1239 if (SuccSU->hasPhysRegDefs) { 1240 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1241 continue; 1242 } 1243 // Don't constraint extract_subreg / insert_subreg these may be 1244 // coalesced away. We don't them close to their uses. 1245 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1246 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1247 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1248 continue; 1249 if ((!canClobber(SuccSU, DUSU) || 1250 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1251 (!SU->isCommutable && SuccSU->isCommutable)) && 1252 !scheduleDAG->IsReachable(SuccSU, SU)) { 1253 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum 1254 << " to SU #" << SuccSU->NodeNum << "\n"; 1255 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 1256 /*Reg=*/0, /*isMustAlias=*/false, 1257 /*isArtificial=*/true)); 1258 } 1259 } 1260 } 1261 } 1262} 1263 1264/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1265/// scheduling units. 1266template<class SF> 1267void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1268 SethiUllmanNumbers.assign(SUnits->size(), 0); 1269 1270 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1271 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1272} 1273 1274/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1275/// predecessors of the successors of the SUnit SU. Stop when the provided 1276/// limit is exceeded. 1277static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1278 unsigned Limit) { 1279 unsigned Sum = 0; 1280 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1281 I != E; ++I) { 1282 const SUnit *SuccSU = I->getSUnit(); 1283 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1284 EE = SuccSU->Preds.end(); II != EE; ++II) { 1285 SUnit *PredSU = II->getSUnit(); 1286 if (!PredSU->isScheduled) 1287 if (++Sum > Limit) 1288 return Sum; 1289 } 1290 } 1291 return Sum; 1292} 1293 1294 1295// Top down 1296bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1297 unsigned LPriority = SPQ->getNodePriority(left); 1298 unsigned RPriority = SPQ->getNodePriority(right); 1299 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1300 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1301 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1302 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1303 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1304 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1305 1306 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1307 return false; 1308 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1309 return true; 1310 1311 if (LIsFloater) 1312 LBonus -= 2; 1313 if (RIsFloater) 1314 RBonus -= 2; 1315 if (left->NumSuccs == 1) 1316 LBonus += 2; 1317 if (right->NumSuccs == 1) 1318 RBonus += 2; 1319 1320 if (LPriority+LBonus != RPriority+RBonus) 1321 return LPriority+LBonus < RPriority+RBonus; 1322 1323 if (left->getDepth() != right->getDepth()) 1324 return left->getDepth() < right->getDepth(); 1325 1326 if (left->NumSuccsLeft != right->NumSuccsLeft) 1327 return left->NumSuccsLeft > right->NumSuccsLeft; 1328 1329 assert(left->NodeQueueId && right->NodeQueueId && 1330 "NodeQueueId cannot be zero"); 1331 return (left->NodeQueueId > right->NodeQueueId); 1332} 1333 1334//===----------------------------------------------------------------------===// 1335// Public Constructor Functions 1336//===----------------------------------------------------------------------===// 1337 1338llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1339 SelectionDAG *DAG, 1340 const TargetMachine *TM, 1341 MachineBasicBlock *BB, 1342 bool) { 1343 const TargetInstrInfo *TII = TM->getInstrInfo(); 1344 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1345 1346 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1347 1348 ScheduleDAGRRList *SD = 1349 new ScheduleDAGRRList(DAG, BB, *TM, true, PQ); 1350 PQ->setScheduleDAG(SD); 1351 return SD; 1352} 1353 1354llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1355 SelectionDAG *DAG, 1356 const TargetMachine *TM, 1357 MachineBasicBlock *BB, 1358 bool) { 1359 const TargetInstrInfo *TII = TM->getInstrInfo(); 1360 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1361 1362 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1363 1364 ScheduleDAGRRList *SD = new ScheduleDAGRRList(DAG, BB, *TM, false, PQ); 1365 PQ->setScheduleDAG(SD); 1366 return SD; 1367} 1368