ScheduleDAGRRList.cpp revision 3f23744df4809eba94284e601e81489212c974d4
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/Target/TargetRegisterInfo.h" 22#include "llvm/Target/TargetData.h" 23#include "llvm/Target/TargetMachine.h" 24#include "llvm/Target/TargetInstrInfo.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/Compiler.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/PriorityQueue.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallSet.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/ADT/STLExtras.h" 33#include <climits> 34#include "llvm/Support/CommandLine.h" 35using namespace llvm; 36 37STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 38STATISTIC(NumUnfolds, "Number of nodes unfolded"); 39STATISTIC(NumDups, "Number of duplicated nodes"); 40STATISTIC(NumCCCopies, "Number of cross class copies"); 41 42static RegisterScheduler 43 burrListDAGScheduler("list-burr", 44 "Bottom-up register reduction list scheduling", 45 createBURRListDAGScheduler); 46static RegisterScheduler 47 tdrListrDAGScheduler("list-tdrr", 48 "Top-down register reduction list scheduling", 49 createTDRRListDAGScheduler); 50 51namespace { 52//===----------------------------------------------------------------------===// 53/// ScheduleDAGRRList - The actual register reduction list scheduler 54/// implementation. This supports both top-down and bottom-up scheduling. 55/// 56class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 57private: 58 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 59 /// it is top-down. 60 bool isBottomUp; 61 62 /// AvailableQueue - The priority queue to use for the available SUnits. 63 SchedulingPriorityQueue *AvailableQueue; 64 65 /// LiveRegDefs - A set of physical registers and their definition 66 /// that are "live". These nodes must be scheduled before any other nodes that 67 /// modifies the registers can be scheduled. 68 unsigned NumLiveRegs; 69 std::vector<SUnit*> LiveRegDefs; 70 std::vector<unsigned> LiveRegCycles; 71 72 /// Topo - A topological ordering for SUnits which permits fast IsReachable 73 /// and similar queries. 74 ScheduleDAGTopologicalSort Topo; 75 76public: 77 ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb, 78 const TargetMachine &tm, bool isbottomup, 79 SchedulingPriorityQueue *availqueue) 80 : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup), 81 AvailableQueue(availqueue), Topo(SUnits) { 82 } 83 84 ~ScheduleDAGRRList() { 85 delete AvailableQueue; 86 } 87 88 void Schedule(); 89 90 /// IsReachable - Checks if SU is reachable from TargetSU. 91 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 92 return Topo.IsReachable(SU, TargetSU); 93 } 94 95 /// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 96 /// create a cycle. 97 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 98 return Topo.WillCreateCycle(SU, TargetSU); 99 } 100 101 /// AddPred - adds a predecessor edge to SUnit SU. 102 /// This returns true if this is a new predecessor. 103 /// Updates the topological ordering if required. 104 void AddPred(SUnit *SU, const SDep &D) { 105 Topo.AddPred(SU, D.getSUnit()); 106 SU->addPred(D); 107 } 108 109 /// RemovePred - removes a predecessor edge from SUnit SU. 110 /// This returns true if an edge was removed. 111 /// Updates the topological ordering if required. 112 void RemovePred(SUnit *SU, const SDep &D) { 113 Topo.RemovePred(SU, D.getSUnit()); 114 SU->removePred(D); 115 } 116 117private: 118 void ReleasePred(SUnit *SU, SDep *PredEdge); 119 void ReleaseSucc(SUnit *SU, SDep *SuccEdge); 120 void CapturePred(SDep *PredEdge); 121 void ScheduleNodeBottomUp(SUnit*, unsigned); 122 void ScheduleNodeTopDown(SUnit*, unsigned); 123 void UnscheduleNodeBottomUp(SUnit*); 124 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 125 SUnit *CopyAndMoveSuccessors(SUnit*); 126 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, 127 const TargetRegisterClass*, 128 const TargetRegisterClass*, 129 SmallVector<SUnit*, 2>&); 130 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 131 void ListScheduleTopDown(); 132 void ListScheduleBottomUp(); 133 void CommuteNodesToReducePressure(); 134 135 136 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 137 /// Updates the topological ordering if required. 138 SUnit *CreateNewSUnit(SDNode *N) { 139 unsigned NumSUnits = SUnits.size(); 140 SUnit *NewNode = NewSUnit(N); 141 // Update the topological ordering. 142 if (NewNode->NodeNum >= NumSUnits) 143 Topo.InitDAGTopologicalSorting(); 144 return NewNode; 145 } 146 147 /// CreateClone - Creates a new SUnit from an existing one. 148 /// Updates the topological ordering if required. 149 SUnit *CreateClone(SUnit *N) { 150 unsigned NumSUnits = SUnits.size(); 151 SUnit *NewNode = Clone(N); 152 // Update the topological ordering. 153 if (NewNode->NodeNum >= NumSUnits) 154 Topo.InitDAGTopologicalSorting(); 155 return NewNode; 156 } 157 158 /// ForceUnitLatencies - Return true, since register-pressure-reducing 159 /// scheduling doesn't need actual latency information. 160 bool ForceUnitLatencies() const { return true; } 161}; 162} // end anonymous namespace 163 164 165/// Schedule - Schedule the DAG using list scheduling. 166void ScheduleDAGRRList::Schedule() { 167 DOUT << "********** List Scheduling **********\n"; 168 169 NumLiveRegs = 0; 170 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 171 LiveRegCycles.resize(TRI->getNumRegs(), 0); 172 173 // Build scheduling units. 174 BuildSchedUnits(); 175 176 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 177 SUnits[su].dumpAll(this)); 178 Topo.InitDAGTopologicalSorting(); 179 180 AvailableQueue->initNodes(SUnits); 181 182 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 183 if (isBottomUp) 184 ListScheduleBottomUp(); 185 else 186 ListScheduleTopDown(); 187 188 AvailableQueue->releaseState(); 189 190 CommuteNodesToReducePressure(); 191} 192 193/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 194/// it is not the last use of its first operand, add it to the CommuteSet if 195/// possible. It will be commuted when it is translated to a MI. 196void ScheduleDAGRRList::CommuteNodesToReducePressure() { 197 SmallPtrSet<SUnit*, 4> OperandSeen; 198 for (unsigned i = Sequence.size(); i != 0; ) { 199 --i; 200 SUnit *SU = Sequence[i]; 201 if (!SU || !SU->getNode()) continue; 202 if (SU->isCommutable) { 203 unsigned Opc = SU->getNode()->getMachineOpcode(); 204 const TargetInstrDesc &TID = TII->get(Opc); 205 unsigned NumRes = TID.getNumDefs(); 206 unsigned NumOps = TID.getNumOperands() - NumRes; 207 for (unsigned j = 0; j != NumOps; ++j) { 208 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 209 continue; 210 211 SDNode *OpN = SU->getNode()->getOperand(j).getNode(); 212 SUnit *OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()]; 213 if (OpSU && OperandSeen.count(OpSU) == 1) { 214 // Ok, so SU is not the last use of OpSU, but SU is two-address so 215 // it will clobber OpSU. Try to commute SU if no other source operands 216 // are live below. 217 bool DoCommute = true; 218 for (unsigned k = 0; k < NumOps; ++k) { 219 if (k != j) { 220 OpN = SU->getNode()->getOperand(k).getNode(); 221 OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()]; 222 if (OpSU && OperandSeen.count(OpSU) == 1) { 223 DoCommute = false; 224 break; 225 } 226 } 227 } 228 if (DoCommute) 229 CommuteSet.insert(SU->getNode()); 230 } 231 232 // Only look at the first use&def node for now. 233 break; 234 } 235 } 236 237 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 238 I != E; ++I) { 239 if (!I->isCtrl()) 240 OperandSeen.insert(I->getSUnit()->OrigNode); 241 } 242 } 243} 244 245//===----------------------------------------------------------------------===// 246// Bottom-Up Scheduling 247//===----------------------------------------------------------------------===// 248 249/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 250/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 251void ScheduleDAGRRList::ReleasePred(SUnit *SU, SDep *PredEdge) { 252 SUnit *PredSU = PredEdge->getSUnit(); 253 --PredSU->NumSuccsLeft; 254 255#ifndef NDEBUG 256 if (PredSU->NumSuccsLeft < 0) { 257 cerr << "*** Scheduling failed! ***\n"; 258 PredSU->dump(this); 259 cerr << " has been released too many times!\n"; 260 assert(0); 261 } 262#endif 263 264 if (PredSU->NumSuccsLeft == 0) { 265 PredSU->isAvailable = true; 266 AvailableQueue->push(PredSU); 267 } 268} 269 270/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 271/// count of its predecessors. If a predecessor pending count is zero, add it to 272/// the Available queue. 273void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 274 DOUT << "*** Scheduling [" << CurCycle << "]: "; 275 DEBUG(SU->dump(this)); 276 277 assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); 278 SU->setHeightToAtLeast(CurCycle); 279 Sequence.push_back(SU); 280 281 // Bottom up: release predecessors 282 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 283 I != E; ++I) { 284 ReleasePred(SU, &*I); 285 if (I->isAssignedRegDep()) { 286 // This is a physical register dependency and it's impossible or 287 // expensive to copy the register. Make sure nothing that can 288 // clobber the register is scheduled between the predecessor and 289 // this node. 290 if (!LiveRegDefs[I->getReg()]) { 291 ++NumLiveRegs; 292 LiveRegDefs[I->getReg()] = I->getSUnit(); 293 LiveRegCycles[I->getReg()] = CurCycle; 294 } 295 } 296 } 297 298 // Release all the implicit physical register defs that are live. 299 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 300 I != E; ++I) { 301 if (I->isAssignedRegDep()) { 302 if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { 303 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 304 assert(LiveRegDefs[I->getReg()] == SU && 305 "Physical register dependency violated?"); 306 --NumLiveRegs; 307 LiveRegDefs[I->getReg()] = NULL; 308 LiveRegCycles[I->getReg()] = 0; 309 } 310 } 311 } 312 313 SU->isScheduled = true; 314 AvailableQueue->ScheduledNode(SU); 315} 316 317/// CapturePred - This does the opposite of ReleasePred. Since SU is being 318/// unscheduled, incrcease the succ left count of its predecessors. Remove 319/// them from AvailableQueue if necessary. 320void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 321 SUnit *PredSU = PredEdge->getSUnit(); 322 if (PredSU->isAvailable) { 323 PredSU->isAvailable = false; 324 if (!PredSU->isPending) 325 AvailableQueue->remove(PredSU); 326 } 327 328 ++PredSU->NumSuccsLeft; 329} 330 331/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 332/// its predecessor states to reflect the change. 333void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 334 DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; 335 DEBUG(SU->dump(this)); 336 337 AvailableQueue->UnscheduledNode(SU); 338 339 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 340 I != E; ++I) { 341 CapturePred(&*I); 342 if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { 343 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 344 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 345 "Physical register dependency violated?"); 346 --NumLiveRegs; 347 LiveRegDefs[I->getReg()] = NULL; 348 LiveRegCycles[I->getReg()] = 0; 349 } 350 } 351 352 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 353 I != E; ++I) { 354 if (I->isAssignedRegDep()) { 355 if (!LiveRegDefs[I->getReg()]) { 356 LiveRegDefs[I->getReg()] = SU; 357 ++NumLiveRegs; 358 } 359 if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()]) 360 LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight(); 361 } 362 } 363 364 SU->setHeightDirty(); 365 SU->isScheduled = false; 366 SU->isAvailable = true; 367 AvailableQueue->push(SU); 368} 369 370/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 371/// BTCycle in order to schedule a specific node. Returns the last unscheduled 372/// SUnit. Also returns if a successor is unscheduled in the process. 373void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 374 unsigned &CurCycle) { 375 SUnit *OldSU = NULL; 376 while (CurCycle > BtCycle) { 377 OldSU = Sequence.back(); 378 Sequence.pop_back(); 379 if (SU->isSucc(OldSU)) 380 // Don't try to remove SU from AvailableQueue. 381 SU->isAvailable = false; 382 UnscheduleNodeBottomUp(OldSU); 383 --CurCycle; 384 } 385 386 387 if (SU->isSucc(OldSU)) { 388 assert(false && "Something is wrong!"); 389 abort(); 390 } 391 392 ++NumBacktracks; 393} 394 395/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 396/// successors to the newly created node. 397SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 398 if (SU->getNode()->getFlaggedNode()) 399 return NULL; 400 401 SDNode *N = SU->getNode(); 402 if (!N) 403 return NULL; 404 405 SUnit *NewSU; 406 bool TryUnfold = false; 407 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 408 MVT VT = N->getValueType(i); 409 if (VT == MVT::Flag) 410 return NULL; 411 else if (VT == MVT::Other) 412 TryUnfold = true; 413 } 414 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 415 const SDValue &Op = N->getOperand(i); 416 MVT VT = Op.getNode()->getValueType(Op.getResNo()); 417 if (VT == MVT::Flag) 418 return NULL; 419 } 420 421 if (TryUnfold) { 422 SmallVector<SDNode*, 2> NewNodes; 423 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 424 return NULL; 425 426 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 427 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 428 429 N = NewNodes[1]; 430 SDNode *LoadNode = NewNodes[0]; 431 unsigned NumVals = N->getNumValues(); 432 unsigned OldNumVals = SU->getNode()->getNumValues(); 433 for (unsigned i = 0; i != NumVals; ++i) 434 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 435 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 436 SDValue(LoadNode, 1)); 437 438 // LoadNode may already exist. This can happen when there is another 439 // load from the same location and producing the same type of value 440 // but it has different alignment or volatileness. 441 bool isNewLoad = true; 442 SUnit *LoadSU; 443 if (LoadNode->getNodeId() != -1) { 444 LoadSU = &SUnits[LoadNode->getNodeId()]; 445 isNewLoad = false; 446 } else { 447 LoadSU = CreateNewSUnit(LoadNode); 448 LoadNode->setNodeId(LoadSU->NodeNum); 449 ComputeLatency(LoadSU); 450 } 451 452 SUnit *NewSU = CreateNewSUnit(N); 453 assert(N->getNodeId() == -1 && "Node already inserted!"); 454 N->setNodeId(NewSU->NodeNum); 455 456 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 457 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 458 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 459 NewSU->isTwoAddress = true; 460 break; 461 } 462 } 463 if (TID.isCommutable()) 464 NewSU->isCommutable = true; 465 ComputeLatency(NewSU); 466 467 SDep ChainPred; 468 SmallVector<SDep, 4> ChainSuccs; 469 SmallVector<SDep, 4> LoadPreds; 470 SmallVector<SDep, 4> NodePreds; 471 SmallVector<SDep, 4> NodeSuccs; 472 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 473 I != E; ++I) { 474 if (I->isCtrl()) 475 ChainPred = *I; 476 else if (I->getSUnit()->getNode() && 477 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 478 LoadPreds.push_back(*I); 479 else 480 NodePreds.push_back(*I); 481 } 482 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 483 I != E; ++I) { 484 if (I->isCtrl()) 485 ChainSuccs.push_back(*I); 486 else 487 NodeSuccs.push_back(*I); 488 } 489 490 if (ChainPred.getSUnit()) { 491 RemovePred(SU, ChainPred); 492 if (isNewLoad) 493 AddPred(LoadSU, ChainPred); 494 } 495 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 496 const SDep &Pred = LoadPreds[i]; 497 RemovePred(SU, Pred); 498 if (isNewLoad) { 499 AddPred(LoadSU, Pred); 500 } 501 } 502 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 503 const SDep &Pred = NodePreds[i]; 504 RemovePred(SU, Pred); 505 AddPred(NewSU, Pred); 506 } 507 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 508 SDep D = NodeSuccs[i]; 509 SUnit *SuccDep = D.getSUnit(); 510 D.setSUnit(SU); 511 RemovePred(SuccDep, D); 512 D.setSUnit(NewSU); 513 AddPred(SuccDep, D); 514 } 515 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 516 SDep D = ChainSuccs[i]; 517 SUnit *SuccDep = D.getSUnit(); 518 D.setSUnit(SU); 519 RemovePred(SuccDep, D); 520 if (isNewLoad) { 521 D.setSUnit(LoadSU); 522 AddPred(SuccDep, D); 523 } 524 } 525 if (isNewLoad) { 526 AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); 527 } 528 529 if (isNewLoad) 530 AvailableQueue->addNode(LoadSU); 531 AvailableQueue->addNode(NewSU); 532 533 ++NumUnfolds; 534 535 if (NewSU->NumSuccsLeft == 0) { 536 NewSU->isAvailable = true; 537 return NewSU; 538 } 539 SU = NewSU; 540 } 541 542 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 543 NewSU = CreateClone(SU); 544 545 // New SUnit has the exact same predecessors. 546 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 547 I != E; ++I) 548 if (!I->isArtificial()) 549 AddPred(NewSU, *I); 550 551 // Only copy scheduled successors. Cut them from old node's successor 552 // list and move them over. 553 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 554 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 555 I != E; ++I) { 556 if (I->isArtificial()) 557 continue; 558 SUnit *SuccSU = I->getSUnit(); 559 if (SuccSU->isScheduled) { 560 SDep D = *I; 561 D.setSUnit(NewSU); 562 AddPred(SuccSU, D); 563 D.setSUnit(SU); 564 DelDeps.push_back(std::make_pair(SuccSU, D)); 565 } 566 } 567 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) 568 RemovePred(DelDeps[i].first, DelDeps[i].second); 569 570 AvailableQueue->updateNode(SU); 571 AvailableQueue->addNode(NewSU); 572 573 ++NumDups; 574 return NewSU; 575} 576 577/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 578/// and move all scheduled successors of the given SUnit to the last copy. 579void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 580 const TargetRegisterClass *DestRC, 581 const TargetRegisterClass *SrcRC, 582 SmallVector<SUnit*, 2> &Copies) { 583 SUnit *CopyFromSU = CreateNewSUnit(NULL); 584 CopyFromSU->CopySrcRC = SrcRC; 585 CopyFromSU->CopyDstRC = DestRC; 586 587 SUnit *CopyToSU = CreateNewSUnit(NULL); 588 CopyToSU->CopySrcRC = DestRC; 589 CopyToSU->CopyDstRC = SrcRC; 590 591 // Only copy scheduled successors. Cut them from old node's successor 592 // list and move them over. 593 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 594 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 595 I != E; ++I) { 596 if (I->isArtificial()) 597 continue; 598 SUnit *SuccSU = I->getSUnit(); 599 if (SuccSU->isScheduled) { 600 SDep D = *I; 601 D.setSUnit(CopyToSU); 602 AddPred(SuccSU, D); 603 DelDeps.push_back(std::make_pair(SuccSU, *I)); 604 } 605 } 606 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 607 RemovePred(DelDeps[i].first, DelDeps[i].second); 608 } 609 610 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 611 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 612 613 AvailableQueue->updateNode(SU); 614 AvailableQueue->addNode(CopyFromSU); 615 AvailableQueue->addNode(CopyToSU); 616 Copies.push_back(CopyFromSU); 617 Copies.push_back(CopyToSU); 618 619 ++NumCCCopies; 620} 621 622/// getPhysicalRegisterVT - Returns the ValueType of the physical register 623/// definition of the specified node. 624/// FIXME: Move to SelectionDAG? 625static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 626 const TargetInstrInfo *TII) { 627 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 628 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 629 unsigned NumRes = TID.getNumDefs(); 630 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 631 if (Reg == *ImpDef) 632 break; 633 ++NumRes; 634 } 635 return N->getValueType(NumRes); 636} 637 638/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 639/// scheduling of the given node to satisfy live physical register dependencies. 640/// If the specific node is the last one that's available to schedule, do 641/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 642bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 643 SmallVector<unsigned, 4> &LRegs){ 644 if (NumLiveRegs == 0) 645 return false; 646 647 SmallSet<unsigned, 4> RegAdded; 648 // If this node would clobber any "live" register, then it's not ready. 649 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 650 I != E; ++I) { 651 if (I->isAssignedRegDep()) { 652 unsigned Reg = I->getReg(); 653 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { 654 if (RegAdded.insert(Reg)) 655 LRegs.push_back(Reg); 656 } 657 for (const unsigned *Alias = TRI->getAliasSet(Reg); 658 *Alias; ++Alias) 659 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { 660 if (RegAdded.insert(*Alias)) 661 LRegs.push_back(*Alias); 662 } 663 } 664 } 665 666 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 667 if (!Node->isMachineOpcode()) 668 continue; 669 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 670 if (!TID.ImplicitDefs) 671 continue; 672 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 673 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { 674 if (RegAdded.insert(*Reg)) 675 LRegs.push_back(*Reg); 676 } 677 for (const unsigned *Alias = TRI->getAliasSet(*Reg); 678 *Alias; ++Alias) 679 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 680 if (RegAdded.insert(*Alias)) 681 LRegs.push_back(*Alias); 682 } 683 } 684 } 685 return !LRegs.empty(); 686} 687 688 689/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 690/// schedulers. 691void ScheduleDAGRRList::ListScheduleBottomUp() { 692 unsigned CurCycle = 0; 693 // Add root to Available queue. 694 if (!SUnits.empty()) { 695 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 696 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 697 RootSU->isAvailable = true; 698 AvailableQueue->push(RootSU); 699 } 700 701 // While Available queue is not empty, grab the node with the highest 702 // priority. If it is not ready put it back. Schedule the node. 703 SmallVector<SUnit*, 4> NotReady; 704 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 705 Sequence.reserve(SUnits.size()); 706 while (!AvailableQueue->empty()) { 707 bool Delayed = false; 708 LRegsMap.clear(); 709 SUnit *CurSU = AvailableQueue->pop(); 710 while (CurSU) { 711 SmallVector<unsigned, 4> LRegs; 712 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 713 break; 714 Delayed = true; 715 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 716 717 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 718 NotReady.push_back(CurSU); 719 CurSU = AvailableQueue->pop(); 720 } 721 722 // All candidates are delayed due to live physical reg dependencies. 723 // Try backtracking, code duplication, or inserting cross class copies 724 // to resolve it. 725 if (Delayed && !CurSU) { 726 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 727 SUnit *TrySU = NotReady[i]; 728 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 729 730 // Try unscheduling up to the point where it's safe to schedule 731 // this node. 732 unsigned LiveCycle = CurCycle; 733 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 734 unsigned Reg = LRegs[j]; 735 unsigned LCycle = LiveRegCycles[Reg]; 736 LiveCycle = std::min(LiveCycle, LCycle); 737 } 738 SUnit *OldSU = Sequence[LiveCycle]; 739 if (!WillCreateCycle(TrySU, OldSU)) { 740 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 741 // Force the current node to be scheduled before the node that 742 // requires the physical reg dep. 743 if (OldSU->isAvailable) { 744 OldSU->isAvailable = false; 745 AvailableQueue->remove(OldSU); 746 } 747 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 748 /*Reg=*/0, /*isNormalMemory=*/false, 749 /*isMustAlias=*/false, /*isArtificial=*/true)); 750 // If one or more successors has been unscheduled, then the current 751 // node is no longer avaialable. Schedule a successor that's now 752 // available instead. 753 if (!TrySU->isAvailable) 754 CurSU = AvailableQueue->pop(); 755 else { 756 CurSU = TrySU; 757 TrySU->isPending = false; 758 NotReady.erase(NotReady.begin()+i); 759 } 760 break; 761 } 762 } 763 764 if (!CurSU) { 765 // Can't backtrack. Try duplicating the nodes that produces these 766 // "expensive to copy" values to break the dependency. In case even 767 // that doesn't work, insert cross class copies. 768 SUnit *TrySU = NotReady[0]; 769 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 770 assert(LRegs.size() == 1 && "Can't handle this yet!"); 771 unsigned Reg = LRegs[0]; 772 SUnit *LRDef = LiveRegDefs[Reg]; 773 SUnit *NewDef = CopyAndMoveSuccessors(LRDef); 774 if (!NewDef) { 775 // Issue expensive cross register class copies. 776 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 777 const TargetRegisterClass *RC = 778 TRI->getPhysicalRegisterRegClass(Reg, VT); 779 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 780 if (!DestRC) { 781 assert(false && "Don't know how to copy this physical register!"); 782 abort(); 783 } 784 SmallVector<SUnit*, 2> Copies; 785 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 786 DOUT << "Adding an edge from SU # " << TrySU->NodeNum 787 << " to SU #" << Copies.front()->NodeNum << "\n"; 788 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 789 /*Reg=*/0, /*isMustAlias=*/false, 790 /*isArtificial=*/true)); 791 NewDef = Copies.back(); 792 } 793 794 DOUT << "Adding an edge from SU # " << NewDef->NodeNum 795 << " to SU #" << TrySU->NodeNum << "\n"; 796 LiveRegDefs[Reg] = NewDef; 797 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 798 /*Reg=*/0, /*isMustAlias=*/false, 799 /*isArtificial=*/true)); 800 TrySU->isAvailable = false; 801 CurSU = NewDef; 802 } 803 804 if (!CurSU) { 805 assert(false && "Unable to resolve live physical register dependencies!"); 806 abort(); 807 } 808 } 809 810 // Add the nodes that aren't ready back onto the available list. 811 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 812 NotReady[i]->isPending = false; 813 // May no longer be available due to backtracking. 814 if (NotReady[i]->isAvailable) 815 AvailableQueue->push(NotReady[i]); 816 } 817 NotReady.clear(); 818 819 if (CurSU) 820 ScheduleNodeBottomUp(CurSU, CurCycle); 821 ++CurCycle; 822 } 823 824 // Reverse the order if it is bottom up. 825 std::reverse(Sequence.begin(), Sequence.end()); 826 827#ifndef NDEBUG 828 VerifySchedule(isBottomUp); 829#endif 830} 831 832//===----------------------------------------------------------------------===// 833// Top-Down Scheduling 834//===----------------------------------------------------------------------===// 835 836/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 837/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 838void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { 839 SUnit *SuccSU = SuccEdge->getSUnit(); 840 --SuccSU->NumPredsLeft; 841 842#ifndef NDEBUG 843 if (SuccSU->NumPredsLeft < 0) { 844 cerr << "*** Scheduling failed! ***\n"; 845 SuccSU->dump(this); 846 cerr << " has been released too many times!\n"; 847 assert(0); 848 } 849#endif 850 851 if (SuccSU->NumPredsLeft == 0) { 852 SuccSU->isAvailable = true; 853 AvailableQueue->push(SuccSU); 854 } 855} 856 857/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 858/// count of its successors. If a successor pending count is zero, add it to 859/// the Available queue. 860void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 861 DOUT << "*** Scheduling [" << CurCycle << "]: "; 862 DEBUG(SU->dump(this)); 863 864 assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); 865 SU->setDepthToAtLeast(CurCycle); 866 Sequence.push_back(SU); 867 868 // Top down: release successors 869 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 870 I != E; ++I) 871 ReleaseSucc(SU, &*I); 872 873 SU->isScheduled = true; 874 AvailableQueue->ScheduledNode(SU); 875} 876 877/// ListScheduleTopDown - The main loop of list scheduling for top-down 878/// schedulers. 879void ScheduleDAGRRList::ListScheduleTopDown() { 880 unsigned CurCycle = 0; 881 882 // All leaves to Available queue. 883 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 884 // It is available if it has no predecessors. 885 if (SUnits[i].Preds.empty()) { 886 AvailableQueue->push(&SUnits[i]); 887 SUnits[i].isAvailable = true; 888 } 889 } 890 891 // While Available queue is not empty, grab the node with the highest 892 // priority. If it is not ready put it back. Schedule the node. 893 Sequence.reserve(SUnits.size()); 894 while (!AvailableQueue->empty()) { 895 SUnit *CurSU = AvailableQueue->pop(); 896 897 if (CurSU) 898 ScheduleNodeTopDown(CurSU, CurCycle); 899 ++CurCycle; 900 } 901 902#ifndef NDEBUG 903 VerifySchedule(isBottomUp); 904#endif 905} 906 907 908//===----------------------------------------------------------------------===// 909// RegReductionPriorityQueue Implementation 910//===----------------------------------------------------------------------===// 911// 912// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 913// to reduce register pressure. 914// 915namespace { 916 template<class SF> 917 class RegReductionPriorityQueue; 918 919 /// Sorting functions for the Available queue. 920 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 921 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 922 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 923 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 924 925 bool operator()(const SUnit* left, const SUnit* right) const; 926 }; 927 928 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 929 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 930 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 931 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 932 933 bool operator()(const SUnit* left, const SUnit* right) const; 934 }; 935} // end anonymous namespace 936 937static inline bool isCopyFromLiveIn(const SUnit *SU) { 938 SDNode *N = SU->getNode(); 939 return N && N->getOpcode() == ISD::CopyFromReg && 940 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 941} 942 943/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 944/// Smaller number is the higher priority. 945static unsigned 946CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 947 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 948 if (SethiUllmanNumber != 0) 949 return SethiUllmanNumber; 950 951 unsigned Extra = 0; 952 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 953 I != E; ++I) { 954 if (I->isCtrl()) continue; // ignore chain preds 955 SUnit *PredSU = I->getSUnit(); 956 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 957 if (PredSethiUllman > SethiUllmanNumber) { 958 SethiUllmanNumber = PredSethiUllman; 959 Extra = 0; 960 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl()) 961 ++Extra; 962 } 963 964 SethiUllmanNumber += Extra; 965 966 if (SethiUllmanNumber == 0) 967 SethiUllmanNumber = 1; 968 969 return SethiUllmanNumber; 970} 971 972namespace { 973 template<class SF> 974 class VISIBILITY_HIDDEN RegReductionPriorityQueue 975 : public SchedulingPriorityQueue { 976 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 977 unsigned currentQueueId; 978 979 protected: 980 // SUnits - The SUnits for the current graph. 981 std::vector<SUnit> *SUnits; 982 983 const TargetInstrInfo *TII; 984 const TargetRegisterInfo *TRI; 985 ScheduleDAGRRList *scheduleDAG; 986 987 // SethiUllmanNumbers - The SethiUllman number for each node. 988 std::vector<unsigned> SethiUllmanNumbers; 989 990 public: 991 RegReductionPriorityQueue(const TargetInstrInfo *tii, 992 const TargetRegisterInfo *tri) : 993 Queue(SF(this)), currentQueueId(0), 994 TII(tii), TRI(tri), scheduleDAG(NULL) {} 995 996 void initNodes(std::vector<SUnit> &sunits) { 997 SUnits = &sunits; 998 // Add pseudo dependency edges for two-address nodes. 999 AddPseudoTwoAddrDeps(); 1000 // Calculate node priorities. 1001 CalculateSethiUllmanNumbers(); 1002 } 1003 1004 void addNode(const SUnit *SU) { 1005 unsigned SUSize = SethiUllmanNumbers.size(); 1006 if (SUnits->size() > SUSize) 1007 SethiUllmanNumbers.resize(SUSize*2, 0); 1008 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1009 } 1010 1011 void updateNode(const SUnit *SU) { 1012 SethiUllmanNumbers[SU->NodeNum] = 0; 1013 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1014 } 1015 1016 void releaseState() { 1017 SUnits = 0; 1018 SethiUllmanNumbers.clear(); 1019 } 1020 1021 unsigned getNodePriority(const SUnit *SU) const { 1022 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1023 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1024 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1025 // CopyFromReg should be close to its def because it restricts 1026 // allocation choices. But if it is a livein then perhaps we want it 1027 // closer to its uses so it can be coalesced. 1028 return 0xffff; 1029 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1030 // CopyToReg should be close to its uses to facilitate coalescing and 1031 // avoid spilling. 1032 return 0; 1033 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1034 Opc == TargetInstrInfo::INSERT_SUBREG) 1035 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1036 // facilitate coalescing. 1037 return 0; 1038 else if (SU->NumSuccs == 0) 1039 // If SU does not have a use, i.e. it doesn't produce a value that would 1040 // be consumed (e.g. store), then it terminates a chain of computation. 1041 // Give it a large SethiUllman number so it will be scheduled right 1042 // before its predecessors that it doesn't lengthen their live ranges. 1043 return 0xffff; 1044 else if (SU->NumPreds == 0) 1045 // If SU does not have a def, schedule it close to its uses because it 1046 // does not lengthen any live ranges. 1047 return 0; 1048 else 1049 return SethiUllmanNumbers[SU->NodeNum]; 1050 } 1051 1052 unsigned size() const { return Queue.size(); } 1053 1054 bool empty() const { return Queue.empty(); } 1055 1056 void push(SUnit *U) { 1057 assert(!U->NodeQueueId && "Node in the queue already"); 1058 U->NodeQueueId = ++currentQueueId; 1059 Queue.push(U); 1060 } 1061 1062 void push_all(const std::vector<SUnit *> &Nodes) { 1063 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1064 push(Nodes[i]); 1065 } 1066 1067 SUnit *pop() { 1068 if (empty()) return NULL; 1069 SUnit *V = Queue.top(); 1070 Queue.pop(); 1071 V->NodeQueueId = 0; 1072 return V; 1073 } 1074 1075 void remove(SUnit *SU) { 1076 assert(!Queue.empty() && "Queue is empty!"); 1077 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1078 Queue.erase_one(SU); 1079 SU->NodeQueueId = 0; 1080 } 1081 1082 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1083 scheduleDAG = scheduleDag; 1084 } 1085 1086 protected: 1087 bool canClobber(const SUnit *SU, const SUnit *Op); 1088 void AddPseudoTwoAddrDeps(); 1089 void CalculateSethiUllmanNumbers(); 1090 }; 1091 1092 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1093 BURegReductionPriorityQueue; 1094 1095 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1096 TDRegReductionPriorityQueue; 1097} 1098 1099/// closestSucc - Returns the scheduled cycle of the successor which is 1100/// closet to the current cycle. 1101static unsigned closestSucc(const SUnit *SU) { 1102 unsigned MaxHeight = 0; 1103 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1104 I != E; ++I) { 1105 unsigned Height = I->getSUnit()->getHeight(); 1106 // If there are bunch of CopyToRegs stacked up, they should be considered 1107 // to be at the same position. 1108 if (I->getSUnit()->getNode() && 1109 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1110 Height = closestSucc(I->getSUnit())+1; 1111 if (Height > MaxHeight) 1112 MaxHeight = Height; 1113 } 1114 return MaxHeight; 1115} 1116 1117/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1118/// for scratch registers. Live-in operands and live-out results don't count 1119/// since they are "fixed". 1120static unsigned calcMaxScratches(const SUnit *SU) { 1121 unsigned Scratches = 0; 1122 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1123 I != E; ++I) { 1124 if (I->isCtrl()) continue; // ignore chain preds 1125 if (!I->getSUnit()->getNode() || 1126 I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg) 1127 Scratches++; 1128 } 1129 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1130 I != E; ++I) { 1131 if (I->isCtrl()) continue; // ignore chain succs 1132 if (!I->getSUnit()->getNode() || 1133 I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg) 1134 Scratches += 10; 1135 } 1136 return Scratches; 1137} 1138 1139// Bottom up 1140bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1141 unsigned LPriority = SPQ->getNodePriority(left); 1142 unsigned RPriority = SPQ->getNodePriority(right); 1143 if (LPriority != RPriority) 1144 return LPriority > RPriority; 1145 1146 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1147 // e.g. 1148 // t1 = op t2, c1 1149 // t3 = op t4, c2 1150 // 1151 // and the following instructions are both ready. 1152 // t2 = op c3 1153 // t4 = op c4 1154 // 1155 // Then schedule t2 = op first. 1156 // i.e. 1157 // t4 = op c4 1158 // t2 = op c3 1159 // t1 = op t2, c1 1160 // t3 = op t4, c2 1161 // 1162 // This creates more short live intervals. 1163 unsigned LDist = closestSucc(left); 1164 unsigned RDist = closestSucc(right); 1165 if (LDist != RDist) 1166 return LDist < RDist; 1167 1168 // Intuitively, it's good to push down instructions whose results are 1169 // liveout so their long live ranges won't conflict with other values 1170 // which are needed inside the BB. Further prioritize liveout instructions 1171 // by the number of operands which are calculated within the BB. 1172 unsigned LScratch = calcMaxScratches(left); 1173 unsigned RScratch = calcMaxScratches(right); 1174 if (LScratch != RScratch) 1175 return LScratch > RScratch; 1176 1177 if (left->getHeight() != right->getHeight()) 1178 return left->getHeight() > right->getHeight(); 1179 1180 if (left->getDepth() != right->getDepth()) 1181 return left->getDepth() < right->getDepth(); 1182 1183 assert(left->NodeQueueId && right->NodeQueueId && 1184 "NodeQueueId cannot be zero"); 1185 return (left->NodeQueueId > right->NodeQueueId); 1186} 1187 1188template<class SF> 1189bool 1190RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1191 if (SU->isTwoAddress) { 1192 unsigned Opc = SU->getNode()->getMachineOpcode(); 1193 const TargetInstrDesc &TID = TII->get(Opc); 1194 unsigned NumRes = TID.getNumDefs(); 1195 unsigned NumOps = TID.getNumOperands() - NumRes; 1196 for (unsigned i = 0; i != NumOps; ++i) { 1197 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1198 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1199 if (DU->getNodeId() != -1 && 1200 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1201 return true; 1202 } 1203 } 1204 } 1205 return false; 1206} 1207 1208 1209/// hasCopyToRegUse - Return true if SU has a value successor that is a 1210/// CopyToReg node. 1211static bool hasCopyToRegUse(const SUnit *SU) { 1212 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1213 I != E; ++I) { 1214 if (I->isCtrl()) continue; 1215 const SUnit *SuccSU = I->getSUnit(); 1216 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1217 return true; 1218 } 1219 return false; 1220} 1221 1222/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1223/// physical register defs. 1224static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1225 const TargetInstrInfo *TII, 1226 const TargetRegisterInfo *TRI) { 1227 SDNode *N = SuccSU->getNode(); 1228 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1229 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1230 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1231 const unsigned *SUImpDefs = 1232 TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 1233 if (!SUImpDefs) 1234 return false; 1235 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1236 MVT VT = N->getValueType(i); 1237 if (VT == MVT::Flag || VT == MVT::Other) 1238 continue; 1239 if (!N->hasAnyUseOfValue(i)) 1240 continue; 1241 unsigned Reg = ImpDefs[i - NumDefs]; 1242 for (;*SUImpDefs; ++SUImpDefs) { 1243 unsigned SUReg = *SUImpDefs; 1244 if (TRI->regsOverlap(Reg, SUReg)) 1245 return true; 1246 } 1247 } 1248 return false; 1249} 1250 1251/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1252/// it as a def&use operand. Add a pseudo control edge from it to the other 1253/// node (if it won't create a cycle) so the two-address one will be scheduled 1254/// first (lower in the schedule). If both nodes are two-address, favor the 1255/// one that has a CopyToReg use (more likely to be a loop induction update). 1256/// If both are two-address, but one is commutable while the other is not 1257/// commutable, favor the one that's not commutable. 1258template<class SF> 1259void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1260 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1261 SUnit *SU = &(*SUnits)[i]; 1262 if (!SU->isTwoAddress) 1263 continue; 1264 1265 SDNode *Node = SU->getNode(); 1266 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1267 continue; 1268 1269 unsigned Opc = Node->getMachineOpcode(); 1270 const TargetInstrDesc &TID = TII->get(Opc); 1271 unsigned NumRes = TID.getNumDefs(); 1272 unsigned NumOps = TID.getNumOperands() - NumRes; 1273 for (unsigned j = 0; j != NumOps; ++j) { 1274 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1275 continue; 1276 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1277 if (DU->getNodeId() == -1) 1278 continue; 1279 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1280 if (!DUSU) continue; 1281 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1282 E = DUSU->Succs.end(); I != E; ++I) { 1283 if (I->isCtrl()) continue; 1284 SUnit *SuccSU = I->getSUnit(); 1285 if (SuccSU == SU) 1286 continue; 1287 // Be conservative. Ignore if nodes aren't at roughly the same 1288 // depth and height. 1289 if (SuccSU->getHeight() < SU->getHeight() && 1290 (SU->getHeight() - SuccSU->getHeight()) > 1) 1291 continue; 1292 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1293 continue; 1294 // Don't constrain nodes with physical register defs if the 1295 // predecessor can clobber them. 1296 if (SuccSU->hasPhysRegDefs) { 1297 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1298 continue; 1299 } 1300 // Don't constraint extract_subreg / insert_subreg these may be 1301 // coalesced away. We don't them close to their uses. 1302 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1303 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1304 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1305 continue; 1306 if ((!canClobber(SuccSU, DUSU) || 1307 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1308 (!SU->isCommutable && SuccSU->isCommutable)) && 1309 !scheduleDAG->IsReachable(SuccSU, SU)) { 1310 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum 1311 << " to SU #" << SuccSU->NodeNum << "\n"; 1312 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/1, 1313 /*Reg=*/0, /*isMustAlias=*/false, 1314 /*isArtificial=*/true)); 1315 } 1316 } 1317 } 1318 } 1319} 1320 1321/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1322/// scheduling units. 1323template<class SF> 1324void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1325 SethiUllmanNumbers.assign(SUnits->size(), 0); 1326 1327 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1328 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1329} 1330 1331/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1332/// predecessors of the successors of the SUnit SU. Stop when the provided 1333/// limit is exceeded. 1334static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1335 unsigned Limit) { 1336 unsigned Sum = 0; 1337 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1338 I != E; ++I) { 1339 const SUnit *SuccSU = I->getSUnit(); 1340 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1341 EE = SuccSU->Preds.end(); II != EE; ++II) { 1342 SUnit *PredSU = II->getSUnit(); 1343 if (!PredSU->isScheduled) 1344 if (++Sum > Limit) 1345 return Sum; 1346 } 1347 } 1348 return Sum; 1349} 1350 1351 1352// Top down 1353bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1354 unsigned LPriority = SPQ->getNodePriority(left); 1355 unsigned RPriority = SPQ->getNodePriority(right); 1356 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1357 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1358 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1359 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1360 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1361 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1362 1363 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1364 return false; 1365 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1366 return true; 1367 1368 if (LIsFloater) 1369 LBonus -= 2; 1370 if (RIsFloater) 1371 RBonus -= 2; 1372 if (left->NumSuccs == 1) 1373 LBonus += 2; 1374 if (right->NumSuccs == 1) 1375 RBonus += 2; 1376 1377 if (LPriority+LBonus != RPriority+RBonus) 1378 return LPriority+LBonus < RPriority+RBonus; 1379 1380 if (left->getDepth() != right->getDepth()) 1381 return left->getDepth() < right->getDepth(); 1382 1383 if (left->NumSuccsLeft != right->NumSuccsLeft) 1384 return left->NumSuccsLeft > right->NumSuccsLeft; 1385 1386 assert(left->NodeQueueId && right->NodeQueueId && 1387 "NodeQueueId cannot be zero"); 1388 return (left->NodeQueueId > right->NodeQueueId); 1389} 1390 1391//===----------------------------------------------------------------------===// 1392// Public Constructor Functions 1393//===----------------------------------------------------------------------===// 1394 1395llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1396 SelectionDAG *DAG, 1397 const TargetMachine *TM, 1398 MachineBasicBlock *BB, 1399 bool) { 1400 const TargetInstrInfo *TII = TM->getInstrInfo(); 1401 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1402 1403 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1404 1405 ScheduleDAGRRList *SD = 1406 new ScheduleDAGRRList(DAG, BB, *TM, true, PQ); 1407 PQ->setScheduleDAG(SD); 1408 return SD; 1409} 1410 1411llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1412 SelectionDAG *DAG, 1413 const TargetMachine *TM, 1414 MachineBasicBlock *BB, 1415 bool) { 1416 const TargetInstrInfo *TII = TM->getInstrInfo(); 1417 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1418 1419 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1420 1421 ScheduleDAGRRList *SD = new ScheduleDAGRRList(DAG, BB, *TM, false, PQ); 1422 PQ->setScheduleDAG(SD); 1423 return SD; 1424} 1425