ScheduleDAGRRList.cpp revision 29d8f0cae425f1bba583565227eaebf58f26ce73
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
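  ///
  /// For example, if the SUnit defining a condition register such as x86's
  /// EFLAGS is still unscheduled while a consumer of it has already been
  /// scheduled (bottom-up), the register is "live", and any node that would
  /// clobber it must be delayed until the def is scheduled.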
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup, bool needlatency,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
      AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
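/// Builds the SUnit graph, initializes the topological ordering and the
/// priority queue, then runs the bottom-up or top-down scheduling loop.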
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
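      // Note that the def is recorded even when SU itself defines the same
      // register; this is what chains two-address nodes into the single
      // live range described above.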
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Handle noop hazard.
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
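      // In the addc example above, unscheduling (2) addc makes it the nearest
      // def again, even though (3) add, an earlier def of flags, may still be
      // pending.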
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
  AvailableQueue->UnscheduledNode(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (N->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatility.
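    // If so, reuse the existing SUnit (and its scheduling state) rather than
    // creating a second SUnit for the same load.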
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                             const TargetRegisterClass *DestRC,
                                             const TargetRegisterClass *SrcRC,
                                             SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Update the LRegs vector if the specified register def
/// of the specified SUnit clobbers any "live" registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if this alias is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add the alias to the set of interfering live regs.
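    // RegAdded deduplicates across calls for the same SU, so each interfering
    // register is reported at most once per DelayForLiveRegsBottomUp query.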
    if (RegAdded.insert(*AliasI))
      LRegs.push_back(*AliasI);
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specific node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available (TBD)
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    unsigned LiveCycle = CurCycle;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      unsigned LCycle = LiveRegGens[Reg]->getHeight();
      LiveCycle = std::min(LiveCycle, LCycle);
    }
    SUnit *OldSU = Sequence[LiveCycle];
    if (!WillCreateCycle(TrySU, OldSU)) {
      BacktrackBottomUp(TrySU, LiveCycle);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (OldSU->isAvailable) {
        OldSU->isAvailable = false;
        if (!OldSU->isPending)
          AvailableQueue->remove(OldSU);
      }
      AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If cross copy register class is null, then it must be possible to copy
    // the value directly. Do not try to duplicate the def.
    SUnit *NewDef = 0;
    if (DestRC)
      NewDef = CopyAndMoveSuccessors(LRDef);
    else
      DestRC = RC;
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
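      // InsertCopiesAndMoveSuccs creates a pair of copy SUnits: Copies.front()
      // copies the value out of Reg's register class and Copies.back() copies
      // it back, becoming the new def that the already-scheduled successors
      // hang off.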
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    if (SU)
      ScheduleNodeBottomUp(SU);

    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  // bu_ls_rr_sort - Priority function for bottom up register pressure
  // reduction scheduler.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // td_ls_rr_sort - Priority function for top down register pressure reduction
  // scheduler.
  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // src_ls_rr_sort - Priority function for source order scheduler.
  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
      : SPQ(spq) {}
    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
  // scheduler.
  struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
      : SPQ(spq) {}
    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
} // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
/// Smaller numbers mean higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    std::vector<SUnit*> Queue;
    SF Picker;
    unsigned CurQueueId;
    bool TracksRegPressure;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    MachineFunction &MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const TargetLowering *TLI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    /// RegPressure - Tracking current reg pressure per register class.
    ///
    std::vector<unsigned> RegPressure;

    /// RegLimit - Tracking the number of allocatable registers per register
    /// class.
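    /// Indexed by register class ID, like RegPressure.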
    std::vector<unsigned> RegLimit;

  public:
    RegReductionPriorityQueue(MachineFunction &mf,
                              bool tracksrp,
                              const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri,
                              const TargetLowering *tli)
      : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
      if (TracksRegPressure) {
        unsigned NumRC = TRI->getNumRegClasses();
        RegLimit.resize(NumRC);
        RegPressure.resize(NumRC);
        std::fill(RegLimit.begin(), RegLimit.end(), 0);
        std::fill(RegPressure.begin(), RegPressure.end(), 0);
        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
               E = TRI->regclass_end(); I != E; ++I)
          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
      }
    }

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation. Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors and does not lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++CurQueueId;
      Queue.push_back(U);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      std::vector<SUnit *>::iterator Best = Queue.begin();
      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
           E = Queue.end(); I != E; ++I)
        if (Picker(*Best, *I))
          Best = I;
      SUnit *V = *Best;
      if (Best != prior(Queue.end()))
        std::swap(*Best, Queue.back());
      Queue.pop_back();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                   SU);
      if (I != prior(Queue.end()))
        std::swap(*I, Queue.back());
      Queue.pop_back();
      SU->NodeQueueId = 0;
    }

    bool HighRegPressure(const SUnit *SU) const {
      if (!TLI)
        return false;

      for (SUnit::const_pred_iterator I = SU->Preds.begin(),
             E = SU->Preds.end(); I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            unsigned Cost = TLI->getRepRegClassCostFor(VT);
            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
              return true;
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] >= RegLimit[RCId])
            return true; // Reg pressure already high.
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
            return true;
        }
      }

      return false;
    }

    void ScheduledNode(SUnit *SU) {
      if (!TracksRegPressure)
        return;

      const SDNode *N = SU->getNode();
      if (!N->isMachineOpcode()) {
        if (N->getOpcode() != ISD::CopyToReg)
          return;
      } else {
        unsigned Opc = N->getMachineOpcode();
        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
            Opc == TargetOpcode::INSERT_SUBREG ||
            Opc == TargetOpcode::SUBREG_TO_REG ||
            Opc == TargetOpcode::REG_SEQUENCE ||
            Opc == TargetOpcode::IMPLICIT_DEF)
          return;
      }

      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
          continue;
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
      }

      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
      // may transfer data dependencies to CopyToReg.
      if (SU->NumSuccs && N->isMachineOpcode()) {
        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = N->getValueType(i);
          if (!N->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
            // Register pressure tracking is imprecise. This can happen.
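            // Clamp at zero rather than letting the unsigned counter wrap.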
            RegPressure[RCId] = 0;
          else
            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
        }
      }

      dumpRegPressure();
    }

    void UnscheduledNode(SUnit *SU) {
      if (!TracksRegPressure)
        return;

      const SDNode *N = SU->getNode();
      if (!N->isMachineOpcode()) {
        if (N->getOpcode() != ISD::CopyToReg)
          return;
      } else {
        unsigned Opc = N->getMachineOpcode();
        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
            Opc == TargetOpcode::INSERT_SUBREG ||
            Opc == TargetOpcode::SUBREG_TO_REG ||
            Opc == TargetOpcode::REG_SEQUENCE ||
            Opc == TargetOpcode::IMPLICIT_DEF)
          return;
      }

      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
          continue;
        const SDNode *PN = PredSU->getNode();
        if (!PN->isMachineOpcode()) {
          if (PN->getOpcode() == ISD::CopyFromReg) {
            EVT VT = PN->getValueType(0);
            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          }
          continue;
        }
        unsigned POpc = PN->getMachineOpcode();
        if (POpc == TargetOpcode::IMPLICIT_DEF)
          continue;
        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
          EVT VT = PN->getOperand(0).getValueType();
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
                   POpc == TargetOpcode::SUBREG_TO_REG) {
          EVT VT = PN->getValueType(0);
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
          continue;
        }
        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = PN->getValueType(i);
          if (!PN->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
            // Register pressure tracking is imprecise. This can happen.
            RegPressure[RCId] = 0;
          else
            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
        }
      }

      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
      // may transfer data dependencies to CopyToReg.
      if (SU->NumSuccs && N->isMachineOpcode()) {
        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
          EVT VT = N->getValueType(i);
          if (VT == MVT::Glue || VT == MVT::Other)
            continue;
          if (!N->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
        }
      }

      dumpRegPressure();
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

    void dumpRegPressure() const {
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I) {
        const TargetRegisterClass *RC = *I;
        unsigned Id = RC->getID();
        unsigned RP = RegPressure[Id];
        if (!RP) continue;
        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
              << '\n');
      }
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<src_ls_rr_sort>
    SrcRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
    HybridBURRPriorityQueue;

  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
    ILPBURRPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is
/// a CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
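/// Any other kind of data use, or a copy to a physical register,
/// disqualifies SU.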
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

/// UnitsSharePred - Return true if the two scheduling units share a common
/// data predecessor.
static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
  SmallSet<const SUnit*, 4> Preds;
  for (SUnit::const_pred_iterator I = left->Preds.begin(),
         E = left->Preds.end(); I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Preds.insert(I->getSUnit());
  }
  for (SUnit::const_pred_iterator I = right->Preds.begin(),
         E = right->Preds.end(); I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (Preds.count(I->getSUnit()))
      return true;
  }
  return false;
}

template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the preference.
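  // A zero order number means no source order was recorded for the node;
  // ties, including the case where both orders are zero, fall through to
  // BURRSort below.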
// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer the node with the lower non-zero source order number; an order
  // number of zero means the node carries no source ordering.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (!LHigh && !RHigh) {
    // If the two nodes share an operand and one of them has a single
    // use that is a live out copy, favor the one that is live out. Otherwise
    // it will be difficult to eliminate the copy if the instruction is a
    // loop induction variable update. e.g.
    // BB:
    // sub r1, r3, #1
    // str r0, [r2, r3]
    // mov r3, r1
    // cmp
    // bne BB
    bool SharePred = UnitsSharePred(left, right);
    // FIXME: Only adjust if BB is a loop back edge.
    // FIXME: What's the cost of a copy?
    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
    int LHeight = (int)left->getHeight() - LBonus;
    int RHeight = (int)right->getHeight() - RBonus;

    // Low register pressure situation, schedule for latency if possible.
    bool LStall = left->SchedulingPref == Sched::Latency &&
                  (int)SPQ->getCurCycle() < LHeight;
    bool RStall = right->SchedulingPref == Sched::Latency &&
                  (int)SPQ->getCurCycle() < RHeight;
    // If scheduling one of the nodes would cause a pipeline stall, delay it.
    // If scheduling both of them would cause a stall, sort them according to
    // their height.
    if (LStall) {
      if (!RStall)
        return true;
      if (LHeight != RHeight)
        return LHeight > RHeight;
    } else if (RStall)
      return false;

    // If either node is scheduling for latency, sort them by height
    // and latency.
    if (left->SchedulingPref == Sched::Latency ||
        right->SchedulingPref == Sched::Latency) {
      if (LHeight != RHeight)
        return LHeight > RHeight;
      if (left->Latency != right->Latency)
        return left->Latency > right->Latency;
    }
  }

  return BURRSort(left, right, SPQ);
}
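// Illustrative walk-through with assumed numbers: at CurCycle == 2, a
// latency-preferring node of (bonus-adjusted) height 5 counts as stalled
// (2 < 5) while a node of height 1 does not, so the comparator delays the
// stalled node and lets the other issue first; when both nodes would stall,
// the comparison instead orders them by height, and latency-preferring
// nodes additionally fall back to comparing latency.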
bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                const SUnit *right) const {
  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
  if (LHigh && !RHigh)
    return true;
  else if (!LHigh && RHigh)
    return false;
  else if (!LHigh && !RHigh) {
    // Low register pressure situation, schedule to maximize instruction level
    // parallelism.
    if (left->NumPreds > right->NumPreds)
      return false;
    else if (left->NumPreds < right->NumPreds)
      return true;
  }

  return BURRSort(left, right, SPQ);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}
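// Illustrative sketch, assuming a two-address target instruction such as
// x86's ADD32rr, whose first source operand is tied to its result via the
// TIED_TO constraint: canClobber(SU, Op) answers true exactly when Op is
// the scheduling unit that defines SU's tied operand, i.e. when scheduling
// SU would overwrite the register holding Op's value.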
/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///      |
///     ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
           EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
           EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}
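// Sketch of the rewiring loop above: every remaining edge
// PredSU -> SuccSU (SuccSU != SU) is replaced by the pair
// PredSU -> SU -> SuccSU, so all of PredSU's other users now depend on SU
// as well; the `--i` compensates for RemovePred shrinking PredSU->Succs
// while the loop indexes into it.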
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
             E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
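// Worked example with hypothetical units: if SU is a two-address node whose
// tied operand is defined by DUSU, and SuccSU is another user of that value,
// the artificial order edge added above makes SuccSU a predecessor of SU,
// forcing SuccSU ahead of SU in the final program order -- so the other user
// reads the shared value before the two-address node clobbers it.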
/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
           EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
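// Summary sketch of the top-down tie-breaking above: nodes with no
// successors are pushed toward the end outright; otherwise each node earns
// a +2 bonus when all of its successors have exactly one unscheduled
// predecessor and another +2 for having a single successor, loses 2 as a
// predecessor-free "floater", and the bonus-adjusted priorities, then
// depth, remaining successor count, and queue id decide in that order.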
//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ =
    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}
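// Usage note: each factory above follows the same pattern -- construct the
// priority queue, construct the list scheduler around it, and wire the
// queue's back-pointer to the DAG. The registered schedulers can be selected
// from llc via the -pre-RA-sched flag, e.g. (illustrative invocation):
//
//   llc -pre-RA-sched=list-burr foo.ll
//
// subject to the target's default scheduling preference.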