ScheduleDAGRRList.cpp revision ffa391272bad598d73fd5404dadf3686b69f2a63
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAGSDNodes.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/Target/TargetRegisterInfo.h" 22#include "llvm/Target/TargetData.h" 23#include "llvm/Target/TargetMachine.h" 24#include "llvm/Target/TargetInstrInfo.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/Compiler.h" 27#include "llvm/ADT/BitVector.h" 28#include "llvm/ADT/PriorityQueue.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallSet.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/ADT/STLExtras.h" 33#include <climits> 34#include "llvm/Support/CommandLine.h" 35using namespace llvm; 36 37STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); 38STATISTIC(NumUnfolds, "Number of nodes unfolded"); 39STATISTIC(NumDups, "Number of duplicated nodes"); 40STATISTIC(NumCCCopies, "Number of cross class copies"); 41 42static RegisterScheduler 43 burrListDAGScheduler("list-burr", 44 "Bottom-up register reduction list scheduling", 45 createBURRListDAGScheduler); 46static RegisterScheduler 47 tdrListrDAGScheduler("list-tdrr", 48 "Top-down register reduction list scheduling", 49 createTDRRListDAGScheduler); 50 51namespace { 52//===----------------------------------------------------------------------===// 53/// ScheduleDAGRRList - The actual register reduction list scheduler 54/// implementation. This supports both top-down and bottom-up scheduling. 55/// 56class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes { 57private: 58 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 59 /// it is top-down. 60 bool isBottomUp; 61 62 /// AvailableQueue - The priority queue to use for the available SUnits. 63 SchedulingPriorityQueue *AvailableQueue; 64 65 /// LiveRegDefs - A set of physical registers and their definition 66 /// that are "live". These nodes must be scheduled before any other nodes that 67 /// modifies the registers can be scheduled. 68 unsigned NumLiveRegs; 69 std::vector<SUnit*> LiveRegDefs; 70 std::vector<unsigned> LiveRegCycles; 71 72 /// Topo - A topological ordering for SUnits which permits fast IsReachable 73 /// and similar queries. 74 ScheduleDAGTopologicalSort Topo; 75 76public: 77 ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb, 78 const TargetMachine &tm, bool isbottomup, 79 SchedulingPriorityQueue *availqueue) 80 : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup), 81 AvailableQueue(availqueue), Topo(SUnits) { 82 } 83 84 ~ScheduleDAGRRList() { 85 delete AvailableQueue; 86 } 87 88 void Schedule(); 89 90 /// IsReachable - Checks if SU is reachable from TargetSU. 91 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { 92 return Topo.IsReachable(SU, TargetSU); 93 } 94 95 /// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 96 /// create a cycle. 97 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 98 return Topo.WillCreateCycle(SU, TargetSU); 99 } 100 101 /// AddPred - adds a predecessor edge to SUnit SU. 102 /// This returns true if this is a new predecessor. 103 /// Updates the topological ordering if required. 104 void AddPred(SUnit *SU, const SDep &D) { 105 Topo.AddPred(SU, D.getSUnit()); 106 SU->addPred(D); 107 } 108 109 /// RemovePred - removes a predecessor edge from SUnit SU. 110 /// This returns true if an edge was removed. 111 /// Updates the topological ordering if required. 112 void RemovePred(SUnit *SU, const SDep &D) { 113 Topo.RemovePred(SU, D.getSUnit()); 114 SU->removePred(D); 115 } 116 117private: 118 void ReleasePred(SUnit *SU, SDep *PredEdge); 119 void ReleaseSucc(SUnit *SU, SDep *SuccEdge); 120 void CapturePred(SDep *PredEdge); 121 void ScheduleNodeBottomUp(SUnit*, unsigned); 122 void ScheduleNodeTopDown(SUnit*, unsigned); 123 void UnscheduleNodeBottomUp(SUnit*); 124 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 125 SUnit *CopyAndMoveSuccessors(SUnit*); 126 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, 127 const TargetRegisterClass*, 128 const TargetRegisterClass*, 129 SmallVector<SUnit*, 2>&); 130 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 131 void ListScheduleTopDown(); 132 void ListScheduleBottomUp(); 133 void CommuteNodesToReducePressure(); 134 135 136 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. 137 /// Updates the topological ordering if required. 138 SUnit *CreateNewSUnit(SDNode *N) { 139 unsigned NumSUnits = SUnits.size(); 140 SUnit *NewNode = NewSUnit(N); 141 // Update the topological ordering. 142 if (NewNode->NodeNum >= NumSUnits) 143 Topo.InitDAGTopologicalSorting(); 144 return NewNode; 145 } 146 147 /// CreateClone - Creates a new SUnit from an existing one. 148 /// Updates the topological ordering if required. 149 SUnit *CreateClone(SUnit *N) { 150 unsigned NumSUnits = SUnits.size(); 151 SUnit *NewNode = Clone(N); 152 // Update the topological ordering. 153 if (NewNode->NodeNum >= NumSUnits) 154 Topo.InitDAGTopologicalSorting(); 155 return NewNode; 156 } 157}; 158} // end anonymous namespace 159 160 161/// Schedule - Schedule the DAG using list scheduling. 162void ScheduleDAGRRList::Schedule() { 163 DOUT << "********** List Scheduling **********\n"; 164 165 NumLiveRegs = 0; 166 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 167 LiveRegCycles.resize(TRI->getNumRegs(), 0); 168 169 // Build scheduling units. 170 BuildSchedUnits(); 171 172 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 173 SUnits[su].dumpAll(this)); 174 CalculateDepths(); 175 CalculateHeights(); 176 Topo.InitDAGTopologicalSorting(); 177 178 AvailableQueue->initNodes(SUnits); 179 180 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 181 if (isBottomUp) 182 ListScheduleBottomUp(); 183 else 184 ListScheduleTopDown(); 185 186 AvailableQueue->releaseState(); 187 188 CommuteNodesToReducePressure(); 189} 190 191/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 192/// it is not the last use of its first operand, add it to the CommuteSet if 193/// possible. It will be commuted when it is translated to a MI. 194void ScheduleDAGRRList::CommuteNodesToReducePressure() { 195 SmallPtrSet<SUnit*, 4> OperandSeen; 196 for (unsigned i = Sequence.size(); i != 0; ) { 197 --i; 198 SUnit *SU = Sequence[i]; 199 if (!SU || !SU->getNode()) continue; 200 if (SU->isCommutable) { 201 unsigned Opc = SU->getNode()->getMachineOpcode(); 202 const TargetInstrDesc &TID = TII->get(Opc); 203 unsigned NumRes = TID.getNumDefs(); 204 unsigned NumOps = TID.getNumOperands() - NumRes; 205 for (unsigned j = 0; j != NumOps; ++j) { 206 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 207 continue; 208 209 SDNode *OpN = SU->getNode()->getOperand(j).getNode(); 210 SUnit *OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()]; 211 if (OpSU && OperandSeen.count(OpSU) == 1) { 212 // Ok, so SU is not the last use of OpSU, but SU is two-address so 213 // it will clobber OpSU. Try to commute SU if no other source operands 214 // are live below. 215 bool DoCommute = true; 216 for (unsigned k = 0; k < NumOps; ++k) { 217 if (k != j) { 218 OpN = SU->getNode()->getOperand(k).getNode(); 219 OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()]; 220 if (OpSU && OperandSeen.count(OpSU) == 1) { 221 DoCommute = false; 222 break; 223 } 224 } 225 } 226 if (DoCommute) 227 CommuteSet.insert(SU->getNode()); 228 } 229 230 // Only look at the first use&def node for now. 231 break; 232 } 233 } 234 235 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 236 I != E; ++I) { 237 if (!I->isCtrl()) 238 OperandSeen.insert(I->getSUnit()->OrigNode); 239 } 240 } 241} 242 243//===----------------------------------------------------------------------===// 244// Bottom-Up Scheduling 245//===----------------------------------------------------------------------===// 246 247/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 248/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 249void ScheduleDAGRRList::ReleasePred(SUnit *SU, SDep *PredEdge) { 250 SUnit *PredSU = PredEdge->getSUnit(); 251 --PredSU->NumSuccsLeft; 252 253#ifndef NDEBUG 254 if (PredSU->NumSuccsLeft < 0) { 255 cerr << "*** Scheduling failed! ***\n"; 256 PredSU->dump(this); 257 cerr << " has been released too many times!\n"; 258 assert(0); 259 } 260#endif 261 262 if (PredSU->NumSuccsLeft == 0) { 263 PredSU->isAvailable = true; 264 AvailableQueue->push(PredSU); 265 } 266} 267 268/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 269/// count of its predecessors. If a predecessor pending count is zero, add it to 270/// the Available queue. 271void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 272 DOUT << "*** Scheduling [" << CurCycle << "]: "; 273 DEBUG(SU->dump(this)); 274 275 SU->Cycle = CurCycle; 276 Sequence.push_back(SU); 277 278 // Bottom up: release predecessors 279 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 280 I != E; ++I) { 281 ReleasePred(SU, &*I); 282 if (I->isAssignedRegDep()) { 283 // This is a physical register dependency and it's impossible or 284 // expensive to copy the register. Make sure nothing that can 285 // clobber the register is scheduled between the predecessor and 286 // this node. 287 if (!LiveRegDefs[I->getReg()]) { 288 ++NumLiveRegs; 289 LiveRegDefs[I->getReg()] = I->getSUnit(); 290 LiveRegCycles[I->getReg()] = CurCycle; 291 } 292 } 293 } 294 295 // Release all the implicit physical register defs that are live. 296 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 297 I != E; ++I) { 298 if (I->isAssignedRegDep()) { 299 if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) { 300 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 301 assert(LiveRegDefs[I->getReg()] == SU && 302 "Physical register dependency violated?"); 303 --NumLiveRegs; 304 LiveRegDefs[I->getReg()] = NULL; 305 LiveRegCycles[I->getReg()] = 0; 306 } 307 } 308 } 309 310 SU->isScheduled = true; 311 AvailableQueue->ScheduledNode(SU); 312} 313 314/// CapturePred - This does the opposite of ReleasePred. Since SU is being 315/// unscheduled, incrcease the succ left count of its predecessors. Remove 316/// them from AvailableQueue if necessary. 317void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { 318 SUnit *PredSU = PredEdge->getSUnit(); 319 if (PredSU->isAvailable) { 320 PredSU->isAvailable = false; 321 if (!PredSU->isPending) 322 AvailableQueue->remove(PredSU); 323 } 324 325 ++PredSU->NumSuccsLeft; 326} 327 328/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 329/// its predecessor states to reflect the change. 330void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 331 DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; 332 DEBUG(SU->dump(this)); 333 334 AvailableQueue->UnscheduledNode(SU); 335 336 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 337 I != E; ++I) { 338 CapturePred(&*I); 339 if (I->isAssignedRegDep() && SU->Cycle == LiveRegCycles[I->getReg()]) { 340 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); 341 assert(LiveRegDefs[I->getReg()] == I->getSUnit() && 342 "Physical register dependency violated?"); 343 --NumLiveRegs; 344 LiveRegDefs[I->getReg()] = NULL; 345 LiveRegCycles[I->getReg()] = 0; 346 } 347 } 348 349 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 350 I != E; ++I) { 351 if (I->isAssignedRegDep()) { 352 if (!LiveRegDefs[I->getReg()]) { 353 LiveRegDefs[I->getReg()] = SU; 354 ++NumLiveRegs; 355 } 356 if (I->getSUnit()->Cycle < LiveRegCycles[I->getReg()]) 357 LiveRegCycles[I->getReg()] = I->getSUnit()->Cycle; 358 } 359 } 360 361 SU->Cycle = 0; 362 SU->isScheduled = false; 363 SU->isAvailable = true; 364 AvailableQueue->push(SU); 365} 366 367/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 368/// BTCycle in order to schedule a specific node. Returns the last unscheduled 369/// SUnit. Also returns if a successor is unscheduled in the process. 370void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 371 unsigned &CurCycle) { 372 SUnit *OldSU = NULL; 373 while (CurCycle > BtCycle) { 374 OldSU = Sequence.back(); 375 Sequence.pop_back(); 376 if (SU->isSucc(OldSU)) 377 // Don't try to remove SU from AvailableQueue. 378 SU->isAvailable = false; 379 UnscheduleNodeBottomUp(OldSU); 380 --CurCycle; 381 } 382 383 384 if (SU->isSucc(OldSU)) { 385 assert(false && "Something is wrong!"); 386 abort(); 387 } 388 389 ++NumBacktracks; 390} 391 392/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 393/// successors to the newly created node. 394SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 395 if (SU->getNode()->getFlaggedNode()) 396 return NULL; 397 398 SDNode *N = SU->getNode(); 399 if (!N) 400 return NULL; 401 402 SUnit *NewSU; 403 bool TryUnfold = false; 404 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 405 MVT VT = N->getValueType(i); 406 if (VT == MVT::Flag) 407 return NULL; 408 else if (VT == MVT::Other) 409 TryUnfold = true; 410 } 411 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 412 const SDValue &Op = N->getOperand(i); 413 MVT VT = Op.getNode()->getValueType(Op.getResNo()); 414 if (VT == MVT::Flag) 415 return NULL; 416 } 417 418 if (TryUnfold) { 419 SmallVector<SDNode*, 2> NewNodes; 420 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) 421 return NULL; 422 423 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 424 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 425 426 N = NewNodes[1]; 427 SDNode *LoadNode = NewNodes[0]; 428 unsigned NumVals = N->getNumValues(); 429 unsigned OldNumVals = SU->getNode()->getNumValues(); 430 for (unsigned i = 0; i != NumVals; ++i) 431 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); 432 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), 433 SDValue(LoadNode, 1)); 434 435 // LoadNode may already exist. This can happen when there is another 436 // load from the same location and producing the same type of value 437 // but it has different alignment or volatileness. 438 bool isNewLoad = true; 439 SUnit *LoadSU; 440 if (LoadNode->getNodeId() != -1) { 441 LoadSU = &SUnits[LoadNode->getNodeId()]; 442 isNewLoad = false; 443 } else { 444 LoadSU = CreateNewSUnit(LoadNode); 445 LoadNode->setNodeId(LoadSU->NodeNum); 446 447 LoadSU->Depth = SU->Depth; 448 LoadSU->Height = SU->Height; 449 ComputeLatency(LoadSU); 450 } 451 452 SUnit *NewSU = CreateNewSUnit(N); 453 assert(N->getNodeId() == -1 && "Node already inserted!"); 454 N->setNodeId(NewSU->NodeNum); 455 456 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 457 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 458 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 459 NewSU->isTwoAddress = true; 460 break; 461 } 462 } 463 if (TID.isCommutable()) 464 NewSU->isCommutable = true; 465 // FIXME: Calculate height / depth and propagate the changes? 466 NewSU->Depth = SU->Depth; 467 NewSU->Height = SU->Height; 468 ComputeLatency(NewSU); 469 470 SDep ChainPred; 471 SmallVector<SDep, 4> ChainSuccs; 472 SmallVector<SDep, 4> LoadPreds; 473 SmallVector<SDep, 4> NodePreds; 474 SmallVector<SDep, 4> NodeSuccs; 475 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 476 I != E; ++I) { 477 if (I->isCtrl()) 478 ChainPred = *I; 479 else if (I->getSUnit()->getNode() && 480 I->getSUnit()->getNode()->isOperandOf(LoadNode)) 481 LoadPreds.push_back(*I); 482 else 483 NodePreds.push_back(*I); 484 } 485 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 486 I != E; ++I) { 487 if (I->isCtrl()) 488 ChainSuccs.push_back(*I); 489 else 490 NodeSuccs.push_back(*I); 491 } 492 493 if (ChainPred.getSUnit()) { 494 RemovePred(SU, ChainPred); 495 if (isNewLoad) 496 AddPred(LoadSU, ChainPred); 497 } 498 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 499 const SDep &Pred = LoadPreds[i]; 500 RemovePred(SU, Pred); 501 if (isNewLoad) { 502 AddPred(LoadSU, Pred); 503 } 504 } 505 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 506 const SDep &Pred = NodePreds[i]; 507 RemovePred(SU, Pred); 508 AddPred(NewSU, Pred); 509 } 510 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 511 SDep D = NodeSuccs[i]; 512 SUnit *SuccDep = D.getSUnit(); 513 D.setSUnit(SU); 514 RemovePred(SuccDep, D); 515 D.setSUnit(NewSU); 516 AddPred(SuccDep, D); 517 } 518 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 519 SDep D = ChainSuccs[i]; 520 SUnit *SuccDep = D.getSUnit(); 521 D.setSUnit(SU); 522 RemovePred(SuccDep, D); 523 if (isNewLoad) { 524 D.setSUnit(LoadSU); 525 AddPred(SuccDep, D); 526 } 527 } 528 if (isNewLoad) { 529 AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); 530 } 531 532 if (isNewLoad) 533 AvailableQueue->addNode(LoadSU); 534 AvailableQueue->addNode(NewSU); 535 536 ++NumUnfolds; 537 538 if (NewSU->NumSuccsLeft == 0) { 539 NewSU->isAvailable = true; 540 return NewSU; 541 } 542 SU = NewSU; 543 } 544 545 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 546 NewSU = CreateClone(SU); 547 548 // New SUnit has the exact same predecessors. 549 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 550 I != E; ++I) 551 if (!I->isArtificial()) { 552 AddPred(NewSU, *I); 553 NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1); 554 } 555 556 // Only copy scheduled successors. Cut them from old node's successor 557 // list and move them over. 558 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 559 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 560 I != E; ++I) { 561 if (I->isArtificial()) 562 continue; 563 SUnit *SuccSU = I->getSUnit(); 564 if (SuccSU->isScheduled) { 565 NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1); 566 SDep D = *I; 567 D.setSUnit(NewSU); 568 AddPred(SuccSU, D); 569 D.setSUnit(SU); 570 DelDeps.push_back(std::make_pair(SuccSU, D)); 571 } 572 } 573 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 574 RemovePred(DelDeps[i].first, DelDeps[i].second); 575 } 576 577 AvailableQueue->updateNode(SU); 578 AvailableQueue->addNode(NewSU); 579 580 ++NumDups; 581 return NewSU; 582} 583 584/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 585/// and move all scheduled successors of the given SUnit to the last copy. 586void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 587 const TargetRegisterClass *DestRC, 588 const TargetRegisterClass *SrcRC, 589 SmallVector<SUnit*, 2> &Copies) { 590 SUnit *CopyFromSU = CreateNewSUnit(NULL); 591 CopyFromSU->CopySrcRC = SrcRC; 592 CopyFromSU->CopyDstRC = DestRC; 593 CopyFromSU->Depth = SU->Depth; 594 CopyFromSU->Height = SU->Height; 595 596 SUnit *CopyToSU = CreateNewSUnit(NULL); 597 CopyToSU->CopySrcRC = DestRC; 598 CopyToSU->CopyDstRC = SrcRC; 599 600 // Only copy scheduled successors. Cut them from old node's successor 601 // list and move them over. 602 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; 603 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 604 I != E; ++I) { 605 if (I->isArtificial()) 606 continue; 607 SUnit *SuccSU = I->getSUnit(); 608 if (SuccSU->isScheduled) { 609 SDep D = *I; 610 D.setSUnit(CopyToSU); 611 AddPred(SuccSU, D); 612 DelDeps.push_back(std::make_pair(SuccSU, *I)); 613 } 614 } 615 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 616 RemovePred(DelDeps[i].first, DelDeps[i].second); 617 } 618 619 AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); 620 AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); 621 622 AvailableQueue->updateNode(SU); 623 AvailableQueue->addNode(CopyFromSU); 624 AvailableQueue->addNode(CopyToSU); 625 Copies.push_back(CopyFromSU); 626 Copies.push_back(CopyToSU); 627 628 ++NumCCCopies; 629} 630 631/// getPhysicalRegisterVT - Returns the ValueType of the physical register 632/// definition of the specified node. 633/// FIXME: Move to SelectionDAG? 634static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, 635 const TargetInstrInfo *TII) { 636 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 637 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 638 unsigned NumRes = TID.getNumDefs(); 639 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 640 if (Reg == *ImpDef) 641 break; 642 ++NumRes; 643 } 644 return N->getValueType(NumRes); 645} 646 647/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 648/// scheduling of the given node to satisfy live physical register dependencies. 649/// If the specific node is the last one that's available to schedule, do 650/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 651bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 652 SmallVector<unsigned, 4> &LRegs){ 653 if (NumLiveRegs == 0) 654 return false; 655 656 SmallSet<unsigned, 4> RegAdded; 657 // If this node would clobber any "live" register, then it's not ready. 658 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 659 I != E; ++I) { 660 if (I->isAssignedRegDep()) { 661 unsigned Reg = I->getReg(); 662 if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) { 663 if (RegAdded.insert(Reg)) 664 LRegs.push_back(Reg); 665 } 666 for (const unsigned *Alias = TRI->getAliasSet(Reg); 667 *Alias; ++Alias) 668 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) { 669 if (RegAdded.insert(*Alias)) 670 LRegs.push_back(*Alias); 671 } 672 } 673 } 674 675 for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) { 676 if (!Node->isMachineOpcode()) 677 continue; 678 const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); 679 if (!TID.ImplicitDefs) 680 continue; 681 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 682 if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) { 683 if (RegAdded.insert(*Reg)) 684 LRegs.push_back(*Reg); 685 } 686 for (const unsigned *Alias = TRI->getAliasSet(*Reg); 687 *Alias; ++Alias) 688 if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { 689 if (RegAdded.insert(*Alias)) 690 LRegs.push_back(*Alias); 691 } 692 } 693 } 694 return !LRegs.empty(); 695} 696 697 698/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 699/// schedulers. 700void ScheduleDAGRRList::ListScheduleBottomUp() { 701 unsigned CurCycle = 0; 702 // Add root to Available queue. 703 if (!SUnits.empty()) { 704 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; 705 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); 706 RootSU->isAvailable = true; 707 AvailableQueue->push(RootSU); 708 } 709 710 // While Available queue is not empty, grab the node with the highest 711 // priority. If it is not ready put it back. Schedule the node. 712 SmallVector<SUnit*, 4> NotReady; 713 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 714 Sequence.reserve(SUnits.size()); 715 while (!AvailableQueue->empty()) { 716 bool Delayed = false; 717 LRegsMap.clear(); 718 SUnit *CurSU = AvailableQueue->pop(); 719 while (CurSU) { 720 SmallVector<unsigned, 4> LRegs; 721 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 722 break; 723 Delayed = true; 724 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 725 726 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 727 NotReady.push_back(CurSU); 728 CurSU = AvailableQueue->pop(); 729 } 730 731 // All candidates are delayed due to live physical reg dependencies. 732 // Try backtracking, code duplication, or inserting cross class copies 733 // to resolve it. 734 if (Delayed && !CurSU) { 735 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 736 SUnit *TrySU = NotReady[i]; 737 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 738 739 // Try unscheduling up to the point where it's safe to schedule 740 // this node. 741 unsigned LiveCycle = CurCycle; 742 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 743 unsigned Reg = LRegs[j]; 744 unsigned LCycle = LiveRegCycles[Reg]; 745 LiveCycle = std::min(LiveCycle, LCycle); 746 } 747 SUnit *OldSU = Sequence[LiveCycle]; 748 if (!WillCreateCycle(TrySU, OldSU)) { 749 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 750 // Force the current node to be scheduled before the node that 751 // requires the physical reg dep. 752 if (OldSU->isAvailable) { 753 OldSU->isAvailable = false; 754 AvailableQueue->remove(OldSU); 755 } 756 AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, 757 /*Reg=*/0, /*isNormalMemory=*/false, 758 /*isMustAlias=*/false, /*isArtificial=*/true)); 759 // If one or more successors has been unscheduled, then the current 760 // node is no longer avaialable. Schedule a successor that's now 761 // available instead. 762 if (!TrySU->isAvailable) 763 CurSU = AvailableQueue->pop(); 764 else { 765 CurSU = TrySU; 766 TrySU->isPending = false; 767 NotReady.erase(NotReady.begin()+i); 768 } 769 break; 770 } 771 } 772 773 if (!CurSU) { 774 // Can't backtrack. Try duplicating the nodes that produces these 775 // "expensive to copy" values to break the dependency. In case even 776 // that doesn't work, insert cross class copies. 777 SUnit *TrySU = NotReady[0]; 778 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 779 assert(LRegs.size() == 1 && "Can't handle this yet!"); 780 unsigned Reg = LRegs[0]; 781 SUnit *LRDef = LiveRegDefs[Reg]; 782 SUnit *NewDef = CopyAndMoveSuccessors(LRDef); 783 if (!NewDef) { 784 // Issue expensive cross register class copies. 785 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); 786 const TargetRegisterClass *RC = 787 TRI->getPhysicalRegisterRegClass(Reg, VT); 788 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); 789 if (!DestRC) { 790 assert(false && "Don't know how to copy this physical register!"); 791 abort(); 792 } 793 SmallVector<SUnit*, 2> Copies; 794 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 795 DOUT << "Adding an edge from SU # " << TrySU->NodeNum 796 << " to SU #" << Copies.front()->NodeNum << "\n"; 797 AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, 798 /*Reg=*/0, /*isMustAlias=*/false, 799 /*isArtificial=*/true)); 800 NewDef = Copies.back(); 801 } 802 803 DOUT << "Adding an edge from SU # " << NewDef->NodeNum 804 << " to SU #" << TrySU->NodeNum << "\n"; 805 LiveRegDefs[Reg] = NewDef; 806 AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, 807 /*Reg=*/0, /*isMustAlias=*/false, 808 /*isArtificial=*/true)); 809 TrySU->isAvailable = false; 810 CurSU = NewDef; 811 } 812 813 if (!CurSU) { 814 assert(false && "Unable to resolve live physical register dependencies!"); 815 abort(); 816 } 817 } 818 819 // Add the nodes that aren't ready back onto the available list. 820 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 821 NotReady[i]->isPending = false; 822 // May no longer be available due to backtracking. 823 if (NotReady[i]->isAvailable) 824 AvailableQueue->push(NotReady[i]); 825 } 826 NotReady.clear(); 827 828 if (CurSU) 829 ScheduleNodeBottomUp(CurSU, CurCycle); 830 ++CurCycle; 831 } 832 833 // Reverse the order if it is bottom up. 834 std::reverse(Sequence.begin(), Sequence.end()); 835 836#ifndef NDEBUG 837 VerifySchedule(isBottomUp); 838#endif 839} 840 841//===----------------------------------------------------------------------===// 842// Top-Down Scheduling 843//===----------------------------------------------------------------------===// 844 845/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 846/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 847void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { 848 SUnit *SuccSU = SuccEdge->getSUnit(); 849 --SuccSU->NumPredsLeft; 850 851#ifndef NDEBUG 852 if (SuccSU->NumPredsLeft < 0) { 853 cerr << "*** Scheduling failed! ***\n"; 854 SuccSU->dump(this); 855 cerr << " has been released too many times!\n"; 856 assert(0); 857 } 858#endif 859 860 if (SuccSU->NumPredsLeft == 0) { 861 SuccSU->isAvailable = true; 862 AvailableQueue->push(SuccSU); 863 } 864} 865 866/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 867/// count of its successors. If a successor pending count is zero, add it to 868/// the Available queue. 869void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 870 DOUT << "*** Scheduling [" << CurCycle << "]: "; 871 DEBUG(SU->dump(this)); 872 873 SU->Cycle = CurCycle; 874 Sequence.push_back(SU); 875 876 // Top down: release successors 877 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 878 I != E; ++I) 879 ReleaseSucc(SU, &*I); 880 881 SU->isScheduled = true; 882 AvailableQueue->ScheduledNode(SU); 883} 884 885/// ListScheduleTopDown - The main loop of list scheduling for top-down 886/// schedulers. 887void ScheduleDAGRRList::ListScheduleTopDown() { 888 unsigned CurCycle = 0; 889 890 // All leaves to Available queue. 891 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 892 // It is available if it has no predecessors. 893 if (SUnits[i].Preds.empty()) { 894 AvailableQueue->push(&SUnits[i]); 895 SUnits[i].isAvailable = true; 896 } 897 } 898 899 // While Available queue is not empty, grab the node with the highest 900 // priority. If it is not ready put it back. Schedule the node. 901 Sequence.reserve(SUnits.size()); 902 while (!AvailableQueue->empty()) { 903 SUnit *CurSU = AvailableQueue->pop(); 904 905 if (CurSU) 906 ScheduleNodeTopDown(CurSU, CurCycle); 907 ++CurCycle; 908 } 909 910#ifndef NDEBUG 911 VerifySchedule(isBottomUp); 912#endif 913} 914 915 916//===----------------------------------------------------------------------===// 917// RegReductionPriorityQueue Implementation 918//===----------------------------------------------------------------------===// 919// 920// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 921// to reduce register pressure. 922// 923namespace { 924 template<class SF> 925 class RegReductionPriorityQueue; 926 927 /// Sorting functions for the Available queue. 928 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 929 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 930 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 931 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 932 933 bool operator()(const SUnit* left, const SUnit* right) const; 934 }; 935 936 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 937 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 938 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 939 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 940 941 bool operator()(const SUnit* left, const SUnit* right) const; 942 }; 943} // end anonymous namespace 944 945static inline bool isCopyFromLiveIn(const SUnit *SU) { 946 SDNode *N = SU->getNode(); 947 return N && N->getOpcode() == ISD::CopyFromReg && 948 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 949} 950 951/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 952/// Smaller number is the higher priority. 953static unsigned 954CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 955 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 956 if (SethiUllmanNumber != 0) 957 return SethiUllmanNumber; 958 959 unsigned Extra = 0; 960 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 961 I != E; ++I) { 962 if (I->isCtrl()) continue; // ignore chain preds 963 SUnit *PredSU = I->getSUnit(); 964 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 965 if (PredSethiUllman > SethiUllmanNumber) { 966 SethiUllmanNumber = PredSethiUllman; 967 Extra = 0; 968 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl()) 969 ++Extra; 970 } 971 972 SethiUllmanNumber += Extra; 973 974 if (SethiUllmanNumber == 0) 975 SethiUllmanNumber = 1; 976 977 return SethiUllmanNumber; 978} 979 980namespace { 981 template<class SF> 982 class VISIBILITY_HIDDEN RegReductionPriorityQueue 983 : public SchedulingPriorityQueue { 984 PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue; 985 unsigned currentQueueId; 986 987 protected: 988 // SUnits - The SUnits for the current graph. 989 std::vector<SUnit> *SUnits; 990 991 const TargetInstrInfo *TII; 992 const TargetRegisterInfo *TRI; 993 ScheduleDAGRRList *scheduleDAG; 994 995 // SethiUllmanNumbers - The SethiUllman number for each node. 996 std::vector<unsigned> SethiUllmanNumbers; 997 998 public: 999 RegReductionPriorityQueue(const TargetInstrInfo *tii, 1000 const TargetRegisterInfo *tri) : 1001 Queue(SF(this)), currentQueueId(0), 1002 TII(tii), TRI(tri), scheduleDAG(NULL) {} 1003 1004 void initNodes(std::vector<SUnit> &sunits) { 1005 SUnits = &sunits; 1006 // Add pseudo dependency edges for two-address nodes. 1007 AddPseudoTwoAddrDeps(); 1008 // Calculate node priorities. 1009 CalculateSethiUllmanNumbers(); 1010 } 1011 1012 void addNode(const SUnit *SU) { 1013 unsigned SUSize = SethiUllmanNumbers.size(); 1014 if (SUnits->size() > SUSize) 1015 SethiUllmanNumbers.resize(SUSize*2, 0); 1016 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1017 } 1018 1019 void updateNode(const SUnit *SU) { 1020 SethiUllmanNumbers[SU->NodeNum] = 0; 1021 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1022 } 1023 1024 void releaseState() { 1025 SUnits = 0; 1026 SethiUllmanNumbers.clear(); 1027 } 1028 1029 unsigned getNodePriority(const SUnit *SU) const { 1030 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1031 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1032 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1033 // CopyFromReg should be close to its def because it restricts 1034 // allocation choices. But if it is a livein then perhaps we want it 1035 // closer to its uses so it can be coalesced. 1036 return 0xffff; 1037 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1038 // CopyToReg should be close to its uses to facilitate coalescing and 1039 // avoid spilling. 1040 return 0; 1041 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1042 Opc == TargetInstrInfo::INSERT_SUBREG) 1043 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1044 // facilitate coalescing. 1045 return 0; 1046 else if (SU->NumSuccs == 0) 1047 // If SU does not have a use, i.e. it doesn't produce a value that would 1048 // be consumed (e.g. store), then it terminates a chain of computation. 1049 // Give it a large SethiUllman number so it will be scheduled right 1050 // before its predecessors that it doesn't lengthen their live ranges. 1051 return 0xffff; 1052 else if (SU->NumPreds == 0) 1053 // If SU does not have a def, schedule it close to its uses because it 1054 // does not lengthen any live ranges. 1055 return 0; 1056 else 1057 return SethiUllmanNumbers[SU->NodeNum]; 1058 } 1059 1060 unsigned size() const { return Queue.size(); } 1061 1062 bool empty() const { return Queue.empty(); } 1063 1064 void push(SUnit *U) { 1065 assert(!U->NodeQueueId && "Node in the queue already"); 1066 U->NodeQueueId = ++currentQueueId; 1067 Queue.push(U); 1068 } 1069 1070 void push_all(const std::vector<SUnit *> &Nodes) { 1071 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1072 push(Nodes[i]); 1073 } 1074 1075 SUnit *pop() { 1076 if (empty()) return NULL; 1077 SUnit *V = Queue.top(); 1078 Queue.pop(); 1079 V->NodeQueueId = 0; 1080 return V; 1081 } 1082 1083 void remove(SUnit *SU) { 1084 assert(!Queue.empty() && "Queue is empty!"); 1085 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1086 Queue.erase_one(SU); 1087 SU->NodeQueueId = 0; 1088 } 1089 1090 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { 1091 scheduleDAG = scheduleDag; 1092 } 1093 1094 protected: 1095 bool canClobber(const SUnit *SU, const SUnit *Op); 1096 void AddPseudoTwoAddrDeps(); 1097 void CalculateSethiUllmanNumbers(); 1098 }; 1099 1100 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1101 BURegReductionPriorityQueue; 1102 1103 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1104 TDRegReductionPriorityQueue; 1105} 1106 1107/// closestSucc - Returns the scheduled cycle of the successor which is 1108/// closet to the current cycle. 1109static unsigned closestSucc(const SUnit *SU) { 1110 unsigned MaxCycle = 0; 1111 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1112 I != E; ++I) { 1113 unsigned Cycle = I->getSUnit()->Cycle; 1114 // If there are bunch of CopyToRegs stacked up, they should be considered 1115 // to be at the same position. 1116 if (I->getSUnit()->getNode() && 1117 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 1118 Cycle = closestSucc(I->getSUnit())+1; 1119 if (Cycle > MaxCycle) 1120 MaxCycle = Cycle; 1121 } 1122 return MaxCycle; 1123} 1124 1125/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1126/// for scratch registers. Live-in operands and live-out results don't count 1127/// since they are "fixed". 1128static unsigned calcMaxScratches(const SUnit *SU) { 1129 unsigned Scratches = 0; 1130 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1131 I != E; ++I) { 1132 if (I->isCtrl()) continue; // ignore chain preds 1133 if (!I->getSUnit()->getNode() || 1134 I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg) 1135 Scratches++; 1136 } 1137 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1138 I != E; ++I) { 1139 if (I->isCtrl()) continue; // ignore chain succs 1140 if (!I->getSUnit()->getNode() || 1141 I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg) 1142 Scratches += 10; 1143 } 1144 return Scratches; 1145} 1146 1147// Bottom up 1148bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1149 unsigned LPriority = SPQ->getNodePriority(left); 1150 unsigned RPriority = SPQ->getNodePriority(right); 1151 if (LPriority != RPriority) 1152 return LPriority > RPriority; 1153 1154 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1155 // e.g. 1156 // t1 = op t2, c1 1157 // t3 = op t4, c2 1158 // 1159 // and the following instructions are both ready. 1160 // t2 = op c3 1161 // t4 = op c4 1162 // 1163 // Then schedule t2 = op first. 1164 // i.e. 1165 // t4 = op c4 1166 // t2 = op c3 1167 // t1 = op t2, c1 1168 // t3 = op t4, c2 1169 // 1170 // This creates more short live intervals. 1171 unsigned LDist = closestSucc(left); 1172 unsigned RDist = closestSucc(right); 1173 if (LDist != RDist) 1174 return LDist < RDist; 1175 1176 // Intuitively, it's good to push down instructions whose results are 1177 // liveout so their long live ranges won't conflict with other values 1178 // which are needed inside the BB. Further prioritize liveout instructions 1179 // by the number of operands which are calculated within the BB. 1180 unsigned LScratch = calcMaxScratches(left); 1181 unsigned RScratch = calcMaxScratches(right); 1182 if (LScratch != RScratch) 1183 return LScratch > RScratch; 1184 1185 if (left->Height != right->Height) 1186 return left->Height > right->Height; 1187 1188 if (left->Depth != right->Depth) 1189 return left->Depth < right->Depth; 1190 1191 assert(left->NodeQueueId && right->NodeQueueId && 1192 "NodeQueueId cannot be zero"); 1193 return (left->NodeQueueId > right->NodeQueueId); 1194} 1195 1196template<class SF> 1197bool 1198RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { 1199 if (SU->isTwoAddress) { 1200 unsigned Opc = SU->getNode()->getMachineOpcode(); 1201 const TargetInstrDesc &TID = TII->get(Opc); 1202 unsigned NumRes = TID.getNumDefs(); 1203 unsigned NumOps = TID.getNumOperands() - NumRes; 1204 for (unsigned i = 0; i != NumOps; ++i) { 1205 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1206 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 1207 if (DU->getNodeId() != -1 && 1208 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 1209 return true; 1210 } 1211 } 1212 } 1213 return false; 1214} 1215 1216 1217/// hasCopyToRegUse - Return true if SU has a value successor that is a 1218/// CopyToReg node. 1219static bool hasCopyToRegUse(const SUnit *SU) { 1220 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1221 I != E; ++I) { 1222 if (I->isCtrl()) continue; 1223 const SUnit *SuccSU = I->getSUnit(); 1224 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) 1225 return true; 1226 } 1227 return false; 1228} 1229 1230/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1231/// physical register defs. 1232static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 1233 const TargetInstrInfo *TII, 1234 const TargetRegisterInfo *TRI) { 1235 SDNode *N = SuccSU->getNode(); 1236 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1237 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 1238 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 1239 const unsigned *SUImpDefs = 1240 TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 1241 if (!SUImpDefs) 1242 return false; 1243 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1244 MVT VT = N->getValueType(i); 1245 if (VT == MVT::Flag || VT == MVT::Other) 1246 continue; 1247 if (!N->hasAnyUseOfValue(i)) 1248 continue; 1249 unsigned Reg = ImpDefs[i - NumDefs]; 1250 for (;*SUImpDefs; ++SUImpDefs) { 1251 unsigned SUReg = *SUImpDefs; 1252 if (TRI->regsOverlap(Reg, SUReg)) 1253 return true; 1254 } 1255 } 1256 return false; 1257} 1258 1259/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1260/// it as a def&use operand. Add a pseudo control edge from it to the other 1261/// node (if it won't create a cycle) so the two-address one will be scheduled 1262/// first (lower in the schedule). If both nodes are two-address, favor the 1263/// one that has a CopyToReg use (more likely to be a loop induction update). 1264/// If both are two-address, but one is commutable while the other is not 1265/// commutable, favor the one that's not commutable. 1266template<class SF> 1267void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1268 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1269 SUnit *SU = &(*SUnits)[i]; 1270 if (!SU->isTwoAddress) 1271 continue; 1272 1273 SDNode *Node = SU->getNode(); 1274 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) 1275 continue; 1276 1277 unsigned Opc = Node->getMachineOpcode(); 1278 const TargetInstrDesc &TID = TII->get(Opc); 1279 unsigned NumRes = TID.getNumDefs(); 1280 unsigned NumOps = TID.getNumOperands() - NumRes; 1281 for (unsigned j = 0; j != NumOps; ++j) { 1282 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 1283 continue; 1284 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 1285 if (DU->getNodeId() == -1) 1286 continue; 1287 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 1288 if (!DUSU) continue; 1289 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 1290 E = DUSU->Succs.end(); I != E; ++I) { 1291 if (I->isCtrl()) continue; 1292 SUnit *SuccSU = I->getSUnit(); 1293 if (SuccSU == SU) 1294 continue; 1295 // Be conservative. Ignore if nodes aren't at roughly the same 1296 // depth and height. 1297 if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) 1298 continue; 1299 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 1300 continue; 1301 // Don't constrain nodes with physical register defs if the 1302 // predecessor can clobber them. 1303 if (SuccSU->hasPhysRegDefs) { 1304 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1305 continue; 1306 } 1307 // Don't constraint extract_subreg / insert_subreg these may be 1308 // coalesced away. We don't them close to their uses. 1309 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 1310 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1311 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1312 continue; 1313 if ((!canClobber(SuccSU, DUSU) || 1314 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1315 (!SU->isCommutable && SuccSU->isCommutable)) && 1316 !scheduleDAG->IsReachable(SuccSU, SU)) { 1317 DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum 1318 << " to SU #" << SuccSU->NodeNum << "\n"; 1319 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/1, 1320 /*Reg=*/0, /*isMustAlias=*/false, 1321 /*isArtificial=*/true)); 1322 } 1323 } 1324 } 1325 } 1326} 1327 1328/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1329/// scheduling units. 1330template<class SF> 1331void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1332 SethiUllmanNumbers.assign(SUnits->size(), 0); 1333 1334 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1335 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1336} 1337 1338/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 1339/// predecessors of the successors of the SUnit SU. Stop when the provided 1340/// limit is exceeded. 1341static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 1342 unsigned Limit) { 1343 unsigned Sum = 0; 1344 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1345 I != E; ++I) { 1346 const SUnit *SuccSU = I->getSUnit(); 1347 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1348 EE = SuccSU->Preds.end(); II != EE; ++II) { 1349 SUnit *PredSU = II->getSUnit(); 1350 if (!PredSU->isScheduled) 1351 if (++Sum > Limit) 1352 return Sum; 1353 } 1354 } 1355 return Sum; 1356} 1357 1358 1359// Top down 1360bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1361 unsigned LPriority = SPQ->getNodePriority(left); 1362 unsigned RPriority = SPQ->getNodePriority(right); 1363 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 1364 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 1365 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1366 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1367 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 1368 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 1369 1370 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1371 return false; 1372 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1373 return true; 1374 1375 if (LIsFloater) 1376 LBonus -= 2; 1377 if (RIsFloater) 1378 RBonus -= 2; 1379 if (left->NumSuccs == 1) 1380 LBonus += 2; 1381 if (right->NumSuccs == 1) 1382 RBonus += 2; 1383 1384 if (LPriority+LBonus != RPriority+RBonus) 1385 return LPriority+LBonus < RPriority+RBonus; 1386 1387 if (left->Depth != right->Depth) 1388 return left->Depth < right->Depth; 1389 1390 if (left->NumSuccsLeft != right->NumSuccsLeft) 1391 return left->NumSuccsLeft > right->NumSuccsLeft; 1392 1393 assert(left->NodeQueueId && right->NodeQueueId && 1394 "NodeQueueId cannot be zero"); 1395 return (left->NodeQueueId > right->NodeQueueId); 1396} 1397 1398//===----------------------------------------------------------------------===// 1399// Public Constructor Functions 1400//===----------------------------------------------------------------------===// 1401 1402llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1403 SelectionDAG *DAG, 1404 const TargetMachine *TM, 1405 MachineBasicBlock *BB, 1406 bool) { 1407 const TargetInstrInfo *TII = TM->getInstrInfo(); 1408 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1409 1410 BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI); 1411 1412 ScheduleDAGRRList *SD = 1413 new ScheduleDAGRRList(DAG, BB, *TM, true, PQ); 1414 PQ->setScheduleDAG(SD); 1415 return SD; 1416} 1417 1418llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1419 SelectionDAG *DAG, 1420 const TargetMachine *TM, 1421 MachineBasicBlock *BB, 1422 bool) { 1423 const TargetInstrInfo *TII = TM->getInstrInfo(); 1424 const TargetRegisterInfo *TRI = TM->getRegisterInfo(); 1425 1426 TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI); 1427 1428 ScheduleDAGRRList *SD = new ScheduleDAGRRList(DAG, BB, *TM, false, PQ); 1429 PQ->setScheduleDAG(SD); 1430 return SD; 1431} 1432