ScheduleDAGRRList.cpp revision 2dc7a0e075f619ecd657acdb19c4be8af051e35c
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Evan Cheng and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAG.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SSARegMap.h" 22#include "llvm/Target/MRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/Statistic.h" 30#include <climits> 31#include <queue> 32#include "llvm/Support/CommandLine.h" 33using namespace llvm; 34 35STATISTIC(NumBacktracks, "Number of times scheduler backtraced"); 36STATISTIC(NumDups, "Number of duplicated nodes"); 37STATISTIC(NumCCCopies, "Number of cross class copies"); 38 39static RegisterScheduler 40 burrListDAGScheduler("list-burr", 41 " Bottom-up register reduction list scheduling", 42 createBURRListDAGScheduler); 43static RegisterScheduler 44 tdrListrDAGScheduler("list-tdrr", 45 " Top-down register reduction list scheduling", 46 createTDRRListDAGScheduler); 47 48namespace { 49//===----------------------------------------------------------------------===// 50/// ScheduleDAGRRList - The actual register reduction list scheduler 51/// implementation. This supports both top-down and bottom-up scheduling. 52/// 53class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG { 54private: 55 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 56 /// it is top-down. 57 bool isBottomUp; 58 59 /// AvailableQueue - The priority queue to use for the available SUnits. 60 ///a 61 SchedulingPriorityQueue *AvailableQueue; 62 63 /// LiveRegs / LiveRegDefs - A set of physical registers and their definition 64 /// that are "live". These nodes must be scheduled before any other nodes that 65 /// modifies the registers can be scheduled. 66 SmallSet<unsigned, 4> LiveRegs; 67 std::vector<SUnit*> LiveRegDefs; 68 std::vector<unsigned> LiveRegCycles; 69 70public: 71 ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb, 72 const TargetMachine &tm, bool isbottomup, 73 SchedulingPriorityQueue *availqueue) 74 : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), 75 AvailableQueue(availqueue) { 76 } 77 78 ~ScheduleDAGRRList() { 79 delete AvailableQueue; 80 } 81 82 void Schedule(); 83 84private: 85 void ReleasePred(SUnit*, bool, unsigned); 86 void ReleaseSucc(SUnit*, bool isChain, unsigned); 87 void CapturePred(SUnit*, SUnit*, bool); 88 void ScheduleNodeBottomUp(SUnit*, unsigned); 89 void ScheduleNodeTopDown(SUnit*, unsigned); 90 void UnscheduleNodeBottomUp(SUnit*); 91 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 92 SUnit *CopyAndMoveSuccessors(SUnit*); 93 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, 94 const TargetRegisterClass*, 95 const TargetRegisterClass*, 96 SmallVector<SUnit*, 2>&); 97 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 98 void ListScheduleTopDown(); 99 void ListScheduleBottomUp(); 100 void CommuteNodesToReducePressure(); 101}; 102} // end anonymous namespace 103 104 105/// Schedule - Schedule the DAG using list scheduling. 106void ScheduleDAGRRList::Schedule() { 107 DOUT << "********** List Scheduling **********\n"; 108 109 LiveRegDefs.resize(MRI->getNumRegs(), NULL); 110 LiveRegCycles.resize(MRI->getNumRegs(), 0); 111 112 // Build scheduling units. 113 BuildSchedUnits(); 114 115 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 116 SUnits[su].dumpAll(&DAG)); 117 CalculateDepths(); 118 CalculateHeights(); 119 120 AvailableQueue->initNodes(SUnitMap, SUnits); 121 122 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 123 if (isBottomUp) 124 ListScheduleBottomUp(); 125 else 126 ListScheduleTopDown(); 127 128 AvailableQueue->releaseState(); 129 130 CommuteNodesToReducePressure(); 131 132 DOUT << "*** Final schedule ***\n"; 133 DEBUG(dumpSchedule()); 134 DOUT << "\n"; 135 136 // Emit in scheduled order 137 EmitSchedule(); 138} 139 140/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 141/// it is not the last use of its first operand, add it to the CommuteSet if 142/// possible. It will be commuted when it is translated to a MI. 143void ScheduleDAGRRList::CommuteNodesToReducePressure() { 144 SmallPtrSet<SUnit*, 4> OperandSeen; 145 for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. 146 SUnit *SU = Sequence[i]; 147 if (!SU || !SU->Node) continue; 148 if (SU->isCommutable) { 149 unsigned Opc = SU->Node->getTargetOpcode(); 150 unsigned NumRes = TII->getNumDefs(Opc); 151 unsigned NumOps = CountOperands(SU->Node); 152 for (unsigned j = 0; j != NumOps; ++j) { 153 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1) 154 continue; 155 156 SDNode *OpN = SU->Node->getOperand(j).Val; 157 SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo]; 158 if (OpSU && OperandSeen.count(OpSU) == 1) { 159 // Ok, so SU is not the last use of OpSU, but SU is two-address so 160 // it will clobber OpSU. Try to commute SU if no other source operands 161 // are live below. 162 bool DoCommute = true; 163 for (unsigned k = 0; k < NumOps; ++k) { 164 if (k != j) { 165 OpN = SU->Node->getOperand(k).Val; 166 OpSU = SUnitMap[OpN][SU->InstanceNo]; 167 if (OpSU && OperandSeen.count(OpSU) == 1) { 168 DoCommute = false; 169 break; 170 } 171 } 172 } 173 if (DoCommute) 174 CommuteSet.insert(SU->Node); 175 } 176 177 // Only look at the first use&def node for now. 178 break; 179 } 180 } 181 182 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 183 I != E; ++I) { 184 if (!I->isCtrl) 185 OperandSeen.insert(I->Dep); 186 } 187 } 188} 189 190//===----------------------------------------------------------------------===// 191// Bottom-Up Scheduling 192//===----------------------------------------------------------------------===// 193 194/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 195/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 196void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 197 unsigned CurCycle) { 198 // FIXME: the distance between two nodes is not always == the predecessor's 199 // latency. For example, the reader can very well read the register written 200 // by the predecessor later than the issue cycle. It also depends on the 201 // interrupt model (drain vs. freeze). 202 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); 203 204 if (!isChain) 205 --PredSU->NumSuccsLeft; 206 else 207 --PredSU->NumChainSuccsLeft; 208 209#ifndef NDEBUG 210 if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { 211 cerr << "*** List scheduling failed! ***\n"; 212 PredSU->dump(&DAG); 213 cerr << " has been released too many times!\n"; 214 assert(0); 215 } 216#endif 217 218 if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { 219 // EntryToken has to go last! Special case it here. 220 if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) { 221 PredSU->isAvailable = true; 222 AvailableQueue->push(PredSU); 223 } 224 } 225} 226 227/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 228/// count of its predecessors. If a predecessor pending count is zero, add it to 229/// the Available queue. 230void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 231 DOUT << "*** Scheduling [" << CurCycle << "]: "; 232 DEBUG(SU->dump(&DAG)); 233 SU->Cycle = CurCycle; 234 235 AvailableQueue->ScheduledNode(SU); 236 237 // Bottom up: release predecessors 238 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 239 I != E; ++I) { 240 ReleasePred(I->Dep, I->isCtrl, CurCycle); 241 if (I->Cost < 0) { 242 // This is a physical register dependency and it's impossible or 243 // expensive to copy the register. Make sure nothing that can 244 // clobber the register is scheduled between the predecessor and 245 // this node. 246 if (LiveRegs.insert(I->Reg)) { 247 LiveRegDefs[I->Reg] = I->Dep; 248 LiveRegCycles[I->Reg] = CurCycle; 249 } 250 } 251 } 252 253 // Release all the implicit physical register defs that are live. 254 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 255 I != E; ++I) { 256 if (I->Cost < 0) { 257 if (LiveRegCycles[I->Reg] == I->Dep->Cycle) { 258 LiveRegs.erase(I->Reg); 259 assert(LiveRegDefs[I->Reg] == SU && 260 "Physical register dependency violated?"); 261 LiveRegDefs[I->Reg] = NULL; 262 LiveRegCycles[I->Reg] = 0; 263 } 264 } 265 } 266 267 SU->isScheduled = true; 268} 269 270/// CapturePred - This does the opposite of ReleasePred. Since SU is being 271/// unscheduled, incrcease the succ left count of its predecessors. Remove 272/// them from AvailableQueue if necessary. 273void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) { 274 PredSU->CycleBound = 0; 275 for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end(); 276 I != E; ++I) { 277 if (I->Dep == SU) 278 continue; 279 PredSU->CycleBound = std::max(PredSU->CycleBound, 280 I->Dep->Cycle + PredSU->Latency); 281 } 282 283 if (PredSU->isAvailable) { 284 PredSU->isAvailable = false; 285 if (!PredSU->isPending) 286 AvailableQueue->remove(PredSU); 287 } 288 289 if (!isChain) 290 ++PredSU->NumSuccsLeft; 291 else 292 ++PredSU->NumChainSuccsLeft; 293} 294 295/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 296/// its predecessor states to reflect the change. 297void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 298 DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; 299 DEBUG(SU->dump(&DAG)); 300 301 AvailableQueue->UnscheduledNode(SU); 302 303 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 304 I != E; ++I) { 305 CapturePred(I->Dep, SU, I->isCtrl); 306 if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) { 307 LiveRegs.erase(I->Reg); 308 assert(LiveRegDefs[I->Reg] == I->Dep && 309 "Physical register dependency violated?"); 310 LiveRegDefs[I->Reg] = NULL; 311 LiveRegCycles[I->Reg] = 0; 312 } 313 } 314 315 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 316 I != E; ++I) { 317 if (I->Cost < 0) { 318 if (LiveRegs.insert(I->Reg)) { 319 assert(!LiveRegDefs[I->Reg] && 320 "Physical register dependency violated?"); 321 LiveRegDefs[I->Reg] = SU; 322 } 323 if (I->Dep->Cycle < LiveRegCycles[I->Reg]) 324 LiveRegCycles[I->Reg] = I->Dep->Cycle; 325 } 326 } 327 328 SU->Cycle = 0; 329 SU->isScheduled = false; 330 SU->isAvailable = true; 331 AvailableQueue->push(SU); 332} 333 334// FIXME: This is probably too slow! 335static void isReachable(SUnit *SU, SUnit *TargetSU, 336 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 337 if (Reached) return; 338 if (SU == TargetSU) { 339 Reached = true; 340 return; 341 } 342 if (!Visited.insert(SU)) return; 343 344 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 345 ++I) 346 isReachable(I->Dep, TargetSU, Visited, Reached); 347} 348 349static bool isReachable(SUnit *SU, SUnit *TargetSU) { 350 SmallPtrSet<SUnit*, 32> Visited; 351 bool Reached = false; 352 isReachable(SU, TargetSU, Visited, Reached); 353 return Reached; 354} 355 356/// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 357/// create a cycle. 358static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 359 if (isReachable(TargetSU, SU)) 360 return true; 361 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 362 I != E; ++I) 363 if (I->Cost < 0 && isReachable(TargetSU, I->Dep)) 364 return true; 365 return false; 366} 367 368/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 369/// BTCycle in order to schedule a specific node. Returns the last unscheduled 370/// SUnit. Also returns if a successor is unscheduled in the process. 371void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 372 unsigned &CurCycle) { 373 SUnit *OldSU = NULL; 374 while (CurCycle > BtCycle) { 375 OldSU = Sequence.back(); 376 Sequence.pop_back(); 377 if (SU->isSucc(OldSU)) 378 // Don't try to remove SU from AvailableQueue. 379 SU->isAvailable = false; 380 UnscheduleNodeBottomUp(OldSU); 381 --CurCycle; 382 } 383 384 385 if (SU->isSucc(OldSU)) { 386 assert(false && "Something is wrong!"); 387 abort(); 388 } 389 390 ++NumBacktracks; 391} 392 393/// isSafeToCopy - True if the SUnit for the given SDNode can safely cloned, 394/// i.e. the node does not produce a flag, it does not read a flag and it does 395/// not have an incoming chain. 396static bool isSafeToCopy(SDNode *N) { 397 if (!N) 398 return true; 399 400 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) 401 if (N->getValueType(i) == MVT::Flag) 402 return false; 403 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 404 const SDOperand &Op = N->getOperand(i); 405 MVT::ValueType VT = Op.Val->getValueType(Op.ResNo); 406 if (VT == MVT::Other || VT == MVT::Flag) 407 return false; 408 } 409 410 return true; 411} 412 413/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 414/// successors to the newly created node. 415SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 416 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 417 418 SUnit *NewSU = Clone(SU); 419 420 // New SUnit has the exact same predecessors. 421 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 422 I != E; ++I) 423 if (!I->isSpecial) { 424 NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost); 425 NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1); 426 } 427 428 // Only copy scheduled successors. Cut them from old node's successor 429 // list and move them over. 430 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 431 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 432 I != E; ++I) { 433 if (I->isSpecial) 434 continue; 435 if (I->Dep->isScheduled) { 436 NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1); 437 I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost); 438 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 439 } 440 } 441 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 442 SUnit *Succ = DelDeps[i].first; 443 bool isCtrl = DelDeps[i].second; 444 Succ->removePred(SU, isCtrl, false); 445 } 446 447 AvailableQueue->updateNode(SU); 448 AvailableQueue->addNode(NewSU); 449 450 ++NumDups; 451 return NewSU; 452} 453 454/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 455/// and move all scheduled successors of the given SUnit to the last copy. 456void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 457 const TargetRegisterClass *DestRC, 458 const TargetRegisterClass *SrcRC, 459 SmallVector<SUnit*, 2> &Copies) { 460 SUnit *CopyFromSU = NewSUnit(NULL); 461 CopyFromSU->CopySrcRC = SrcRC; 462 CopyFromSU->CopyDstRC = DestRC; 463 CopyFromSU->Depth = SU->Depth; 464 CopyFromSU->Height = SU->Height; 465 466 SUnit *CopyToSU = NewSUnit(NULL); 467 CopyToSU->CopySrcRC = DestRC; 468 CopyToSU->CopyDstRC = SrcRC; 469 470 // Only copy scheduled successors. Cut them from old node's successor 471 // list and move them over. 472 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 473 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 474 I != E; ++I) { 475 if (I->isSpecial) 476 continue; 477 if (I->Dep->isScheduled) { 478 CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1); 479 I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost); 480 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 481 } 482 } 483 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 484 SUnit *Succ = DelDeps[i].first; 485 bool isCtrl = DelDeps[i].second; 486 Succ->removePred(SU, isCtrl, false); 487 } 488 489 CopyFromSU->addPred(SU, false, false, Reg, -1); 490 CopyToSU->addPred(CopyFromSU, false, false, Reg, 1); 491 492 AvailableQueue->updateNode(SU); 493 AvailableQueue->addNode(CopyFromSU); 494 AvailableQueue->addNode(CopyToSU); 495 Copies.push_back(CopyFromSU); 496 Copies.push_back(CopyToSU); 497 498 ++NumCCCopies; 499} 500 501/// getPhysicalRegisterVT - Returns the ValueType of the physical register 502/// definition of the specified node. 503/// FIXME: Move to SelectionDAG? 504static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg, 505 const TargetInstrInfo *TII) { 506 const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode()); 507 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 508 unsigned NumRes = TID.numDefs; 509 for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) { 510 if (Reg == *ImpDef) 511 break; 512 ++NumRes; 513 } 514 return N->getValueType(NumRes); 515} 516 517/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 518/// scheduling of the given node to satisfy live physical register dependencies. 519/// If the specific node is the last one that's available to schedule, do 520/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 521bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 522 SmallVector<unsigned, 4> &LRegs){ 523 if (LiveRegs.empty()) 524 return false; 525 526 // If this node would clobber any "live" register, then it's not ready. 527 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 528 I != E; ++I) { 529 if (I->Cost < 0) { 530 unsigned Reg = I->Reg; 531 if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) 532 LRegs.push_back(Reg); 533 for (const unsigned *Alias = MRI->getAliasSet(Reg); 534 *Alias; ++Alias) 535 if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) 536 LRegs.push_back(*Alias); 537 } 538 } 539 540 for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) { 541 SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1]; 542 if (!Node || !Node->isTargetOpcode()) 543 continue; 544 const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode()); 545 if (!TID.ImplicitDefs) 546 continue; 547 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 548 if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) 549 LRegs.push_back(*Reg); 550 for (const unsigned *Alias = MRI->getAliasSet(*Reg); 551 *Alias; ++Alias) 552 if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) 553 LRegs.push_back(*Alias); 554 } 555 } 556 return !LRegs.empty(); 557} 558 559 560/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 561/// schedulers. 562void ScheduleDAGRRList::ListScheduleBottomUp() { 563 unsigned CurCycle = 0; 564 // Add root to Available queue. 565 SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front(); 566 RootSU->isAvailable = true; 567 AvailableQueue->push(RootSU); 568 569 // While Available queue is not empty, grab the node with the highest 570 // priority. If it is not ready put it back. Schedule the node. 571 SmallVector<SUnit*, 4> NotReady; 572 while (!AvailableQueue->empty()) { 573 bool Delayed = false; 574 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 575 SUnit *CurSU = AvailableQueue->pop(); 576 while (CurSU) { 577 if (CurSU->CycleBound <= CurCycle) { 578 SmallVector<unsigned, 4> LRegs; 579 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 580 break; 581 Delayed = true; 582 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 583 } 584 585 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 586 NotReady.push_back(CurSU); 587 CurSU = AvailableQueue->pop(); 588 } 589 590 // All candidates are delayed due to live physical reg dependencies. 591 // Try backtracking, code duplication, or inserting cross class copies 592 // to resolve it. 593 if (Delayed && !CurSU) { 594 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 595 SUnit *TrySU = NotReady[i]; 596 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 597 598 // Try unscheduling up to the point where it's safe to schedule 599 // this node. 600 unsigned LiveCycle = CurCycle; 601 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 602 unsigned Reg = LRegs[j]; 603 unsigned LCycle = LiveRegCycles[Reg]; 604 LiveCycle = std::min(LiveCycle, LCycle); 605 } 606 SUnit *OldSU = Sequence[LiveCycle]; 607 if (!WillCreateCycle(TrySU, OldSU)) { 608 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 609 // Force the current node to be scheduled before the node that 610 // requires the physical reg dep. 611 if (OldSU->isAvailable) { 612 OldSU->isAvailable = false; 613 AvailableQueue->remove(OldSU); 614 } 615 TrySU->addPred(OldSU, true, true); 616 // If one or more successors has been unscheduled, then the current 617 // node is no longer avaialable. Schedule a successor that's now 618 // available instead. 619 if (!TrySU->isAvailable) 620 CurSU = AvailableQueue->pop(); 621 else { 622 CurSU = TrySU; 623 TrySU->isPending = false; 624 NotReady.erase(NotReady.begin()+i); 625 } 626 break; 627 } 628 } 629 630 if (!CurSU) { 631 // Can't backtrace. Try duplicating the nodes that produces these 632 // "expensive to copy" values to break the dependency. In case even 633 // that doesn't work, insert cross class copies. 634 SUnit *TrySU = NotReady[0]; 635 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 636 assert(LRegs.size() == 1 && "Can't handle this yet!"); 637 unsigned Reg = LRegs[0]; 638 SUnit *LRDef = LiveRegDefs[Reg]; 639 SUnit *NewDef; 640 if (isSafeToCopy(LRDef->Node)) 641 NewDef = CopyAndMoveSuccessors(LRDef); 642 else { 643 // Issue expensive cross register class copies. 644 MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII); 645 const TargetRegisterClass *RC = 646 MRI->getPhysicalRegisterRegClass(VT, Reg); 647 const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC); 648 if (!DestRC) { 649 assert(false && "Don't know how to copy this physical register!"); 650 abort(); 651 } 652 SmallVector<SUnit*, 2> Copies; 653 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 654 DOUT << "Adding an edge from SU # " << TrySU->NodeNum 655 << " to SU #" << Copies.front()->NodeNum << "\n"; 656 TrySU->addPred(Copies.front(), true, true); 657 NewDef = Copies.back(); 658 } 659 660 DOUT << "Adding an edge from SU # " << NewDef->NodeNum 661 << " to SU #" << TrySU->NodeNum << "\n"; 662 LiveRegDefs[Reg] = NewDef; 663 NewDef->addPred(TrySU, true, true); 664 TrySU->isAvailable = false; 665 CurSU = NewDef; 666 } 667 668 if (!CurSU) { 669 assert(false && "Unable to resolve live physical register dependencies!"); 670 abort(); 671 } 672 } 673 674 // Add the nodes that aren't ready back onto the available list. 675 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 676 NotReady[i]->isPending = false; 677 // May no longer be available due to backtracking. 678 if (NotReady[i]->isAvailable) 679 AvailableQueue->push(NotReady[i]); 680 } 681 NotReady.clear(); 682 683 if (!CurSU) 684 Sequence.push_back(0); 685 else { 686 ScheduleNodeBottomUp(CurSU, CurCycle); 687 Sequence.push_back(CurSU); 688 } 689 ++CurCycle; 690 } 691 692 // Add entry node last 693 if (DAG.getEntryNode().Val != DAG.getRoot().Val) { 694 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front(); 695 Sequence.push_back(Entry); 696 } 697 698 // Reverse the order if it is bottom up. 699 std::reverse(Sequence.begin(), Sequence.end()); 700 701 702#ifndef NDEBUG 703 // Verify that all SUnits were scheduled. 704 bool AnyNotSched = false; 705 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 706 if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { 707 if (!AnyNotSched) 708 cerr << "*** List scheduling failed! ***\n"; 709 SUnits[i].dump(&DAG); 710 cerr << "has not been scheduled!\n"; 711 AnyNotSched = true; 712 } 713 } 714 assert(!AnyNotSched); 715#endif 716} 717 718//===----------------------------------------------------------------------===// 719// Top-Down Scheduling 720//===----------------------------------------------------------------------===// 721 722/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 723/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 724void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, 725 unsigned CurCycle) { 726 // FIXME: the distance between two nodes is not always == the predecessor's 727 // latency. For example, the reader can very well read the register written 728 // by the predecessor later than the issue cycle. It also depends on the 729 // interrupt model (drain vs. freeze). 730 SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency); 731 732 if (!isChain) 733 --SuccSU->NumPredsLeft; 734 else 735 --SuccSU->NumChainPredsLeft; 736 737#ifndef NDEBUG 738 if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) { 739 cerr << "*** List scheduling failed! ***\n"; 740 SuccSU->dump(&DAG); 741 cerr << " has been released too many times!\n"; 742 assert(0); 743 } 744#endif 745 746 if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) { 747 SuccSU->isAvailable = true; 748 AvailableQueue->push(SuccSU); 749 } 750} 751 752 753/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 754/// count of its successors. If a successor pending count is zero, add it to 755/// the Available queue. 756void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 757 DOUT << "*** Scheduling [" << CurCycle << "]: "; 758 DEBUG(SU->dump(&DAG)); 759 SU->Cycle = CurCycle; 760 761 AvailableQueue->ScheduledNode(SU); 762 763 // Top down: release successors 764 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 765 I != E; ++I) 766 ReleaseSucc(I->Dep, I->isCtrl, CurCycle); 767 SU->isScheduled = true; 768} 769 770/// ListScheduleTopDown - The main loop of list scheduling for top-down 771/// schedulers. 772void ScheduleDAGRRList::ListScheduleTopDown() { 773 unsigned CurCycle = 0; 774 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front(); 775 776 // All leaves to Available queue. 777 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 778 // It is available if it has no predecessors. 779 if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { 780 AvailableQueue->push(&SUnits[i]); 781 SUnits[i].isAvailable = true; 782 } 783 } 784 785 // Emit the entry node first. 786 ScheduleNodeTopDown(Entry, CurCycle); 787 Sequence.push_back(Entry); 788 ++CurCycle; 789 790 // While Available queue is not empty, grab the node with the highest 791 // priority. If it is not ready put it back. Schedule the node. 792 std::vector<SUnit*> NotReady; 793 while (!AvailableQueue->empty()) { 794 SUnit *CurSU = AvailableQueue->pop(); 795 while (CurSU && CurSU->CycleBound > CurCycle) { 796 NotReady.push_back(CurSU); 797 CurSU = AvailableQueue->pop(); 798 } 799 800 // Add the nodes that aren't ready back onto the available list. 801 AvailableQueue->push_all(NotReady); 802 NotReady.clear(); 803 804 if (!CurSU) 805 Sequence.push_back(0); 806 else { 807 ScheduleNodeTopDown(CurSU, CurCycle); 808 Sequence.push_back(CurSU); 809 } 810 CurCycle++; 811 } 812 813 814#ifndef NDEBUG 815 // Verify that all SUnits were scheduled. 816 bool AnyNotSched = false; 817 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 818 if (!SUnits[i].isScheduled) { 819 if (!AnyNotSched) 820 cerr << "*** List scheduling failed! ***\n"; 821 SUnits[i].dump(&DAG); 822 cerr << "has not been scheduled!\n"; 823 AnyNotSched = true; 824 } 825 } 826 assert(!AnyNotSched); 827#endif 828} 829 830 831 832//===----------------------------------------------------------------------===// 833// RegReductionPriorityQueue Implementation 834//===----------------------------------------------------------------------===// 835// 836// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 837// to reduce register pressure. 838// 839namespace { 840 template<class SF> 841 class RegReductionPriorityQueue; 842 843 /// Sorting functions for the Available queue. 844 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 845 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 846 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 847 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 848 849 bool operator()(const SUnit* left, const SUnit* right) const; 850 }; 851 852 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 853 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 854 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 855 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 856 857 bool operator()(const SUnit* left, const SUnit* right) const; 858 }; 859} // end anonymous namespace 860 861static inline bool isCopyFromLiveIn(const SUnit *SU) { 862 SDNode *N = SU->Node; 863 return N && N->getOpcode() == ISD::CopyFromReg && 864 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 865} 866 867namespace { 868 template<class SF> 869 class VISIBILITY_HIDDEN RegReductionPriorityQueue 870 : public SchedulingPriorityQueue { 871 std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; 872 873 public: 874 RegReductionPriorityQueue() : 875 Queue(SF(this)) {} 876 877 virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 878 std::vector<SUnit> &sunits) {} 879 880 virtual void addNode(const SUnit *SU) {} 881 882 virtual void updateNode(const SUnit *SU) {} 883 884 virtual void releaseState() {} 885 886 virtual unsigned getNodePriority(const SUnit *SU) const { 887 return 0; 888 } 889 890 unsigned size() const { return Queue.size(); } 891 892 bool empty() const { return Queue.empty(); } 893 894 void push(SUnit *U) { 895 Queue.push(U); 896 } 897 void push_all(const std::vector<SUnit *> &Nodes) { 898 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 899 Queue.push(Nodes[i]); 900 } 901 902 SUnit *pop() { 903 if (empty()) return NULL; 904 SUnit *V = Queue.top(); 905 Queue.pop(); 906 return V; 907 } 908 909 /// remove - This is a really inefficient way to remove a node from a 910 /// priority queue. We should roll our own heap to make this better or 911 /// something. 912 void remove(SUnit *SU) { 913 std::vector<SUnit*> Temp; 914 915 assert(!Queue.empty() && "Not in queue!"); 916 while (Queue.top() != SU) { 917 Temp.push_back(Queue.top()); 918 Queue.pop(); 919 assert(!Queue.empty() && "Not in queue!"); 920 } 921 922 // Remove the node from the PQ. 923 Queue.pop(); 924 925 // Add all the other nodes back. 926 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 927 Queue.push(Temp[i]); 928 } 929 }; 930 931 template<class SF> 932 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 933 : public RegReductionPriorityQueue<SF> { 934 // SUnitMap SDNode to SUnit mapping (n -> n). 935 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 936 937 // SUnits - The SUnits for the current graph. 938 const std::vector<SUnit> *SUnits; 939 940 // SethiUllmanNumbers - The SethiUllman number for each node. 941 std::vector<unsigned> SethiUllmanNumbers; 942 943 const TargetInstrInfo *TII; 944 public: 945 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii) 946 : TII(tii) {} 947 948 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 949 std::vector<SUnit> &sunits) { 950 SUnitMap = &sumap; 951 SUnits = &sunits; 952 // Add pseudo dependency edges for two-address nodes. 953 AddPseudoTwoAddrDeps(); 954 // Calculate node priorities. 955 CalculateSethiUllmanNumbers(); 956 } 957 958 void addNode(const SUnit *SU) { 959 SethiUllmanNumbers.resize(SUnits->size(), 0); 960 CalcNodeSethiUllmanNumber(SU); 961 } 962 963 void updateNode(const SUnit *SU) { 964 SethiUllmanNumbers[SU->NodeNum] = 0; 965 CalcNodeSethiUllmanNumber(SU); 966 } 967 968 void releaseState() { 969 SUnits = 0; 970 SethiUllmanNumbers.clear(); 971 } 972 973 unsigned getNodePriority(const SUnit *SU) const { 974 assert(SU->NodeNum < SethiUllmanNumbers.size()); 975 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 976 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 977 // CopyFromReg should be close to its def because it restricts 978 // allocation choices. But if it is a livein then perhaps we want it 979 // closer to its uses so it can be coalesced. 980 return 0xffff; 981 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 982 // CopyToReg should be close to its uses to facilitate coalescing and 983 // avoid spilling. 984 return 0; 985 else if (SU->NumSuccs == 0) 986 // If SU does not have a use, i.e. it doesn't produce a value that would 987 // be consumed (e.g. store), then it terminates a chain of computation. 988 // Give it a large SethiUllman number so it will be scheduled right 989 // before its predecessors that it doesn't lengthen their live ranges. 990 return 0xffff; 991 else if (SU->NumPreds == 0) 992 // If SU does not have a def, schedule it close to its uses because it 993 // does not lengthen any live ranges. 994 return 0; 995 else 996 return SethiUllmanNumbers[SU->NodeNum]; 997 } 998 999 private: 1000 bool canClobber(SUnit *SU, SUnit *Op); 1001 void AddPseudoTwoAddrDeps(); 1002 void CalculateSethiUllmanNumbers(); 1003 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1004 }; 1005 1006 1007 template<class SF> 1008 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 1009 : public RegReductionPriorityQueue<SF> { 1010 // SUnitMap SDNode to SUnit mapping (n -> n). 1011 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1012 1013 // SUnits - The SUnits for the current graph. 1014 const std::vector<SUnit> *SUnits; 1015 1016 // SethiUllmanNumbers - The SethiUllman number for each node. 1017 std::vector<unsigned> SethiUllmanNumbers; 1018 1019 public: 1020 TDRegReductionPriorityQueue() {} 1021 1022 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1023 std::vector<SUnit> &sunits) { 1024 SUnitMap = &sumap; 1025 SUnits = &sunits; 1026 // Calculate node priorities. 1027 CalculateSethiUllmanNumbers(); 1028 } 1029 1030 void addNode(const SUnit *SU) { 1031 SethiUllmanNumbers.resize(SUnits->size(), 0); 1032 CalcNodeSethiUllmanNumber(SU); 1033 } 1034 1035 void updateNode(const SUnit *SU) { 1036 SethiUllmanNumbers[SU->NodeNum] = 0; 1037 CalcNodeSethiUllmanNumber(SU); 1038 } 1039 1040 void releaseState() { 1041 SUnits = 0; 1042 SethiUllmanNumbers.clear(); 1043 } 1044 1045 unsigned getNodePriority(const SUnit *SU) const { 1046 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1047 return SethiUllmanNumbers[SU->NodeNum]; 1048 } 1049 1050 private: 1051 void CalculateSethiUllmanNumbers(); 1052 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1053 }; 1054} 1055 1056/// closestSucc - Returns the scheduled cycle of the successor which is 1057/// closet to the current cycle. 1058static unsigned closestSucc(const SUnit *SU) { 1059 unsigned MaxCycle = 0; 1060 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1061 I != E; ++I) { 1062 unsigned Cycle = I->Dep->Cycle; 1063 // If there are bunch of CopyToRegs stacked up, they should be considered 1064 // to be at the same position. 1065 if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg) 1066 Cycle = closestSucc(I->Dep)+1; 1067 if (Cycle > MaxCycle) 1068 MaxCycle = Cycle; 1069 } 1070 return MaxCycle; 1071} 1072 1073/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1074/// for scratch registers. Live-in operands and live-out results don't count 1075/// since they are "fixed". 1076static unsigned calcMaxScratches(const SUnit *SU) { 1077 unsigned Scratches = 0; 1078 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1079 I != E; ++I) { 1080 if (I->isCtrl) continue; // ignore chain preds 1081 if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyFromReg) 1082 Scratches++; 1083 } 1084 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1085 I != E; ++I) { 1086 if (I->isCtrl) continue; // ignore chain succs 1087 if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyToReg) 1088 Scratches += 10; 1089 } 1090 return Scratches; 1091} 1092 1093// Bottom up 1094bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1095 // There used to be a special tie breaker here that looked for 1096 // two-address instructions and preferred the instruction with a 1097 // def&use operand. The special case triggered diagnostics when 1098 // _GLIBCXX_DEBUG was enabled because it broke the strict weak 1099 // ordering that priority_queue requires. It didn't help much anyway 1100 // because AddPseudoTwoAddrDeps already covers many of the cases 1101 // where it would have applied. In addition, it's counter-intuitive 1102 // that a tie breaker would be the first thing attempted. There's a 1103 // "real" tie breaker below that is the operation of last resort. 1104 // The fact that the "special tie breaker" would trigger when there 1105 // wasn't otherwise a tie is what broke the strict weak ordering 1106 // constraint. 1107 1108 unsigned LPriority = SPQ->getNodePriority(left); 1109 unsigned RPriority = SPQ->getNodePriority(right); 1110 if (LPriority > RPriority) 1111 return true; 1112 else if (LPriority == RPriority) { 1113 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1114 // e.g. 1115 // t1 = op t2, c1 1116 // t3 = op t4, c2 1117 // 1118 // and the following instructions are both ready. 1119 // t2 = op c3 1120 // t4 = op c4 1121 // 1122 // Then schedule t2 = op first. 1123 // i.e. 1124 // t4 = op c4 1125 // t2 = op c3 1126 // t1 = op t2, c1 1127 // t3 = op t4, c2 1128 // 1129 // This creates more short live intervals. 1130 unsigned LDist = closestSucc(left); 1131 unsigned RDist = closestSucc(right); 1132 if (LDist < RDist) 1133 return true; 1134 else if (LDist == RDist) { 1135 // Intuitively, it's good to push down instructions whose results are 1136 // liveout so their long live ranges won't conflict with other values 1137 // which are needed inside the BB. Further prioritize liveout instructions 1138 // by the number of operands which are calculated within the BB. 1139 unsigned LScratch = calcMaxScratches(left); 1140 unsigned RScratch = calcMaxScratches(right); 1141 if (LScratch > RScratch) 1142 return true; 1143 else if (LScratch == RScratch) 1144 if (left->Height > right->Height) 1145 return true; 1146 else if (left->Height == right->Height) 1147 if (left->Depth < right->Depth) 1148 return true; 1149 else if (left->Depth == right->Depth) 1150 if (left->CycleBound > right->CycleBound) 1151 return true; 1152 } 1153 } 1154 return false; 1155} 1156 1157template<class SF> 1158bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) { 1159 if (SU->isTwoAddress) { 1160 unsigned Opc = SU->Node->getTargetOpcode(); 1161 unsigned NumRes = TII->getNumDefs(Opc); 1162 unsigned NumOps = ScheduleDAG::CountOperands(SU->Node); 1163 for (unsigned i = 0; i != NumOps; ++i) { 1164 if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) { 1165 SDNode *DU = SU->Node->getOperand(i).Val; 1166 if (Op == (*SUnitMap)[DU][SU->InstanceNo]) 1167 return true; 1168 } 1169 } 1170 } 1171 return false; 1172} 1173 1174 1175/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1176/// it as a def&use operand. Add a pseudo control edge from it to the other 1177/// node (if it won't create a cycle) so the two-address one will be scheduled 1178/// first (lower in the schedule). 1179template<class SF> 1180void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1181 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1182 SUnit *SU = (SUnit *)&((*SUnits)[i]); 1183 if (!SU->isTwoAddress) 1184 continue; 1185 1186 SDNode *Node = SU->Node; 1187 if (!Node || !Node->isTargetOpcode()) 1188 continue; 1189 1190 unsigned Opc = Node->getTargetOpcode(); 1191 unsigned NumRes = TII->getNumDefs(Opc); 1192 unsigned NumOps = ScheduleDAG::CountOperands(Node); 1193 for (unsigned j = 0; j != NumOps; ++j) { 1194 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { 1195 SDNode *DU = SU->Node->getOperand(j).Val; 1196 SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; 1197 if (!DUSU) continue; 1198 for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); 1199 I != E; ++I) { 1200 if (I->isCtrl) continue; 1201 SUnit *SuccSU = I->Dep; 1202 // Don't constraint nodes with implicit defs. It can create cycles 1203 // plus it may increase register pressures. 1204 if (SuccSU == SU || SuccSU->hasImplicitDefs) 1205 continue; 1206 // Be conservative. Ignore if nodes aren't at the same depth. 1207 if (SuccSU->Depth != SU->Depth) 1208 continue; 1209 if ((!canClobber(SuccSU, DUSU) || 1210 (!SU->isCommutable && SuccSU->isCommutable)) && 1211 !isReachable(SuccSU, SU)) { 1212 DOUT << "Adding an edge from SU # " << SU->NodeNum 1213 << " to SU #" << SuccSU->NodeNum << "\n"; 1214 SU->addPred(SuccSU, true, true); 1215 } 1216 } 1217 } 1218 } 1219 } 1220} 1221 1222/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1223/// Smaller number is the higher priority. 1224template<class SF> 1225unsigned BURegReductionPriorityQueue<SF>:: 1226CalcNodeSethiUllmanNumber(const SUnit *SU) { 1227 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1228 if (SethiUllmanNumber != 0) 1229 return SethiUllmanNumber; 1230 1231 unsigned Extra = 0; 1232 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1233 I != E; ++I) { 1234 if (I->isCtrl) continue; // ignore chain preds 1235 SUnit *PredSU = I->Dep; 1236 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1237 if (PredSethiUllman > SethiUllmanNumber) { 1238 SethiUllmanNumber = PredSethiUllman; 1239 Extra = 0; 1240 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1241 ++Extra; 1242 } 1243 1244 SethiUllmanNumber += Extra; 1245 1246 if (SethiUllmanNumber == 0) 1247 SethiUllmanNumber = 1; 1248 1249 return SethiUllmanNumber; 1250} 1251 1252/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1253/// scheduling units. 1254template<class SF> 1255void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1256 SethiUllmanNumbers.assign(SUnits->size(), 0); 1257 1258 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1259 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1260} 1261 1262static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { 1263 unsigned Sum = 0; 1264 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1265 I != E; ++I) { 1266 SUnit *SuccSU = I->Dep; 1267 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1268 EE = SuccSU->Preds.end(); II != EE; ++II) { 1269 SUnit *PredSU = II->Dep; 1270 if (!PredSU->isScheduled) 1271 ++Sum; 1272 } 1273 } 1274 1275 return Sum; 1276} 1277 1278 1279// Top down 1280bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1281 unsigned LPriority = SPQ->getNodePriority(left); 1282 unsigned RPriority = SPQ->getNodePriority(right); 1283 bool LIsTarget = left->Node && left->Node->isTargetOpcode(); 1284 bool RIsTarget = right->Node && right->Node->isTargetOpcode(); 1285 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1286 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1287 unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; 1288 unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; 1289 1290 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1291 return false; 1292 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1293 return true; 1294 1295 // Special tie breaker: if two nodes share a operand, the one that use it 1296 // as a def&use operand is preferred. 1297 if (LIsTarget && RIsTarget) { 1298 if (left->isTwoAddress && !right->isTwoAddress) { 1299 SDNode *DUNode = left->Node->getOperand(0).Val; 1300 if (DUNode->isOperand(right->Node)) 1301 RBonus += 2; 1302 } 1303 if (!left->isTwoAddress && right->isTwoAddress) { 1304 SDNode *DUNode = right->Node->getOperand(0).Val; 1305 if (DUNode->isOperand(left->Node)) 1306 LBonus += 2; 1307 } 1308 } 1309 if (LIsFloater) 1310 LBonus -= 2; 1311 if (RIsFloater) 1312 RBonus -= 2; 1313 if (left->NumSuccs == 1) 1314 LBonus += 2; 1315 if (right->NumSuccs == 1) 1316 RBonus += 2; 1317 1318 if (LPriority+LBonus < RPriority+RBonus) 1319 return true; 1320 else if (LPriority == RPriority) 1321 if (left->Depth < right->Depth) 1322 return true; 1323 else if (left->Depth == right->Depth) 1324 if (left->NumSuccsLeft > right->NumSuccsLeft) 1325 return true; 1326 else if (left->NumSuccsLeft == right->NumSuccsLeft) 1327 if (left->CycleBound > right->CycleBound) 1328 return true; 1329 return false; 1330} 1331 1332/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1333/// Smaller number is the higher priority. 1334template<class SF> 1335unsigned TDRegReductionPriorityQueue<SF>:: 1336CalcNodeSethiUllmanNumber(const SUnit *SU) { 1337 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1338 if (SethiUllmanNumber != 0) 1339 return SethiUllmanNumber; 1340 1341 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1342 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1343 SethiUllmanNumber = 0xffff; 1344 else if (SU->NumSuccsLeft == 0) 1345 // If SU does not have a use, i.e. it doesn't produce a value that would 1346 // be consumed (e.g. store), then it terminates a chain of computation. 1347 // Give it a small SethiUllman number so it will be scheduled right before 1348 // its predecessors that it doesn't lengthen their live ranges. 1349 SethiUllmanNumber = 0; 1350 else if (SU->NumPredsLeft == 0 && 1351 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 1352 SethiUllmanNumber = 0xffff; 1353 else { 1354 int Extra = 0; 1355 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1356 I != E; ++I) { 1357 if (I->isCtrl) continue; // ignore chain preds 1358 SUnit *PredSU = I->Dep; 1359 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1360 if (PredSethiUllman > SethiUllmanNumber) { 1361 SethiUllmanNumber = PredSethiUllman; 1362 Extra = 0; 1363 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1364 ++Extra; 1365 } 1366 1367 SethiUllmanNumber += Extra; 1368 } 1369 1370 return SethiUllmanNumber; 1371} 1372 1373/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1374/// scheduling units. 1375template<class SF> 1376void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1377 SethiUllmanNumbers.assign(SUnits->size(), 0); 1378 1379 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1380 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1381} 1382 1383//===----------------------------------------------------------------------===// 1384// Public Constructor Functions 1385//===----------------------------------------------------------------------===// 1386 1387llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1388 SelectionDAG *DAG, 1389 MachineBasicBlock *BB) { 1390 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1391 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1392 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); 1393} 1394 1395llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1396 SelectionDAG *DAG, 1397 MachineBasicBlock *BB) { 1398 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1399 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1400} 1401 1402