ScheduleDAGRRList.cpp revision ba597da943f93db7849837368d79476518db4333
//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
#include "llvm/Support/CommandLine.h"
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtraced");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

// Register the two register-pressure-reduction schedulers with the
// scheduler registry so they are selectable via -pre-RA-sched.
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "  Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "  Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  /// Owned by this scheduler; deleted in the destructor.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled.
  /// LiveRegCycles - the cycle at which each live register became live;
  /// indexed by register number, like LiveRegDefs.
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // One entry per physical register; filled in as registers become "live"
  // during bottom-up scheduling.
  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
  LiveRegCycles.resize(MRI->getNumRegs(), 0);

  // Build scheduling units.
  BuildSchedUnits();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(&DAG));
  CalculateDepths();
  CalculateHeights();

  AvailableQueue->initNodes(SUnitMap, SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();

  CommuteNodesToReducePressure();

  DOUT << "*** Final schedule ***\n";
  DEBUG(dumpSchedule());
  DOUT << "\n";

  // Emit in scheduled order
  EmitSchedule();
}

/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
/// it is not the last use of its first operand, add it to the CommuteSet if
/// possible. It will be commuted when it is translated to a MI.
void ScheduleDAGRRList::CommuteNodesToReducePressure() {
  // Walk the final sequence bottom-up (from the last scheduled node towards
  // the first), tracking which operands have already been seen as uses.
  SmallPtrSet<SUnit*, 4> OperandSeen;
  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
    SUnit *SU = Sequence[i];
    if (!SU || !SU->Node) continue;
    if (SU->isCommutable) {
      unsigned Opc = SU->Node->getTargetOpcode();
      unsigned NumRes = TII->getNumDefs(Opc);
      unsigned NumOps = CountOperands(SU->Node);
      for (unsigned j = 0; j != NumOps; ++j) {
        // Only interested in operands tied to a def (two-address operands).
        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
          continue;

        SDNode *OpN = SU->Node->getOperand(j).Val;
        SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
        if (OpSU && OperandSeen.count(OpSU) == 1) {
          // Ok, so SU is not the last use of OpSU, but SU is two-address so
          // it will clobber OpSU. Try to commute SU if no other source operands
          // are live below.
          bool DoCommute = true;
          for (unsigned k = 0; k < NumOps; ++k) {
            if (k != j) {
              OpN = SU->Node->getOperand(k).Val;
              OpSU = SUnitMap[OpN][SU->InstanceNo];
              if (OpSU && OperandSeen.count(OpSU) == 1) {
                DoCommute = false;
                break;
              }
            }
          }
          if (DoCommute)
            CommuteSet.insert(SU->Node);
        }

        // Only look at the first use&def node for now.
        break;
      }
    }

    // Record this node's non-control operands as "seen" uses for the
    // nodes scheduled above it.
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl)
        OperandSeen.insert(I->Dep);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    PredSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (PredSU->NumSuccsLeft == 0) {
    // EntryToken has to go last!  Special case it here.
    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
      PredSU->isAvailable = true;
      AvailableQueue->push(PredSU);
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0)  {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      // The register becomes dead once the (bottom-up earlier scheduled)
      // successor that made it live has been reached.
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
  // Recompute the predecessor's cycle bound from its remaining scheduled
  // successors, excluding the node being unscheduled.
  PredSU->CycleBound = 0;
  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
       I != E; ++I) {
    if (I->Dep == SU)
      continue;
    PredSU->CycleBound = std::max(PredSU->CycleBound,
                                  I->Dep->Cycle + PredSU->Latency);
  }

  // The predecessor is no longer ready to schedule; pull it out of the
  // queue unless it is only pending (i.e. not actually in the queue).
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
  DEBUG(SU->dump(&DAG));

  AvailableQueue->UnscheduledNode(SU);

  // Undo the live-register bookkeeping done when SU was scheduled: any
  // physical register made live at SU's cycle is no longer live.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(I->Dep, SU, I->isCtrl);
    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg])  {
      LiveRegs.erase(I->Reg);
      assert(LiveRegDefs[I->Reg] == I->Dep &&
             "Physical register dependency violated?");
      LiveRegDefs[I->Reg] = NULL;
      LiveRegCycles[I->Reg] = 0;
    }
  }

  // Conversely, physical register defs consumed by already-scheduled
  // successors become live again.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegs.insert(I->Reg)) {
        assert(!LiveRegDefs[I->Reg] &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = SU;
      }
      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
        LiveRegCycles[I->Reg] = I->Dep->Cycle;
    }
  }

  SU->Cycle = 0;
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

// FIXME: This is probably too slow!
330static void isReachable(SUnit *SU, SUnit *TargetSU, 331 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 332 if (Reached) return; 333 if (SU == TargetSU) { 334 Reached = true; 335 return; 336 } 337 if (!Visited.insert(SU)) return; 338 339 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 340 ++I) 341 isReachable(I->Dep, TargetSU, Visited, Reached); 342} 343 344static bool isReachable(SUnit *SU, SUnit *TargetSU) { 345 SmallPtrSet<SUnit*, 32> Visited; 346 bool Reached = false; 347 isReachable(SU, TargetSU, Visited, Reached); 348 return Reached; 349} 350 351/// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 352/// create a cycle. 353static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 354 if (isReachable(TargetSU, SU)) 355 return true; 356 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 357 I != E; ++I) 358 if (I->Cost < 0 && isReachable(TargetSU, I->Dep)) 359 return true; 360 return false; 361} 362 363/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 364/// BTCycle in order to schedule a specific node. Returns the last unscheduled 365/// SUnit. Also returns if a successor is unscheduled in the process. 366void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 367 unsigned &CurCycle) { 368 SUnit *OldSU = NULL; 369 while (CurCycle > BtCycle) { 370 OldSU = Sequence.back(); 371 Sequence.pop_back(); 372 if (SU->isSucc(OldSU)) 373 // Don't try to remove SU from AvailableQueue. 374 SU->isAvailable = false; 375 UnscheduleNodeBottomUp(OldSU); 376 --CurCycle; 377 } 378 379 380 if (SU->isSucc(OldSU)) { 381 assert(false && "Something is wrong!"); 382 abort(); 383 } 384 385 ++NumBacktracks; 386} 387 388/// isSafeToCopy - True if the SUnit for the given SDNode can safely cloned, 389/// i.e. the node does not produce a flag, it does not read a flag and it does 390/// not have an incoming chain. 
static bool isSafeToCopy(SDNode *N) {
  if (!N)
    return true;

  // Producing a flag pins the node to its user; cloning would break that.
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
    if (N->getValueType(i) == MVT::Flag)
      return false;
  // Likewise a chain or flag input cannot be duplicated safely.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Other || VT == MVT::Flag)
      return false;
  }

  return true;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";

  SUnit *NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isSpecial) {
      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
    }

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  // Deletions are deferred so the successor list is not mutated while
  // being iterated.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  // CopyFromSU copies the physical register out of SrcRC; CopyToSU copies
  // it back.  Both are SUnits with no SDNode (NULL) and carry the register
  // classes to use when the copies are emitted.
  SUnit *CopyFromSU = NewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;
  CopyFromSU->Depth = SU->Depth;
  CopyFromSU->Height = SU->Height;

  SUnit *CopyToSU = NewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1);
      I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  // Chain SU -> CopyFromSU -> CopyToSU through the physical register.
  CopyFromSU->addPred(SU, false, false, Reg, -1);
  CopyToSU->addPred(CopyFromSU, false, false, Reg, 1);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumCCCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                            const TargetInstrInfo *TII) {
  const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  // The implicit defs follow the explicit results, so the result index is
  // numDefs plus the position of Reg in the implicit def list.
  unsigned NumRes = TID.numDefs;
  for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (LiveRegs.empty())
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  // A physical register dep whose live def is not I->Dep means another node
  // currently owns the register.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      // Aliasing registers conflict as well.
      for (const unsigned *Alias = MRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  // Also check the implicit defs of SU and all of its flagged nodes: they
  // would clobber any live register (or alias) not defined by SU itself.
  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    // Remembers, for each delayed node, which live registers blocked it.
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the nodes that produces these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef;
        if (isSafeToCopy(LRDef->Node))
          NewDef = CopyAndMoveSuccessors(LRDef);
        else {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            MRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false && "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}


/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    CurCycle++;
  }


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}



//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  /// bu_ls_rr_sort - Comparator for the bottom-up scheduler's priority
  /// queue; defers to the owning RegReductionPriorityQueue for priorities.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  /// td_ls_rr_sort - Comparator for the top-down scheduler's priority queue.
  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// isCopyFromLiveIn - Returns true if SU is a CopyFromReg of a live-in value
/// (its last operand is not a flag, so it does not copy a scheduled def).
static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->Node;
  return N && N->getOpcode() == ISD::CopyFromReg &&
    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

namespace {
  /// RegReductionPriorityQueue - Base priority queue implementation shared
  /// by the bottom-up and top-down register reduction queues.  The actual
  /// ordering is supplied by the SF comparator.
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;

  public:
    RegReductionPriorityQueue() :
    Queue(SF(this)) {}

    virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                           std::vector<SUnit> &sunits) {}

    virtual void addNode(const SUnit *SU) {}

    virtual void updateNode(const SUnit *SU) {}

    virtual void releaseState() {}

    virtual unsigned getNodePriority(const SUnit *SU) const {
      return 0;
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      Queue.push(U);
    }
    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        Queue.push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      return V;
    }

    /// remove - This is a really inefficient way to remove a node from a
    /// priority queue.  We should roll our own heap to make this better or
    /// something.
    void remove(SUnit *SU) {
      std::vector<SUnit*> Temp;

      // Pop elements until SU surfaces, then drop SU and push the rest back.
      assert(!Queue.empty() && "Not in queue!");
      while (Queue.top() != SU) {
        Temp.push_back(Queue.top());
        Queue.pop();
        assert(!Queue.empty() && "Not in queue!");
      }

      // Remove the node from the PQ.
      Queue.pop();

      // Add all the other nodes back.
      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
        Queue.push(Temp[i]);
    }
  };

  /// BURegReductionPriorityQueue - Bottom-up queue ordered by Sethi-Ullman
  /// numbers (plus special cases for copies and leaf nodes).
  template<class SF>
  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
   : public RegReductionPriorityQueue<SF> {
    // SUnitMap SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    const TargetInstrInfo *TII;
  public:
    explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii)
      : TII(tii) {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
        // CopyFromReg should be close to its def because it restricts
        // allocation choices. But if it is a livein then perhaps we want it
        // closer to its uses so it can be coalesced.
        return 0xffff;
      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      else if (SU->NumSuccs == 0)
        // If SU does not have a use, i.e. it doesn't produce a value that would
        // be consumed (e.g. store), then it terminates a chain of computation.
        // Give it a large SethiUllman number so it will be scheduled right
        // before its predecessors that it doesn't lengthen their live ranges.
        return 0xffff;
      else if (SU->NumPreds == 0)
        // If SU does not have a def, schedule it close to its uses because it
        // does not lengthen any live ranges.
        return 0;
      else
        return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    bool canClobber(SUnit *SU, SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };


  /// TDRegReductionPriorityQueue - Top-down queue ordered purely by
  /// Sethi-Ullman numbers.
  template<class SF>
  class VISIBILITY_HIDDEN TDRegReductionPriorityQueue
   : public RegReductionPriorityQueue<SF> {
    // SUnitMap SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    TDRegReductionPriorityQueue() {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Calculate node priorities.
1028 CalculateSethiUllmanNumbers(); 1029 } 1030 1031 void addNode(const SUnit *SU) { 1032 SethiUllmanNumbers.resize(SUnits->size(), 0); 1033 CalcNodeSethiUllmanNumber(SU); 1034 } 1035 1036 void updateNode(const SUnit *SU) { 1037 SethiUllmanNumbers[SU->NodeNum] = 0; 1038 CalcNodeSethiUllmanNumber(SU); 1039 } 1040 1041 void releaseState() { 1042 SUnits = 0; 1043 SethiUllmanNumbers.clear(); 1044 } 1045 1046 unsigned getNodePriority(const SUnit *SU) const { 1047 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1048 return SethiUllmanNumbers[SU->NodeNum]; 1049 } 1050 1051 private: 1052 void CalculateSethiUllmanNumbers(); 1053 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1054 }; 1055} 1056 1057/// closestSucc - Returns the scheduled cycle of the successor which is 1058/// closet to the current cycle. 1059static unsigned closestSucc(const SUnit *SU) { 1060 unsigned MaxCycle = 0; 1061 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1062 I != E; ++I) { 1063 unsigned Cycle = I->Dep->Cycle; 1064 // If there are bunch of CopyToRegs stacked up, they should be considered 1065 // to be at the same position. 1066 if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg) 1067 Cycle = closestSucc(I->Dep)+1; 1068 if (Cycle > MaxCycle) 1069 MaxCycle = Cycle; 1070 } 1071 return MaxCycle; 1072} 1073 1074// Bottom up 1075bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1076 // There used to be a special tie breaker here that looked for 1077 // two-address instructions and preferred the instruction with a 1078 // def&use operand. The special case triggered diagnostics when 1079 // _GLIBCXX_DEBUG was enabled because it broke the strict weak 1080 // ordering that priority_queue requires. It didn't help much anyway 1081 // because AddPseudoTwoAddrDeps already covers many of the cases 1082 // where it would have applied. In addition, it's counter-intuitive 1083 // that a tie breaker would be the first thing attempted. 
There's a 1084 // "real" tie breaker below that is the operation of last resort. 1085 // The fact that the "special tie breaker" would trigger when there 1086 // wasn't otherwise a tie is what broke the strict weak ordering 1087 // constraint. 1088 1089 unsigned LPriority = SPQ->getNodePriority(left); 1090 unsigned RPriority = SPQ->getNodePriority(right); 1091 if (LPriority > RPriority) 1092 return true; 1093 else if (LPriority == RPriority) { 1094 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1095 // e.g. 1096 // t1 = op t2, c1 1097 // t3 = op t4, c2 1098 // 1099 // and the following instructions are both ready. 1100 // t2 = op c3 1101 // t4 = op c4 1102 // 1103 // Then schedule t2 = op first. 1104 // i.e. 1105 // t4 = op c4 1106 // t2 = op c3 1107 // t1 = op t2, c1 1108 // t3 = op t4, c2 1109 // 1110 // This creates more short live intervals. 1111 unsigned LDist = closestSucc(left); 1112 unsigned RDist = closestSucc(right); 1113 if (LDist < RDist) 1114 return true; 1115 else if (LDist == RDist) { 1116 if (left->Height > right->Height) 1117 return true; 1118 else if (left->Height == right->Height) 1119 if (left->Depth < right->Depth) 1120 return true; 1121 else if (left->Depth == right->Depth) 1122 if (left->CycleBound > right->CycleBound) 1123 return true; 1124 } 1125 } 1126 return false; 1127} 1128 1129template<class SF> 1130bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) { 1131 if (SU->isTwoAddress) { 1132 unsigned Opc = SU->Node->getTargetOpcode(); 1133 unsigned NumRes = TII->getNumDefs(Opc); 1134 unsigned NumOps = ScheduleDAG::CountOperands(SU->Node); 1135 for (unsigned i = 0; i != NumOps; ++i) { 1136 if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) { 1137 SDNode *DU = SU->Node->getOperand(i).Val; 1138 if (Op == (*SUnitMap)[DU][SU->InstanceNo]) 1139 return true; 1140 } 1141 } 1142 } 1143 return false; 1144} 1145 1146 1147/// hasCopyToRegUse - Return true if SU has a value successor that is a 
1148/// CopyToReg node. 1149static bool hasCopyToRegUse(SUnit *SU) { 1150 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1151 I != E; ++I) { 1152 if (I->isCtrl) continue; 1153 SUnit *SuccSU = I->Dep; 1154 if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg) 1155 return true; 1156 } 1157 return false; 1158} 1159 1160/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1161/// it as a def&use operand. Add a pseudo control edge from it to the other 1162/// node (if it won't create a cycle) so the two-address one will be scheduled 1163/// first (lower in the schedule). If both nodes are two-address, favor the 1164/// one that has a CopyToReg use (more likely to be a loop induction update). 1165/// If both are two-address, but one is commutable while the other is not 1166/// commutable, favor the one that's not commutable. 1167template<class SF> 1168void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1169 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1170 SUnit *SU = (SUnit *)&((*SUnits)[i]); 1171 if (!SU->isTwoAddress) 1172 continue; 1173 1174 SDNode *Node = SU->Node; 1175 if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0) 1176 continue; 1177 1178 unsigned Opc = Node->getTargetOpcode(); 1179 unsigned NumRes = TII->getNumDefs(Opc); 1180 unsigned NumOps = ScheduleDAG::CountOperands(Node); 1181 for (unsigned j = 0; j != NumOps; ++j) { 1182 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { 1183 SDNode *DU = SU->Node->getOperand(j).Val; 1184 SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; 1185 if (!DUSU) continue; 1186 for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); 1187 I != E; ++I) { 1188 if (I->isCtrl) continue; 1189 SUnit *SuccSU = I->Dep; 1190 // Don't constraint nodes with implicit defs. It can create cycles 1191 // plus it may increase register pressures. 
1192 if (SuccSU == SU || SuccSU->hasPhysRegDefs) 1193 continue; 1194 // Be conservative. Ignore if nodes aren't at the same depth. 1195 if (SuccSU->Depth != SU->Depth) 1196 continue; 1197 if ((!canClobber(SuccSU, DUSU) || 1198 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1199 (!SU->isCommutable && SuccSU->isCommutable)) && 1200 !isReachable(SuccSU, SU)) { 1201 DOUT << "Adding an edge from SU # " << SU->NodeNum 1202 << " to SU #" << SuccSU->NodeNum << "\n"; 1203 SU->addPred(SuccSU, true, true); 1204 } 1205 } 1206 } 1207 } 1208 } 1209} 1210 1211/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1212/// Smaller number is the higher priority. 1213template<class SF> 1214unsigned BURegReductionPriorityQueue<SF>:: 1215CalcNodeSethiUllmanNumber(const SUnit *SU) { 1216 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1217 if (SethiUllmanNumber != 0) 1218 return SethiUllmanNumber; 1219 1220 unsigned Extra = 0; 1221 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1222 I != E; ++I) { 1223 if (I->isCtrl) continue; // ignore chain preds 1224 SUnit *PredSU = I->Dep; 1225 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1226 if (PredSethiUllman > SethiUllmanNumber) { 1227 SethiUllmanNumber = PredSethiUllman; 1228 Extra = 0; 1229 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1230 ++Extra; 1231 } 1232 1233 SethiUllmanNumber += Extra; 1234 1235 if (SethiUllmanNumber == 0) 1236 SethiUllmanNumber = 1; 1237 1238 return SethiUllmanNumber; 1239} 1240 1241/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1242/// scheduling units. 
1243template<class SF> 1244void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1245 SethiUllmanNumbers.assign(SUnits->size(), 0); 1246 1247 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1248 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1249} 1250 1251static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { 1252 unsigned Sum = 0; 1253 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1254 I != E; ++I) { 1255 SUnit *SuccSU = I->Dep; 1256 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1257 EE = SuccSU->Preds.end(); II != EE; ++II) { 1258 SUnit *PredSU = II->Dep; 1259 if (!PredSU->isScheduled) 1260 ++Sum; 1261 } 1262 } 1263 1264 return Sum; 1265} 1266 1267 1268// Top down 1269bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1270 unsigned LPriority = SPQ->getNodePriority(left); 1271 unsigned RPriority = SPQ->getNodePriority(right); 1272 bool LIsTarget = left->Node && left->Node->isTargetOpcode(); 1273 bool RIsTarget = right->Node && right->Node->isTargetOpcode(); 1274 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1275 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1276 unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; 1277 unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; 1278 1279 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1280 return false; 1281 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1282 return true; 1283 1284 // Special tie breaker: if two nodes share a operand, the one that use it 1285 // as a def&use operand is preferred. 
1286 if (LIsTarget && RIsTarget) { 1287 if (left->isTwoAddress && !right->isTwoAddress) { 1288 SDNode *DUNode = left->Node->getOperand(0).Val; 1289 if (DUNode->isOperand(right->Node)) 1290 RBonus += 2; 1291 } 1292 if (!left->isTwoAddress && right->isTwoAddress) { 1293 SDNode *DUNode = right->Node->getOperand(0).Val; 1294 if (DUNode->isOperand(left->Node)) 1295 LBonus += 2; 1296 } 1297 } 1298 if (LIsFloater) 1299 LBonus -= 2; 1300 if (RIsFloater) 1301 RBonus -= 2; 1302 if (left->NumSuccs == 1) 1303 LBonus += 2; 1304 if (right->NumSuccs == 1) 1305 RBonus += 2; 1306 1307 if (LPriority+LBonus < RPriority+RBonus) 1308 return true; 1309 else if (LPriority == RPriority) 1310 if (left->Depth < right->Depth) 1311 return true; 1312 else if (left->Depth == right->Depth) 1313 if (left->NumSuccsLeft > right->NumSuccsLeft) 1314 return true; 1315 else if (left->NumSuccsLeft == right->NumSuccsLeft) 1316 if (left->CycleBound > right->CycleBound) 1317 return true; 1318 return false; 1319} 1320 1321/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1322/// Smaller number is the higher priority. 1323template<class SF> 1324unsigned TDRegReductionPriorityQueue<SF>:: 1325CalcNodeSethiUllmanNumber(const SUnit *SU) { 1326 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1327 if (SethiUllmanNumber != 0) 1328 return SethiUllmanNumber; 1329 1330 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1331 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1332 SethiUllmanNumber = 0xffff; 1333 else if (SU->NumSuccsLeft == 0) 1334 // If SU does not have a use, i.e. it doesn't produce a value that would 1335 // be consumed (e.g. store), then it terminates a chain of computation. 1336 // Give it a small SethiUllman number so it will be scheduled right before 1337 // its predecessors that it doesn't lengthen their live ranges. 
1338 SethiUllmanNumber = 0; 1339 else if (SU->NumPredsLeft == 0 && 1340 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 1341 SethiUllmanNumber = 0xffff; 1342 else { 1343 int Extra = 0; 1344 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1345 I != E; ++I) { 1346 if (I->isCtrl) continue; // ignore chain preds 1347 SUnit *PredSU = I->Dep; 1348 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1349 if (PredSethiUllman > SethiUllmanNumber) { 1350 SethiUllmanNumber = PredSethiUllman; 1351 Extra = 0; 1352 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1353 ++Extra; 1354 } 1355 1356 SethiUllmanNumber += Extra; 1357 } 1358 1359 return SethiUllmanNumber; 1360} 1361 1362/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1363/// scheduling units. 1364template<class SF> 1365void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1366 SethiUllmanNumbers.assign(SUnits->size(), 0); 1367 1368 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1369 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1370} 1371 1372//===----------------------------------------------------------------------===// 1373// Public Constructor Functions 1374//===----------------------------------------------------------------------===// 1375 1376llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1377 SelectionDAG *DAG, 1378 MachineBasicBlock *BB) { 1379 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1380 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1381 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); 1382} 1383 1384llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1385 SelectionDAG *DAG, 1386 MachineBasicBlock *BB) { 1387 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1388 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1389} 1390 1391