ScheduleDAGRRList.cpp revision 713a98dee8ab07a3066d1707a07648d27dd0c19c
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Evan Cheng and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAG.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SSARegMap.h" 22#include "llvm/Target/MRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/ADT/Statistic.h" 29#include <climits> 30#include <queue> 31#include "llvm/Support/CommandLine.h" 32using namespace llvm; 33 34static RegisterScheduler 35 burrListDAGScheduler("list-burr", 36 " Bottom-up register reduction list scheduling", 37 createBURRListDAGScheduler); 38static RegisterScheduler 39 tdrListrDAGScheduler("list-tdrr", 40 " Top-down register reduction list scheduling", 41 createTDRRListDAGScheduler); 42 43namespace { 44//===----------------------------------------------------------------------===// 45/// ScheduleDAGRRList - The actual register reduction list scheduler 46/// implementation. This supports both top-down and bottom-up scheduling. 47/// 48class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG { 49private: 50 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 51 /// it is top-down. 52 bool isBottomUp; 53 54 /// AvailableQueue - The priority queue to use for the available SUnits. 55 /// 56 SchedulingPriorityQueue *AvailableQueue; 57 58public: 59 ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb, 60 const TargetMachine &tm, bool isbottomup, 61 SchedulingPriorityQueue *availqueue) 62 : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), 63 AvailableQueue(availqueue) { 64 } 65 66 ~ScheduleDAGRRList() { 67 delete AvailableQueue; 68 } 69 70 void Schedule(); 71 72private: 73 void ReleasePred(SUnit *PredSU, bool isChain, unsigned CurCycle); 74 void ReleaseSucc(SUnit *SuccSU, bool isChain, unsigned CurCycle); 75 void ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle); 76 void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); 77 void ListScheduleTopDown(); 78 void ListScheduleBottomUp(); 79 void CommuteNodesToReducePressure(); 80}; 81} // end anonymous namespace 82 83 84/// Schedule - Schedule the DAG using list scheduling. 85void ScheduleDAGRRList::Schedule() { 86 DOUT << "********** List Scheduling **********\n"; 87 88 // Build scheduling units. 89 BuildSchedUnits(); 90 91 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 92 SUnits[su].dumpAll(&DAG)); 93 CalculateDepths(); 94 CalculateHeights(); 95 96 AvailableQueue->initNodes(SUnitMap, SUnits); 97 98 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 99 if (isBottomUp) 100 ListScheduleBottomUp(); 101 else 102 ListScheduleTopDown(); 103 104 AvailableQueue->releaseState(); 105 106 CommuteNodesToReducePressure(); 107 108 DOUT << "*** Final schedule ***\n"; 109 DEBUG(dumpSchedule()); 110 DOUT << "\n"; 111 112 // Emit in scheduled order 113 EmitSchedule(); 114} 115 116/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 117/// it is not the last use of its first operand, add it to the CommuteSet if 118/// possible. It will be commuted when it is translated to a MI. 119void ScheduleDAGRRList::CommuteNodesToReducePressure() { 120 SmallPtrSet<SUnit*, 4> OperandSeen; 121 for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. 122 SUnit *SU = Sequence[i]; 123 if (!SU) continue; 124 if (SU->isCommutable) { 125 unsigned Opc = SU->Node->getTargetOpcode(); 126 unsigned NumRes = TII->getNumDefs(Opc); 127 unsigned NumOps = CountOperands(SU->Node); 128 for (unsigned j = 0; j != NumOps; ++j) { 129 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1) 130 continue; 131 132 SDNode *OpN = SU->Node->getOperand(j).Val; 133 SUnit *OpSU = SUnitMap[OpN]; 134 if (OpSU && OperandSeen.count(OpSU) == 1) { 135 // Ok, so SU is not the last use of OpSU, but SU is two-address so 136 // it will clobber OpSU. Try to commute SU if no other source operands 137 // are live below. 138 bool DoCommute = true; 139 for (unsigned k = 0; k < NumOps; ++k) { 140 if (k != j) { 141 OpN = SU->Node->getOperand(k).Val; 142 OpSU = SUnitMap[OpN]; 143 if (OpSU && OperandSeen.count(OpSU) == 1) { 144 DoCommute = false; 145 break; 146 } 147 } 148 } 149 if (DoCommute) 150 CommuteSet.insert(SU->Node); 151 } 152 153 // Only look at the first use&def node for now. 154 break; 155 } 156 } 157 158 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 159 I != E; ++I) { 160 if (!I->isCtrl) 161 OperandSeen.insert(I->Dep); 162 } 163 } 164} 165 166//===----------------------------------------------------------------------===// 167// Bottom-Up Scheduling 168//===----------------------------------------------------------------------===// 169 170/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 171/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 172void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 173 unsigned CurCycle) { 174 // FIXME: the distance between two nodes is not always == the predecessor's 175 // latency. For example, the reader can very well read the register written 176 // by the predecessor later than the issue cycle. It also depends on the 177 // interrupt model (drain vs. freeze). 178 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); 179 180 if (!isChain) 181 PredSU->NumSuccsLeft--; 182 else 183 PredSU->NumChainSuccsLeft--; 184 185#ifndef NDEBUG 186 if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) { 187 cerr << "*** List scheduling failed! ***\n"; 188 PredSU->dump(&DAG); 189 cerr << " has been released too many times!\n"; 190 assert(0); 191 } 192#endif 193 194 if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) { 195 // EntryToken has to go last! Special case it here. 196 if (PredSU->Node->getOpcode() != ISD::EntryToken) { 197 PredSU->isAvailable = true; 198 AvailableQueue->push(PredSU); 199 } 200 } 201} 202 203/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 204/// count of its predecessors. If a predecessor pending count is zero, add it to 205/// the Available queue. 206void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 207 DOUT << "*** Scheduling [" << CurCycle << "]: "; 208 DEBUG(SU->dump(&DAG)); 209 SU->Cycle = CurCycle; 210 211 AvailableQueue->ScheduledNode(SU); 212 Sequence.push_back(SU); 213 214 // Bottom up: release predecessors 215 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 216 I != E; ++I) 217 ReleasePred(I->Dep, I->isCtrl, CurCycle); 218 SU->isScheduled = true; 219} 220 221/// isReady - True if node's lower cycle bound is less or equal to the current 222/// scheduling cycle. Always true if all nodes have uniform latency 1. 223static inline bool isReady(SUnit *SU, unsigned CurCycle) { 224 return SU->CycleBound <= CurCycle; 225} 226 227/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 228/// schedulers. 229void ScheduleDAGRRList::ListScheduleBottomUp() { 230 unsigned CurCycle = 0; 231 // Add root to Available queue. 232 AvailableQueue->push(SUnitMap[DAG.getRoot().Val]); 233 234 // While Available queue is not empty, grab the node with the highest 235 // priority. If it is not ready put it back. Schedule the node. 236 std::vector<SUnit*> NotReady; 237 while (!AvailableQueue->empty()) { 238 SUnit *CurNode = AvailableQueue->pop(); 239 while (CurNode && !isReady(CurNode, CurCycle)) { 240 NotReady.push_back(CurNode); 241 CurNode = AvailableQueue->pop(); 242 } 243 244 // Add the nodes that aren't ready back onto the available list. 245 AvailableQueue->push_all(NotReady); 246 NotReady.clear(); 247 248 if (CurNode != NULL) 249 ScheduleNodeBottomUp(CurNode, CurCycle); 250 CurCycle++; 251 } 252 253 // Add entry node last 254 if (DAG.getEntryNode().Val != DAG.getRoot().Val) { 255 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; 256 Sequence.push_back(Entry); 257 } 258 259 // Reverse the order if it is bottom up. 260 std::reverse(Sequence.begin(), Sequence.end()); 261 262 263#ifndef NDEBUG 264 // Verify that all SUnits were scheduled. 265 bool AnyNotSched = false; 266 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 267 if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { 268 if (!AnyNotSched) 269 cerr << "*** List scheduling failed! ***\n"; 270 SUnits[i].dump(&DAG); 271 cerr << "has not been scheduled!\n"; 272 AnyNotSched = true; 273 } 274 } 275 assert(!AnyNotSched); 276#endif 277} 278 279//===----------------------------------------------------------------------===// 280// Top-Down Scheduling 281//===----------------------------------------------------------------------===// 282 283/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 284/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 285void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, 286 unsigned CurCycle) { 287 // FIXME: the distance between two nodes is not always == the predecessor's 288 // latency. For example, the reader can very well read the register written 289 // by the predecessor later than the issue cycle. It also depends on the 290 // interrupt model (drain vs. freeze). 291 SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency); 292 293 if (!isChain) 294 SuccSU->NumPredsLeft--; 295 else 296 SuccSU->NumChainPredsLeft--; 297 298#ifndef NDEBUG 299 if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) { 300 cerr << "*** List scheduling failed! ***\n"; 301 SuccSU->dump(&DAG); 302 cerr << " has been released too many times!\n"; 303 assert(0); 304 } 305#endif 306 307 if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) { 308 SuccSU->isAvailable = true; 309 AvailableQueue->push(SuccSU); 310 } 311} 312 313 314/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 315/// count of its successors. If a successor pending count is zero, add it to 316/// the Available queue. 317void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 318 DOUT << "*** Scheduling [" << CurCycle << "]: "; 319 DEBUG(SU->dump(&DAG)); 320 SU->Cycle = CurCycle; 321 322 AvailableQueue->ScheduledNode(SU); 323 Sequence.push_back(SU); 324 325 // Top down: release successors 326 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 327 I != E; ++I) 328 ReleaseSucc(I->Dep, I->isCtrl, CurCycle); 329 SU->isScheduled = true; 330} 331 332/// ListScheduleTopDown - The main loop of list scheduling for top-down 333/// schedulers. 334void ScheduleDAGRRList::ListScheduleTopDown() { 335 unsigned CurCycle = 0; 336 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val]; 337 338 // All leaves to Available queue. 339 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 340 // It is available if it has no predecessors. 341 if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { 342 AvailableQueue->push(&SUnits[i]); 343 SUnits[i].isAvailable = true; 344 } 345 } 346 347 // Emit the entry node first. 348 ScheduleNodeTopDown(Entry, CurCycle); 349 CurCycle++; 350 351 // While Available queue is not empty, grab the node with the highest 352 // priority. If it is not ready put it back. Schedule the node. 353 std::vector<SUnit*> NotReady; 354 while (!AvailableQueue->empty()) { 355 SUnit *CurNode = AvailableQueue->pop(); 356 while (CurNode && !isReady(CurNode, CurCycle)) { 357 NotReady.push_back(CurNode); 358 CurNode = AvailableQueue->pop(); 359 } 360 361 // Add the nodes that aren't ready back onto the available list. 362 AvailableQueue->push_all(NotReady); 363 NotReady.clear(); 364 365 if (CurNode != NULL) 366 ScheduleNodeTopDown(CurNode, CurCycle); 367 CurCycle++; 368 } 369 370 371#ifndef NDEBUG 372 // Verify that all SUnits were scheduled. 373 bool AnyNotSched = false; 374 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 375 if (!SUnits[i].isScheduled) { 376 if (!AnyNotSched) 377 cerr << "*** List scheduling failed! ***\n"; 378 SUnits[i].dump(&DAG); 379 cerr << "has not been scheduled!\n"; 380 AnyNotSched = true; 381 } 382 } 383 assert(!AnyNotSched); 384#endif 385} 386 387 388 389//===----------------------------------------------------------------------===// 390// RegReductionPriorityQueue Implementation 391//===----------------------------------------------------------------------===// 392// 393// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 394// to reduce register pressure. 395// 396namespace { 397 template<class SF> 398 class RegReductionPriorityQueue; 399 400 /// Sorting functions for the Available queue. 401 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 402 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 403 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 404 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 405 406 bool operator()(const SUnit* left, const SUnit* right) const; 407 }; 408 409 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 410 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 411 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 412 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 413 414 bool operator()(const SUnit* left, const SUnit* right) const; 415 }; 416} // end anonymous namespace 417 418static inline bool isCopyFromLiveIn(const SUnit *SU) { 419 SDNode *N = SU->Node; 420 return N->getOpcode() == ISD::CopyFromReg && 421 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 422} 423 424namespace { 425 template<class SF> 426 class VISIBILITY_HIDDEN RegReductionPriorityQueue 427 : public SchedulingPriorityQueue { 428 std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; 429 430 public: 431 RegReductionPriorityQueue() : 432 Queue(SF(this)) {} 433 434 virtual void initNodes(DenseMap<SDNode*, SUnit*> &sumap, 435 std::vector<SUnit> &sunits) {} 436 virtual void releaseState() {} 437 438 virtual unsigned getNodePriority(const SUnit *SU) const { 439 return 0; 440 } 441 442 bool empty() const { return Queue.empty(); } 443 444 void push(SUnit *U) { 445 Queue.push(U); 446 } 447 void push_all(const std::vector<SUnit *> &Nodes) { 448 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 449 Queue.push(Nodes[i]); 450 } 451 452 SUnit *pop() { 453 if (empty()) return NULL; 454 SUnit *V = Queue.top(); 455 Queue.pop(); 456 return V; 457 } 458 459 virtual bool isDUOperand(const SUnit *SU1, const SUnit *SU2) { 460 return false; 461 } 462 }; 463 464 template<class SF> 465 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 466 : public RegReductionPriorityQueue<SF> { 467 // SUnitMap SDNode to SUnit mapping (n -> 1). 468 DenseMap<SDNode*, SUnit*> *SUnitMap; 469 470 // SUnits - The SUnits for the current graph. 471 const std::vector<SUnit> *SUnits; 472 473 // SethiUllmanNumbers - The SethiUllman number for each node. 474 std::vector<unsigned> SethiUllmanNumbers; 475 476 const TargetInstrInfo *TII; 477 public: 478 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii) 479 : TII(tii) {} 480 481 void initNodes(DenseMap<SDNode*, SUnit*> &sumap, 482 std::vector<SUnit> &sunits) { 483 SUnitMap = &sumap; 484 SUnits = &sunits; 485 // Add pseudo dependency edges for two-address nodes. 486 AddPseudoTwoAddrDeps(); 487 // Calculate node priorities. 488 CalculateSethiUllmanNumbers(); 489 } 490 491 void releaseState() { 492 SUnits = 0; 493 SethiUllmanNumbers.clear(); 494 } 495 496 unsigned getNodePriority(const SUnit *SU) const { 497 assert(SU->NodeNum < SethiUllmanNumbers.size()); 498 unsigned Opc = SU->Node->getOpcode(); 499 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 500 // CopyFromReg should be close to its def because it restricts 501 // allocation choices. But if it is a livein then perhaps we want it 502 // closer to its uses so it can be coalesced. 503 return 0xffff; 504 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 505 // CopyToReg should be close to its uses to facilitate coalescing and 506 // avoid spilling. 507 return 0; 508 else if (SU->NumSuccs == 0) 509 // If SU does not have a use, i.e. it doesn't produce a value that would 510 // be consumed (e.g. store), then it terminates a chain of computation. 511 // Give it a large SethiUllman number so it will be scheduled right 512 // before its predecessors that it doesn't lengthen their live ranges. 513 return 0xffff; 514 else if (SU->NumPreds == 0) 515 // If SU does not have a def, schedule it close to its uses because it 516 // does not lengthen any live ranges. 517 return 0; 518 else 519 return SethiUllmanNumbers[SU->NodeNum]; 520 } 521 522 bool isDUOperand(const SUnit *SU1, const SUnit *SU2) { 523 unsigned Opc = SU1->Node->getTargetOpcode(); 524 unsigned NumRes = TII->getNumDefs(Opc); 525 unsigned NumOps = ScheduleDAG::CountOperands(SU1->Node); 526 for (unsigned i = 0; i != NumOps; ++i) { 527 if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) == -1) 528 continue; 529 if (SU1->Node->getOperand(i).isOperand(SU2->Node)) 530 return true; 531 } 532 return false; 533 } 534 private: 535 bool canClobber(SUnit *SU, SUnit *Op); 536 void AddPseudoTwoAddrDeps(); 537 void CalculateSethiUllmanNumbers(); 538 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 539 }; 540 541 542 template<class SF> 543 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 544 : public RegReductionPriorityQueue<SF> { 545 // SUnitMap SDNode to SUnit mapping (n -> 1). 546 DenseMap<SDNode*, SUnit*> *SUnitMap; 547 548 // SUnits - The SUnits for the current graph. 549 const std::vector<SUnit> *SUnits; 550 551 // SethiUllmanNumbers - The SethiUllman number for each node. 552 std::vector<unsigned> SethiUllmanNumbers; 553 554 public: 555 TDRegReductionPriorityQueue() {} 556 557 void initNodes(DenseMap<SDNode*, SUnit*> &sumap, 558 std::vector<SUnit> &sunits) { 559 SUnitMap = &sumap; 560 SUnits = &sunits; 561 // Calculate node priorities. 562 CalculateSethiUllmanNumbers(); 563 } 564 565 void releaseState() { 566 SUnits = 0; 567 SethiUllmanNumbers.clear(); 568 } 569 570 unsigned getNodePriority(const SUnit *SU) const { 571 assert(SU->NodeNum < SethiUllmanNumbers.size()); 572 return SethiUllmanNumbers[SU->NodeNum]; 573 } 574 575 private: 576 void CalculateSethiUllmanNumbers(); 577 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 578 }; 579} 580 581/// closestSucc - Returns the scheduled cycle of the successor which is 582/// closet to the current cycle. 583static unsigned closestSucc(const SUnit *SU) { 584 unsigned MaxCycle = 0; 585 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 586 I != E; ++I) { 587 unsigned Cycle = I->Dep->Cycle; 588 // If there are bunch of CopyToRegs stacked up, they should be considered 589 // to be at the same position. 590 if (I->Dep->Node->getOpcode() == ISD::CopyToReg) 591 Cycle = closestSucc(I->Dep)+1; 592 if (Cycle > MaxCycle) 593 MaxCycle = Cycle; 594 } 595 return MaxCycle; 596} 597 598/// calcMaxScratches - Returns an cost estimate of the worse case requirement 599/// for scratch registers. Live-in operands and live-out results don't count 600/// since they are "fixed". 601static unsigned calcMaxScratches(const SUnit *SU) { 602 unsigned Scratches = 0; 603 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 604 I != E; ++I) { 605 if (I->isCtrl) continue; // ignore chain preds 606 if (I->Dep->Node->getOpcode() != ISD::CopyFromReg) 607 Scratches++; 608 } 609 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 610 I != E; ++I) { 611 if (I->isCtrl) continue; // ignore chain succs 612 if (I->Dep->Node->getOpcode() != ISD::CopyToReg) 613 Scratches += 10; 614 } 615 return Scratches; 616} 617 618// Bottom up 619bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 620 // There used to be a special tie breaker here that looked for 621 // two-address instructions and preferred the instruction with a 622 // def&use operand. The special case triggered diagnostics when 623 // _GLIBCXX_DEBUG was enabled because it broke the strict weak 624 // ordering that priority_queue requires. It didn't help much anyway 625 // because AddPseudoTwoAddrDeps already covers many of the cases 626 // where it would have applied. In addition, it's counter-intuitive 627 // that a tie breaker would be the first thing attempted. There's a 628 // "real" tie breaker below that is the operation of last resort. 629 // The fact that the "special tie breaker" would trigger when there 630 // wasn't otherwise a tie is what broke the strict weak ordering 631 // constraint. 632 633 unsigned LPriority = SPQ->getNodePriority(left); 634 unsigned RPriority = SPQ->getNodePriority(right); 635 if (LPriority > RPriority) 636 return true; 637 else if (LPriority == RPriority) { 638 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 639 // e.g. 640 // t1 = op t2, c1 641 // t3 = op t4, c2 642 // 643 // and the following instructions are both ready. 644 // t2 = op c3 645 // t4 = op c4 646 // 647 // Then schedule t2 = op first. 648 // i.e. 649 // t4 = op c4 650 // t2 = op c3 651 // t1 = op t2, c1 652 // t3 = op t4, c2 653 // 654 // This creates more short live intervals. 655 unsigned LDist = closestSucc(left); 656 unsigned RDist = closestSucc(right); 657 if (LDist < RDist) 658 return true; 659 else if (LDist == RDist) { 660 // Intuitively, it's good to push down instructions whose results are 661 // liveout so their long live ranges won't conflict with other values 662 // which are needed inside the BB. Further prioritize liveout instructions 663 // by the number of operands which are calculated within the BB. 664 unsigned LScratch = calcMaxScratches(left); 665 unsigned RScratch = calcMaxScratches(right); 666 if (LScratch > RScratch) 667 return true; 668 else if (LScratch == RScratch) 669 if (left->Height > right->Height) 670 return true; 671 else if (left->Height == right->Height) 672 if (left->Depth < right->Depth) 673 return true; 674 else if (left->Depth == right->Depth) 675 if (left->CycleBound > right->CycleBound) 676 return true; 677 } 678 } 679 return false; 680} 681 682// FIXME: This is probably too slow! 683static void isReachable(SUnit *SU, SUnit *TargetSU, 684 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 685 if (Reached) return; 686 if (SU == TargetSU) { 687 Reached = true; 688 return; 689 } 690 if (!Visited.insert(SU)) return; 691 692 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 693 ++I) 694 isReachable(I->Dep, TargetSU, Visited, Reached); 695} 696 697static bool isReachable(SUnit *SU, SUnit *TargetSU) { 698 SmallPtrSet<SUnit*, 32> Visited; 699 bool Reached = false; 700 isReachable(SU, TargetSU, Visited, Reached); 701 return Reached; 702} 703 704template<class SF> 705bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) { 706 if (SU->isTwoAddress) { 707 unsigned Opc = SU->Node->getTargetOpcode(); 708 unsigned NumRes = TII->getNumDefs(Opc); 709 unsigned NumOps = ScheduleDAG::CountOperands(SU->Node); 710 for (unsigned i = 0; i != NumOps; ++i) { 711 if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) { 712 SDNode *DU = SU->Node->getOperand(i).Val; 713 if (Op == (*SUnitMap)[DU]) 714 return true; 715 } 716 } 717 } 718 return false; 719} 720 721 722/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 723/// it as a def&use operand. Add a pseudo control edge from it to the other 724/// node (if it won't create a cycle) so the two-address one will be scheduled 725/// first (lower in the schedule). 726template<class SF> 727void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 728 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 729 SUnit *SU = (SUnit *)&((*SUnits)[i]); 730 if (!SU->isTwoAddress) 731 continue; 732 733 SDNode *Node = SU->Node; 734 if (!Node->isTargetOpcode()) 735 continue; 736 737 unsigned Opc = Node->getTargetOpcode(); 738 unsigned NumRes = TII->getNumDefs(Opc); 739 unsigned NumOps = ScheduleDAG::CountOperands(Node); 740 for (unsigned j = 0; j != NumOps; ++j) { 741 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { 742 SDNode *DU = SU->Node->getOperand(j).Val; 743 SUnit *DUSU = (*SUnitMap)[DU]; 744 if (!DUSU) continue; 745 for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); 746 I != E; ++I) { 747 if (I->isCtrl) continue; 748 SUnit *SuccSU = I->Dep; 749 if (SuccSU != SU && 750 (!canClobber(SuccSU, DUSU) || 751 (!SU->isCommutable && SuccSU->isCommutable))){ 752 if (SuccSU->Depth == SU->Depth && !isReachable(SuccSU, SU)) { 753 DOUT << "Adding an edge from SU # " << SU->NodeNum 754 << " to SU #" << SuccSU->NodeNum << "\n"; 755 if (SU->addPred(SuccSU, true)) 756 SU->NumChainPredsLeft++; 757 if (SuccSU->addSucc(SU, true)) 758 SuccSU->NumChainSuccsLeft++; 759 } 760 } 761 } 762 } 763 } 764 } 765} 766 767/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 768/// Smaller number is the higher priority. 769template<class SF> 770unsigned BURegReductionPriorityQueue<SF>:: 771CalcNodeSethiUllmanNumber(const SUnit *SU) { 772 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 773 if (SethiUllmanNumber != 0) 774 return SethiUllmanNumber; 775 776 unsigned Extra = 0; 777 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 778 I != E; ++I) { 779 if (I->isCtrl) continue; // ignore chain preds 780 SUnit *PredSU = I->Dep; 781 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 782 if (PredSethiUllman > SethiUllmanNumber) { 783 SethiUllmanNumber = PredSethiUllman; 784 Extra = 0; 785 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 786 Extra++; 787 } 788 789 SethiUllmanNumber += Extra; 790 791 if (SethiUllmanNumber == 0) 792 SethiUllmanNumber = 1; 793 794 return SethiUllmanNumber; 795} 796 797/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 798/// scheduling units. 799template<class SF> 800void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 801 SethiUllmanNumbers.assign(SUnits->size(), 0); 802 803 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 804 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 805} 806 807static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { 808 unsigned Sum = 0; 809 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 810 I != E; ++I) { 811 SUnit *SuccSU = I->Dep; 812 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 813 EE = SuccSU->Preds.end(); II != EE; ++II) { 814 SUnit *PredSU = II->Dep; 815 if (!PredSU->isScheduled) 816 Sum++; 817 } 818 } 819 820 return Sum; 821} 822 823 824// Top down 825bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 826 unsigned LPriority = SPQ->getNodePriority(left); 827 unsigned RPriority = SPQ->getNodePriority(right); 828 bool LIsTarget = left->Node->isTargetOpcode(); 829 bool RIsTarget = right->Node->isTargetOpcode(); 830 bool LIsFloater = LIsTarget && left->NumPreds == 0; 831 bool RIsFloater = RIsTarget && right->NumPreds == 0; 832 unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; 833 unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; 834 835 if (left->NumSuccs == 0 && right->NumSuccs != 0) 836 return false; 837 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 838 return true; 839 840 // Special tie breaker: if two nodes share a operand, the one that use it 841 // as a def&use operand is preferred. 842 if (LIsTarget && RIsTarget) { 843 if (left->isTwoAddress && !right->isTwoAddress) { 844 SDNode *DUNode = left->Node->getOperand(0).Val; 845 if (DUNode->isOperand(right->Node)) 846 RBonus += 2; 847 } 848 if (!left->isTwoAddress && right->isTwoAddress) { 849 SDNode *DUNode = right->Node->getOperand(0).Val; 850 if (DUNode->isOperand(left->Node)) 851 LBonus += 2; 852 } 853 } 854 if (LIsFloater) 855 LBonus -= 2; 856 if (RIsFloater) 857 RBonus -= 2; 858 if (left->NumSuccs == 1) 859 LBonus += 2; 860 if (right->NumSuccs == 1) 861 RBonus += 2; 862 863 if (LPriority+LBonus < RPriority+RBonus) 864 return true; 865 else if (LPriority == RPriority) 866 if (left->Depth < right->Depth) 867 return true; 868 else if (left->Depth == right->Depth) 869 if (left->NumSuccsLeft > right->NumSuccsLeft) 870 return true; 871 else if (left->NumSuccsLeft == right->NumSuccsLeft) 872 if (left->CycleBound > right->CycleBound) 873 return true; 874 return false; 875} 876 877/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 878/// Smaller number is the higher priority. 879template<class SF> 880unsigned TDRegReductionPriorityQueue<SF>:: 881CalcNodeSethiUllmanNumber(const SUnit *SU) { 882 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 883 if (SethiUllmanNumber != 0) 884 return SethiUllmanNumber; 885 886 unsigned Opc = SU->Node->getOpcode(); 887 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 888 SethiUllmanNumber = 0xffff; 889 else if (SU->NumSuccsLeft == 0) 890 // If SU does not have a use, i.e. it doesn't produce a value that would 891 // be consumed (e.g. store), then it terminates a chain of computation. 892 // Give it a small SethiUllman number so it will be scheduled right before 893 // its predecessors that it doesn't lengthen their live ranges. 894 SethiUllmanNumber = 0; 895 else if (SU->NumPredsLeft == 0 && 896 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 897 SethiUllmanNumber = 0xffff; 898 else { 899 int Extra = 0; 900 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 901 I != E; ++I) { 902 if (I->isCtrl) continue; // ignore chain preds 903 SUnit *PredSU = I->Dep; 904 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 905 if (PredSethiUllman > SethiUllmanNumber) { 906 SethiUllmanNumber = PredSethiUllman; 907 Extra = 0; 908 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 909 Extra++; 910 } 911 912 SethiUllmanNumber += Extra; 913 } 914 915 return SethiUllmanNumber; 916} 917 918/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 919/// scheduling units. 920template<class SF> 921void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 922 SethiUllmanNumbers.assign(SUnits->size(), 0); 923 924 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 925 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 926} 927 928//===----------------------------------------------------------------------===// 929// Public Constructor Functions 930//===----------------------------------------------------------------------===// 931 932llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 933 SelectionDAG *DAG, 934 MachineBasicBlock *BB) { 935 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 936 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 937 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); 938} 939 940llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 941 SelectionDAG *DAG, 942 MachineBasicBlock *BB) { 943 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 944 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 945} 946 947