ScheduleDAGRRList.cpp revision 5ec4b76e12cfff33a4b3ac504e75d7d119845d6a
//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Evan Cheng and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       " Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       " Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  ///
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
62 SmallSet<unsigned, 4> LiveRegs; 63 std::vector<SUnit*> LiveRegDefs; 64 std::vector<unsigned> LiveRegCycles; 65 66public: 67 ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb, 68 const TargetMachine &tm, bool isbottomup, 69 SchedulingPriorityQueue *availqueue) 70 : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), 71 AvailableQueue(availqueue) { 72 } 73 74 ~ScheduleDAGRRList() { 75 delete AvailableQueue; 76 } 77 78 void Schedule(); 79 80private: 81 void ReleasePred(SUnit*, bool, unsigned); 82 void ReleaseSucc(SUnit*, bool isChain, unsigned); 83 void CapturePred(SUnit*, SUnit*, bool); 84 void ScheduleNodeBottomUp(SUnit*, unsigned); 85 void ScheduleNodeTopDown(SUnit*, unsigned); 86 void UnscheduleNodeBottomUp(SUnit*); 87 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 88 SUnit *CopyAndMoveSuccessors(SUnit*); 89 SUnit *InsertCopiesAndMoveSuccs(SUnit*, unsigned, 90 const TargetRegisterClass*, 91 const TargetRegisterClass*); 92 bool DelayForLiveRegsBottomUp(SUnit*, unsigned&); 93 void ListScheduleTopDown(); 94 void ListScheduleBottomUp(); 95 void CommuteNodesToReducePressure(); 96}; 97} // end anonymous namespace 98 99 100/// Schedule - Schedule the DAG using list scheduling. 101void ScheduleDAGRRList::Schedule() { 102 DOUT << "********** List Scheduling **********\n"; 103 104 LiveRegDefs.resize(MRI->getNumRegs(), NULL); 105 LiveRegCycles.resize(MRI->getNumRegs(), 0); 106 107 // Build scheduling units. 108 BuildSchedUnits(); 109 110 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 111 SUnits[su].dumpAll(&DAG)); 112 CalculateDepths(); 113 CalculateHeights(); 114 115 AvailableQueue->initNodes(SUnitMap, SUnits); 116 117 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 118 if (isBottomUp) 119 ListScheduleBottomUp(); 120 else 121 ListScheduleTopDown(); 122 123 AvailableQueue->releaseState(); 124 125 CommuteNodesToReducePressure(); 126 127 DOUT << "*** Final schedule ***\n"; 128 DEBUG(dumpSchedule()); 129 DOUT << "\n"; 130 131 // Emit in scheduled order 132 EmitSchedule(); 133} 134 135/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 136/// it is not the last use of its first operand, add it to the CommuteSet if 137/// possible. It will be commuted when it is translated to a MI. 138void ScheduleDAGRRList::CommuteNodesToReducePressure() { 139 SmallPtrSet<SUnit*, 4> OperandSeen; 140 for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. 141 SUnit *SU = Sequence[i]; 142 if (!SU || !SU->Node) continue; 143 if (SU->isCommutable) { 144 unsigned Opc = SU->Node->getTargetOpcode(); 145 unsigned NumRes = TII->getNumDefs(Opc); 146 unsigned NumOps = CountOperands(SU->Node); 147 for (unsigned j = 0; j != NumOps; ++j) { 148 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1) 149 continue; 150 151 SDNode *OpN = SU->Node->getOperand(j).Val; 152 SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo]; 153 if (OpSU && OperandSeen.count(OpSU) == 1) { 154 // Ok, so SU is not the last use of OpSU, but SU is two-address so 155 // it will clobber OpSU. Try to commute SU if no other source operands 156 // are live below. 157 bool DoCommute = true; 158 for (unsigned k = 0; k < NumOps; ++k) { 159 if (k != j) { 160 OpN = SU->Node->getOperand(k).Val; 161 OpSU = SUnitMap[OpN][SU->InstanceNo]; 162 if (OpSU && OperandSeen.count(OpSU) == 1) { 163 DoCommute = false; 164 break; 165 } 166 } 167 } 168 if (DoCommute) 169 CommuteSet.insert(SU->Node); 170 } 171 172 // Only look at the first use&def node for now. 
        break;
      }
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl)
        OperandSeen.insert(I->Dep);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

  if (!isChain)
    --PredSU->NumSuccsLeft;
  else
    --PredSU->NumChainSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0 || PredSU->NumChainSuccsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    PredSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if ((PredSU->NumSuccsLeft + PredSU->NumChainSuccsLeft) == 0) {
    // EntryToken has to go last! Special case it here.
    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
      PredSU->isAvailable = true;
      AvailableQueue->push(PredSU);
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
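/// (CapturePred's only caller is UnscheduleNodeBottomUp, which the
/// backtracking code below uses to back out previously scheduled nodes.)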
void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
  PredSU->CycleBound = 0;
  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
       I != E; ++I) {
    if (I->Dep == SU)
      continue;
    PredSU->CycleBound = std::max(PredSU->CycleBound,
                                  I->Dep->Cycle + PredSU->Latency);
  }

  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  if (!isChain)
    ++PredSU->NumSuccsLeft;
  else
    ++PredSU->NumChainSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
  DEBUG(SU->dump(&DAG));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(I->Dep, SU, I->isCtrl);
    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
      LiveRegs.erase(I->Reg);
      assert(LiveRegDefs[I->Reg] == I->Dep &&
             "Physical register dependency violated?");
      LiveRegDefs[I->Reg] = NULL;
      LiveRegCycles[I->Reg] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegs.insert(I->Reg)) {
        assert(!LiveRegDefs[I->Reg] &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = SU;
      }
      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
        LiveRegCycles[I->Reg] = I->Dep->Cycle;
    }
  }

  SU->Cycle = 0;
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }


  if (SU->isSucc(OldSU)) {
    assert(false && "Something is wrong!");
    abort();
  }
}

/// isSafeToCopy - True if the SUnit for the given SDNode can safely be cloned,
/// i.e. the node does not produce a flag, it does not read a flag and it does
/// not have an incoming chain.
static bool isSafeToCopy(SDNode *N) {
  if (!N)
    return true;

  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
    if (N->getValueType(i) == MVT::Flag)
      return false;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Other || VT == MVT::Flag)
      return false;
  }

  return true;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";

  SUnit *NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
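  // (Only non-special predecessor edges are copied onto the clone; the loop
  // further below then moves any already-scheduled successors from SU over
  // to NewSU.)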
380 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 381 I != E; ++I) 382 if (!I->isSpecial) { 383 NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost); 384 NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1); 385 } 386 387 // Only copy scheduled successors. Cut them from old node's successor 388 // list and move them over. 389 SmallVector<SDep*, 2> DelDeps; 390 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 391 I != E; ++I) { 392 if (I->isSpecial) 393 continue; 394 NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1); 395 if (I->Dep->isScheduled) { 396 I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost); 397 DelDeps.push_back(I); 398 } 399 } 400 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 401 SUnit *Succ = DelDeps[i]->Dep; 402 bool isCtrl = DelDeps[i]->isCtrl; 403 Succ->removePred(SU, isCtrl, false); 404 } 405 406 AvailableQueue->updateNode(SU); 407 AvailableQueue->addNode(NewSU); 408 409 return NewSU; 410} 411 412/// InsertCopiesAndMoveSuccs - Insert expensive cross register class copies and 413/// move all scheduled successors of the given SUnit to the last copy. 414SUnit *ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 415 const TargetRegisterClass *DestRC, 416 const TargetRegisterClass *SrcRC) { 417 SUnit *CopyFromSU = NewSUnit(NULL); 418 CopyFromSU->CopySrcRC = SrcRC; 419 CopyFromSU->CopyDstRC = DestRC; 420 CopyFromSU->Depth = SU->Depth; 421 CopyFromSU->Height = SU->Height; 422 423 SUnit *CopyToSU = NewSUnit(NULL); 424 CopyToSU->CopySrcRC = DestRC; 425 CopyToSU->CopyDstRC = SrcRC; 426 427 // Only copy scheduled successors. Cut them from old node's successor 428 // list and move them over. 429 SmallVector<SDep*, 2> DelDeps; 430 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 431 I != E; ++I) { 432 if (I->isSpecial) 433 continue; 434 CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1); 435 if (I->Dep->isScheduled) { 436 I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost); 437 DelDeps.push_back(I); 438 } 439 } 440 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 441 SUnit *Succ = DelDeps[i]->Dep; 442 bool isCtrl = DelDeps[i]->isCtrl; 443 Succ->removePred(SU, isCtrl, false); 444 } 445 446 CopyFromSU->addPred(SU, false, false, Reg, -1); 447 CopyToSU->addPred(CopyFromSU, false, false, Reg, 1); 448 449 AvailableQueue->updateNode(SU); 450 AvailableQueue->addNode(CopyFromSU); 451 AvailableQueue->addNode(CopyToSU); 452 453 return CopyToSU; 454} 455 456/// getPhysicalRegisterVT - Returns the ValueType of the physical register 457/// definition of the specified node. 458/// FIXME: Move to SelectionDAG? 459static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg, 460 const TargetInstrInfo *TII) { 461 const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode()); 462 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 463 unsigned NumRes = TID.numDefs; 464 for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) { 465 if (Reg == *ImpDef) 466 break; 467 ++NumRes; 468 } 469 return N->getValueType(NumRes); 470} 471 472// FIXME: This is probably too slow! 
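// isReachable does a simple recursive DFS over predecessor edges: Visited
// guards against revisiting a node, and Reached short-circuits the walk once
// TargetSU has been found.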
473static void isReachable(SUnit *SU, SUnit *TargetSU, 474 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 475 if (Reached) return; 476 if (SU == TargetSU) { 477 Reached = true; 478 return; 479 } 480 if (!Visited.insert(SU)) return; 481 482 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 483 ++I) 484 isReachable(I->Dep, TargetSU, Visited, Reached); 485} 486 487static bool isReachable(SUnit *SU, SUnit *TargetSU) { 488 SmallPtrSet<SUnit*, 32> Visited; 489 bool Reached = false; 490 isReachable(SU, TargetSU, Visited, Reached); 491 return Reached; 492} 493 494/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 495/// scheduling of the given node to satisfy live physical register dependencies. 496/// If the specific node is the last one that's available to schedule, do 497/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 498bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, unsigned &CurCycle){ 499 if (LiveRegs.empty()) 500 return false; 501 502 // If this node would clobber any "live" register, then it's not ready. 503 // However, if this is the last "available" node, then we may have to 504 // backtrack. 505 bool MustSched = AvailableQueue->empty(); 506 SmallVector<unsigned, 4> LRegs; 507 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 508 I != E; ++I) { 509 if (I->Cost < 0) { 510 unsigned Reg = I->Reg; 511 if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) 512 LRegs.push_back(Reg); 513 for (const unsigned *Alias = MRI->getAliasSet(Reg); 514 *Alias; ++Alias) 515 if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) 516 LRegs.push_back(*Alias); 517 } 518 } 519 520 for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) { 521 SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1]; 522 if (!Node || !Node->isTargetOpcode()) 523 continue; 524 const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode()); 525 if (!TID.ImplicitDefs) 526 continue; 527 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 528 if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) 529 LRegs.push_back(*Reg); 530 for (const unsigned *Alias = MRI->getAliasSet(*Reg); 531 *Alias; ++Alias) 532 if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) 533 LRegs.push_back(*Alias); 534 } 535 } 536 537 if (MustSched && !LRegs.empty()) { 538 // We have made a mistake by scheduling some nodes too early. Now we must 539 // schedule the current node which will end up clobbering some live 540 // registers that are expensive / impossible to copy. Try unscheduling 541 // up to the point where it's safe to schedule the current node. 542 unsigned LiveCycle = CurCycle; 543 for (unsigned i = 0, e = LRegs.size(); i != e; ++i) { 544 unsigned Reg = LRegs[i]; 545 unsigned LCycle = LiveRegCycles[Reg]; 546 LiveCycle = std::min(LiveCycle, LCycle); 547 } 548 549 SUnit *OldSU = Sequence[LiveCycle]; 550 if (!isReachable(Sequence[LiveCycle], SU)) { 551 // If CycleBound is greater than backtrack cycle, then some of SU 552 // successors are going to be unscheduled. 553 bool SuccUnsched = SU->CycleBound > LiveCycle; 554 BacktrackBottomUp(SU, LiveCycle, CurCycle); 555 // Force the current node to be scheduled before the node that 556 // requires the physical reg dep. 557 if (OldSU->isAvailable) { 558 OldSU->isAvailable = false; 559 AvailableQueue->remove(OldSU); 560 } 561 SU->addPred(OldSU, true, true); 562 // If a successor has been unscheduled, then it's not possible to 563 // schedule the current node. 
564 return SuccUnsched; 565 } else { 566 // Try duplicating the nodes that produces these "expensive to copy" 567 // values to break the dependency. 568 assert(LRegs.size() == 1 && "Can't handle this yet!"); 569 unsigned Reg = LRegs[0]; 570 SUnit *LRDef = LiveRegDefs[Reg]; 571 SUnit *NewDef; 572 if (isSafeToCopy(LRDef->Node)) 573 NewDef = CopyAndMoveSuccessors(LRDef); 574 else { 575 // Issue expensive cross register class copies. 576 MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII); 577 const TargetRegisterClass *RC = 578 MRI->getPhysicalRegisterRegClass(VT, Reg); 579 const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC); 580 if (!DestRC) { 581 assert(false && "Don't know how to copy this physical register!"); 582 abort(); 583 } 584 NewDef = InsertCopiesAndMoveSuccs(LRDef,Reg,DestRC,RC); 585 } 586 587 DOUT << "Adding an edge from SU # " << SU->NodeNum 588 << " to SU #" << NewDef->NodeNum << "\n"; 589 LiveRegDefs[Reg] = NewDef; 590 NewDef->addPred(SU, true, true); 591 SU->isAvailable = false; 592 AvailableQueue->push(NewDef); 593 return true; 594 } 595 } 596 return !LRegs.empty(); 597} 598 599/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 600/// schedulers. 601void ScheduleDAGRRList::ListScheduleBottomUp() { 602 unsigned CurCycle = 0; 603 // Add root to Available queue. 604 SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front(); 605 RootSU->isAvailable = true; 606 AvailableQueue->push(RootSU); 607 608 // While Available queue is not empty, grab the node with the highest 609 // priority. If it is not ready put it back. Schedule the node. 610 SmallVector<SUnit*, 4> NotReady; 611 while (!AvailableQueue->empty()) { 612 SUnit *CurSU = AvailableQueue->pop(); 613 while (CurSU) { 614 if (CurSU->CycleBound <= CurCycle) 615 if (!DelayForLiveRegsBottomUp(CurSU, CurCycle)) 616 break; 617 618 // Verify node is still ready. It may not be in case the 619 // scheduler has backtracked. 620 if (CurSU->isAvailable) { 621 CurSU->isPending = true; 622 NotReady.push_back(CurSU); 623 } 624 CurSU = AvailableQueue->pop(); 625 } 626 627 // Add the nodes that aren't ready back onto the available list. 628 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 629 NotReady[i]->isPending = false; 630 if (NotReady[i]->isAvailable) 631 AvailableQueue->push(NotReady[i]); 632 } 633 NotReady.clear(); 634 635 if (!CurSU) 636 Sequence.push_back(0); 637 else { 638 ScheduleNodeBottomUp(CurSU, CurCycle); 639 Sequence.push_back(CurSU); 640 } 641 ++CurCycle; 642 } 643 644 // Add entry node last 645 if (DAG.getEntryNode().Val != DAG.getRoot().Val) { 646 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front(); 647 Sequence.push_back(Entry); 648 } 649 650 // Reverse the order if it is bottom up. 651 std::reverse(Sequence.begin(), Sequence.end()); 652 653 654#ifndef NDEBUG 655 // Verify that all SUnits were scheduled. 656 bool AnyNotSched = false; 657 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 658 if (SUnits[i].NumSuccsLeft != 0 || SUnits[i].NumChainSuccsLeft != 0) { 659 if (!AnyNotSched) 660 cerr << "*** List scheduling failed! ***\n"; 661 SUnits[i].dump(&DAG); 662 cerr << "has not been scheduled!\n"; 663 AnyNotSched = true; 664 } 665 } 666 assert(!AnyNotSched); 667#endif 668} 669 670//===----------------------------------------------------------------------===// 671// Top-Down Scheduling 672//===----------------------------------------------------------------------===// 673 674/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. 
Add it to 675/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 676void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, 677 unsigned CurCycle) { 678 // FIXME: the distance between two nodes is not always == the predecessor's 679 // latency. For example, the reader can very well read the register written 680 // by the predecessor later than the issue cycle. It also depends on the 681 // interrupt model (drain vs. freeze). 682 SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency); 683 684 if (!isChain) 685 --SuccSU->NumPredsLeft; 686 else 687 --SuccSU->NumChainPredsLeft; 688 689#ifndef NDEBUG 690 if (SuccSU->NumPredsLeft < 0 || SuccSU->NumChainPredsLeft < 0) { 691 cerr << "*** List scheduling failed! ***\n"; 692 SuccSU->dump(&DAG); 693 cerr << " has been released too many times!\n"; 694 assert(0); 695 } 696#endif 697 698 if ((SuccSU->NumPredsLeft + SuccSU->NumChainPredsLeft) == 0) { 699 SuccSU->isAvailable = true; 700 AvailableQueue->push(SuccSU); 701 } 702} 703 704 705/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 706/// count of its successors. If a successor pending count is zero, add it to 707/// the Available queue. 708void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 709 DOUT << "*** Scheduling [" << CurCycle << "]: "; 710 DEBUG(SU->dump(&DAG)); 711 SU->Cycle = CurCycle; 712 713 AvailableQueue->ScheduledNode(SU); 714 715 // Top down: release successors 716 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 717 I != E; ++I) 718 ReleaseSucc(I->Dep, I->isCtrl, CurCycle); 719 SU->isScheduled = true; 720} 721 722/// ListScheduleTopDown - The main loop of list scheduling for top-down 723/// schedulers. 724void ScheduleDAGRRList::ListScheduleTopDown() { 725 unsigned CurCycle = 0; 726 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front(); 727 728 // All leaves to Available queue. 729 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 730 // It is available if it has no predecessors. 731 if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { 732 AvailableQueue->push(&SUnits[i]); 733 SUnits[i].isAvailable = true; 734 } 735 } 736 737 // Emit the entry node first. 738 ScheduleNodeTopDown(Entry, CurCycle); 739 Sequence.push_back(Entry); 740 ++CurCycle; 741 742 // While Available queue is not empty, grab the node with the highest 743 // priority. If it is not ready put it back. Schedule the node. 744 std::vector<SUnit*> NotReady; 745 while (!AvailableQueue->empty()) { 746 SUnit *CurSU = AvailableQueue->pop(); 747 while (CurSU && CurSU->CycleBound > CurCycle) { 748 NotReady.push_back(CurSU); 749 CurSU = AvailableQueue->pop(); 750 } 751 752 // Add the nodes that aren't ready back onto the available list. 753 AvailableQueue->push_all(NotReady); 754 NotReady.clear(); 755 756 if (!CurSU) 757 Sequence.push_back(0); 758 else { 759 ScheduleNodeTopDown(CurSU, CurCycle); 760 Sequence.push_back(CurSU); 761 } 762 CurCycle++; 763 } 764 765 766#ifndef NDEBUG 767 // Verify that all SUnits were scheduled. 768 bool AnyNotSched = false; 769 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 770 if (!SUnits[i].isScheduled) { 771 if (!AnyNotSched) 772 cerr << "*** List scheduling failed! 
***\n"; 773 SUnits[i].dump(&DAG); 774 cerr << "has not been scheduled!\n"; 775 AnyNotSched = true; 776 } 777 } 778 assert(!AnyNotSched); 779#endif 780} 781 782 783 784//===----------------------------------------------------------------------===// 785// RegReductionPriorityQueue Implementation 786//===----------------------------------------------------------------------===// 787// 788// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 789// to reduce register pressure. 790// 791namespace { 792 template<class SF> 793 class RegReductionPriorityQueue; 794 795 /// Sorting functions for the Available queue. 796 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 797 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 798 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 799 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 800 801 bool operator()(const SUnit* left, const SUnit* right) const; 802 }; 803 804 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 805 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 806 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 807 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 808 809 bool operator()(const SUnit* left, const SUnit* right) const; 810 }; 811} // end anonymous namespace 812 813static inline bool isCopyFromLiveIn(const SUnit *SU) { 814 SDNode *N = SU->Node; 815 return N && N->getOpcode() == ISD::CopyFromReg && 816 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 817} 818 819namespace { 820 template<class SF> 821 class VISIBILITY_HIDDEN RegReductionPriorityQueue 822 : public SchedulingPriorityQueue { 823 std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; 824 825 public: 826 RegReductionPriorityQueue() : 827 Queue(SF(this)) {} 828 829 virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 830 std::vector<SUnit> &sunits) {} 831 832 virtual void addNode(const SUnit *SU) {} 833 834 virtual void updateNode(const SUnit *SU) {} 835 836 virtual void releaseState() {} 837 838 virtual unsigned getNodePriority(const SUnit *SU) const { 839 return 0; 840 } 841 842 unsigned size() const { return Queue.size(); } 843 844 bool empty() const { return Queue.empty(); } 845 846 void push(SUnit *U) { 847 Queue.push(U); 848 } 849 void push_all(const std::vector<SUnit *> &Nodes) { 850 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 851 Queue.push(Nodes[i]); 852 } 853 854 SUnit *pop() { 855 if (empty()) return NULL; 856 SUnit *V = Queue.top(); 857 Queue.pop(); 858 return V; 859 } 860 861 /// remove - This is a really inefficient way to remove a node from a 862 /// priority queue. We should roll our own heap to make this better or 863 /// something. 864 void remove(SUnit *SU) { 865 std::vector<SUnit*> Temp; 866 867 assert(!Queue.empty() && "Not in queue!"); 868 while (Queue.top() != SU) { 869 Temp.push_back(Queue.top()); 870 Queue.pop(); 871 assert(!Queue.empty() && "Not in queue!"); 872 } 873 874 // Remove the node from the PQ. 875 Queue.pop(); 876 877 // Add all the other nodes back. 878 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 879 Queue.push(Temp[i]); 880 } 881 }; 882 883 template<class SF> 884 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 885 : public RegReductionPriorityQueue<SF> { 886 // SUnitMap SDNode to SUnit mapping (n -> n). 887 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 888 889 // SUnits - The SUnits for the current graph. 
890 const std::vector<SUnit> *SUnits; 891 892 // SethiUllmanNumbers - The SethiUllman number for each node. 893 std::vector<unsigned> SethiUllmanNumbers; 894 895 const TargetInstrInfo *TII; 896 public: 897 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii) 898 : TII(tii) {} 899 900 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 901 std::vector<SUnit> &sunits) { 902 SUnitMap = &sumap; 903 SUnits = &sunits; 904 // Add pseudo dependency edges for two-address nodes. 905 AddPseudoTwoAddrDeps(); 906 // Calculate node priorities. 907 CalculateSethiUllmanNumbers(); 908 } 909 910 void addNode(const SUnit *SU) { 911 SethiUllmanNumbers.resize(SUnits->size(), 0); 912 CalcNodeSethiUllmanNumber(SU); 913 } 914 915 void updateNode(const SUnit *SU) { 916 SethiUllmanNumbers[SU->NodeNum] = 0; 917 CalcNodeSethiUllmanNumber(SU); 918 } 919 920 void releaseState() { 921 SUnits = 0; 922 SethiUllmanNumbers.clear(); 923 } 924 925 unsigned getNodePriority(const SUnit *SU) const { 926 assert(SU->NodeNum < SethiUllmanNumbers.size()); 927 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 928 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 929 // CopyFromReg should be close to its def because it restricts 930 // allocation choices. But if it is a livein then perhaps we want it 931 // closer to its uses so it can be coalesced. 932 return 0xffff; 933 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 934 // CopyToReg should be close to its uses to facilitate coalescing and 935 // avoid spilling. 936 return 0; 937 else if (SU->NumSuccs == 0) 938 // If SU does not have a use, i.e. it doesn't produce a value that would 939 // be consumed (e.g. store), then it terminates a chain of computation. 940 // Give it a large SethiUllman number so it will be scheduled right 941 // before its predecessors that it doesn't lengthen their live ranges. 942 return 0xffff; 943 else if (SU->NumPreds == 0) 944 // If SU does not have a def, schedule it close to its uses because it 945 // does not lengthen any live ranges. 946 return 0; 947 else 948 return SethiUllmanNumbers[SU->NodeNum]; 949 } 950 951 private: 952 bool canClobber(SUnit *SU, SUnit *Op); 953 void AddPseudoTwoAddrDeps(); 954 void CalculateSethiUllmanNumbers(); 955 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 956 }; 957 958 959 template<class SF> 960 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 961 : public RegReductionPriorityQueue<SF> { 962 // SUnitMap SDNode to SUnit mapping (n -> n). 963 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 964 965 // SUnits - The SUnits for the current graph. 966 const std::vector<SUnit> *SUnits; 967 968 // SethiUllmanNumbers - The SethiUllman number for each node. 969 std::vector<unsigned> SethiUllmanNumbers; 970 971 public: 972 TDRegReductionPriorityQueue() {} 973 974 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 975 std::vector<SUnit> &sunits) { 976 SUnitMap = &sumap; 977 SUnits = &sunits; 978 // Calculate node priorities. 
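      // (Note: unlike BURegReductionPriorityQueue::initNodes, no pseudo
      // two-address dependencies are added here.)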
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxCycle = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned Cycle = I->Dep->Cycle;
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg)
      Cycle = closestSucc(I->Dep)+1;
    if (Cycle > MaxCycle)
      MaxCycle = Cycle;
  }
  return MaxCycle;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers. Live-in operands and live-out results don't count
/// since they are "fixed".
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyFromReg)
      Scratches++;
  }
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain succs
    if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyToReg)
      Scratches += 10;
  }
  return Scratches;
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  // There used to be a special tie breaker here that looked for
  // two-address instructions and preferred the instruction with a
  // def&use operand. The special case triggered diagnostics when
  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
  // ordering that priority_queue requires. It didn't help much anyway
  // because AddPseudoTwoAddrDeps already covers many of the cases
  // where it would have applied. In addition, it's counter-intuitive
  // that a tie breaker would be the first thing attempted. There's a
  // "real" tie breaker below that is the operation of last resort.
  // The fact that the "special tie breaker" would trigger when there
  // wasn't otherwise a tie is what broke the strict weak ordering
  // constraint.

  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority > RPriority)
    return true;
  else if (LPriority == RPriority) {
    // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
    // e.g.
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // and the following instructions are both ready.
    // t2 = op c3
    // t4 = op c4
    //
    // Then schedule t2 = op first.
    // i.e.
    // t4 = op c4
    // t2 = op c3
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // This creates more short live intervals.
    unsigned LDist = closestSucc(left);
    unsigned RDist = closestSucc(right);
    if (LDist < RDist)
      return true;
    else if (LDist == RDist) {
      // Intuitively, it's good to push down instructions whose results are
      // liveout so their long live ranges won't conflict with other values
      // which are needed inside the BB. Further prioritize liveout
      // instructions by the number of operands which are calculated within
      // the BB.
      unsigned LScratch = calcMaxScratches(left);
      unsigned RScratch = calcMaxScratches(right);
      if (LScratch > RScratch)
        return true;
      else if (LScratch == RScratch)
        if (left->Height > right->Height)
          return true;
        else if (left->Height == right->Height)
          if (left->Depth < right->Depth)
            return true;
          else if (left->Depth == right->Depth)
            if (left->CycleBound > right->CycleBound)
              return true;
    }
  }
  return false;
}

template<class SF>
bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(i).Val;
        if (Op == (*SUnitMap)[DU][SU->InstanceNo])
          return true;
      }
    }
  }
  return false;
}


/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule).
template<class SF>
void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = (SUnit *)&((*SUnits)[i]);
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->Node;
    if (!Node || !Node->isTargetOpcode())
      continue;

    unsigned Opc = Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(Node);
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(j).Val;
        SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
        if (!DUSU) continue;
        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
             I != E; ++I) {
          if (I->isCtrl) continue;
          SUnit *SuccSU = I->Dep;
          // Don't constrain nodes with implicit defs. It can create cycles
          // plus it may increase register pressure.
          if (SuccSU == SU || SuccSU->hasImplicitDefs)
            continue;
          // Be conservative. Ignore if nodes aren't at the same depth.
          if (SuccSU->Depth != SU->Depth)
            continue;
          if ((!canClobber(SuccSU, DUSU) ||
               (!SU->isCommutable && SuccSU->isCommutable)) &&
              !isReachable(SuccSU, SU)) {
            DOUT << "Adding an edge from SU # " << SU->NodeNum
                 << " to SU #" << SuccSU->NodeNum << "\n";
            SU->addPred(SuccSU, true, true);
          }
        }
      }
    }
  }
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// Smaller number is the higher priority.
template<class SF>
unsigned BURegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    SUnit *PredSU = I->Dep;
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    SUnit *SuccSU = I->Dep;
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->Dep;
      if (!PredSU->isScheduled)
        ++Sum;
    }
  }

  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->Node && left->Node->isTargetOpcode();
  bool RIsTarget = right->Node && right->Node->isTargetOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // Special tie breaker: if two nodes share an operand, the one that uses it
  // as a def&use operand is preferred.
1249 if (LIsTarget && RIsTarget) { 1250 if (left->isTwoAddress && !right->isTwoAddress) { 1251 SDNode *DUNode = left->Node->getOperand(0).Val; 1252 if (DUNode->isOperand(right->Node)) 1253 RBonus += 2; 1254 } 1255 if (!left->isTwoAddress && right->isTwoAddress) { 1256 SDNode *DUNode = right->Node->getOperand(0).Val; 1257 if (DUNode->isOperand(left->Node)) 1258 LBonus += 2; 1259 } 1260 } 1261 if (LIsFloater) 1262 LBonus -= 2; 1263 if (RIsFloater) 1264 RBonus -= 2; 1265 if (left->NumSuccs == 1) 1266 LBonus += 2; 1267 if (right->NumSuccs == 1) 1268 RBonus += 2; 1269 1270 if (LPriority+LBonus < RPriority+RBonus) 1271 return true; 1272 else if (LPriority == RPriority) 1273 if (left->Depth < right->Depth) 1274 return true; 1275 else if (left->Depth == right->Depth) 1276 if (left->NumSuccsLeft > right->NumSuccsLeft) 1277 return true; 1278 else if (left->NumSuccsLeft == right->NumSuccsLeft) 1279 if (left->CycleBound > right->CycleBound) 1280 return true; 1281 return false; 1282} 1283 1284/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1285/// Smaller number is the higher priority. 1286template<class SF> 1287unsigned TDRegReductionPriorityQueue<SF>:: 1288CalcNodeSethiUllmanNumber(const SUnit *SU) { 1289 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1290 if (SethiUllmanNumber != 0) 1291 return SethiUllmanNumber; 1292 1293 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1294 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1295 SethiUllmanNumber = 0xffff; 1296 else if (SU->NumSuccsLeft == 0) 1297 // If SU does not have a use, i.e. it doesn't produce a value that would 1298 // be consumed (e.g. store), then it terminates a chain of computation. 1299 // Give it a small SethiUllman number so it will be scheduled right before 1300 // its predecessors that it doesn't lengthen their live ranges. 1301 SethiUllmanNumber = 0; 1302 else if (SU->NumPredsLeft == 0 && 1303 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 1304 SethiUllmanNumber = 0xffff; 1305 else { 1306 int Extra = 0; 1307 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1308 I != E; ++I) { 1309 if (I->isCtrl) continue; // ignore chain preds 1310 SUnit *PredSU = I->Dep; 1311 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1312 if (PredSethiUllman > SethiUllmanNumber) { 1313 SethiUllmanNumber = PredSethiUllman; 1314 Extra = 0; 1315 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1316 ++Extra; 1317 } 1318 1319 SethiUllmanNumber += Extra; 1320 } 1321 1322 return SethiUllmanNumber; 1323} 1324 1325/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1326/// scheduling units. 
1327template<class SF> 1328void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1329 SethiUllmanNumbers.assign(SUnits->size(), 0); 1330 1331 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1332 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1333} 1334 1335//===----------------------------------------------------------------------===// 1336// Public Constructor Functions 1337//===----------------------------------------------------------------------===// 1338 1339llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1340 SelectionDAG *DAG, 1341 MachineBasicBlock *BB) { 1342 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1343 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1344 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); 1345} 1346 1347llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1348 SelectionDAG *DAG, 1349 MachineBasicBlock *BB) { 1350 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1351 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1352} 1353 1354
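A note on the Sethi-Ullman computation above: CalcNodeSethiUllmanNumber memoizes a per-node number, taking the maximum over the non-chain predecessors and bumping it by one for each additional predecessor that ties with that maximum. The following standalone sketch, using a hypothetical ToyNode type and a toy DAG that are not part of this file, illustrates just that recursion:

// Standalone illustration only -- not part of ScheduleDAGRRList.cpp.
// ToyNode and the toy DAG below are hypothetical; the recursion mirrors
// CalcNodeSethiUllmanNumber: take the maximum predecessor number and add
// one for each additional predecessor that ties with that maximum.
#include <cstdio>
#include <vector>

struct ToyNode {
  std::vector<int> Preds;   // indices of (data) predecessor nodes
};

static unsigned calcSethiUllman(const std::vector<ToyNode> &DAG,
                                std::vector<unsigned> &Num, int N) {
  if (Num[N] != 0)
    return Num[N];          // already computed (memoized)

  unsigned Extra = 0;
  for (int P : DAG[N].Preds) {
    unsigned PredNum = calcSethiUllman(DAG, Num, P);
    if (PredNum > Num[N]) {
      Num[N] = PredNum;     // a strictly larger subtree dominates
      Extra = 0;
    } else if (PredNum == Num[N])
      ++Extra;              // each tie costs one extra scratch register
  }
  Num[N] += Extra;
  if (Num[N] == 0)
    Num[N] = 1;             // leaves get the minimum number
  return Num[N];
}

int main() {
  // Toy DAG: node 2 consumes nodes 0 and 1 (two independent leaves),
  // node 3 consumes node 2 only.
  std::vector<ToyNode> DAG(4);
  DAG[2].Preds = {0, 1};
  DAG[3].Preds = {2};

  std::vector<unsigned> Num(DAG.size(), 0);
  for (int i = 0; i < (int)DAG.size(); ++i)
    std::printf("node %d: Sethi-Ullman %u\n", i, calcSethiUllman(DAG, Num, i));
  // Expected: leaves 0 and 1 get 1, node 2 gets 2 (two tying subtrees),
  // node 3 gets 2 (single predecessor, no tie).
  return 0;
}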