ScheduleDAGRRList.cpp revision 2eb4ebd953314d7c7f96cfb5897d8ded94f6c9d4
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Evan Cheng and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAG.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SSARegMap.h" 22#include "llvm/Target/MRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/ADT/SmallPtrSet.h" 29#include "llvm/ADT/SmallSet.h" 30#include "llvm/ADT/Statistic.h" 31#include <climits> 32#include <queue> 33#include "llvm/Support/CommandLine.h" 34using namespace llvm; 35 36STATISTIC(NumBacktracks, "Number of times scheduler backtraced"); 37STATISTIC(NumUnfolds, "Number of nodes unfolded"); 38STATISTIC(NumDups, "Number of duplicated nodes"); 39STATISTIC(NumCCCopies, "Number of cross class copies"); 40 41static RegisterScheduler 42 burrListDAGScheduler("list-burr", 43 " Bottom-up register reduction list scheduling", 44 createBURRListDAGScheduler); 45static RegisterScheduler 46 tdrListrDAGScheduler("list-tdrr", 47 " Top-down register reduction list scheduling", 48 createTDRRListDAGScheduler); 49 50namespace { 
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  /// Owned by this scheduler; deleted in the destructor.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled.
  /// LiveRegCycles - For each live physical register, the cycle at which it
  /// became live; used by the bottom-up scheduler to decide how far to
  /// backtrack when a physical register dependency cannot be satisfied.
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
    }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  // Size the live-physreg side tables to the full register namespace.
  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
  LiveRegCycles.resize(MRI->getNumRegs(), 0);

  // Build scheduling units.
  BuildSchedUnits();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(&DAG));
  CalculateDepths();
  CalculateHeights();

  AvailableQueue->initNodes(SUnitMap, SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();

  CommuteNodesToReducePressure();

  DOUT << "*** Final schedule ***\n";
  DEBUG(dumpSchedule());
  DOUT << "\n";

  // Emit in scheduled order
  EmitSchedule();
}

/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
/// it is not the last use of its first operand, add it to the CommuteSet if
/// possible. It will be commuted when it is translated to a MI.
void ScheduleDAGRRList::CommuteNodesToReducePressure() {
  SmallPtrSet<SUnit*, 4> OperandSeen;
  // Walk the final schedule backwards; OperandSeen accumulates nodes whose
  // values are used below the current position.
  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
    SUnit *SU = Sequence[i];
    if (!SU || !SU->Node) continue;
    if (SU->isCommutable) {
      unsigned Opc = SU->Node->getTargetOpcode();
      unsigned NumRes = TII->getNumDefs(Opc);
      unsigned NumOps = CountOperands(SU->Node);
      for (unsigned j = 0; j != NumOps; ++j) {
        // Only operands tied to a result can clobber a still-live source.
        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
          continue;

        SDNode *OpN = SU->Node->getOperand(j).Val;
        SUnit *OpSU = SUnitMap[OpN][SU->InstanceNo];
        if (OpSU && OperandSeen.count(OpSU) == 1) {
          // Ok, so SU is not the last use of OpSU, but SU is two-address so
          // it will clobber OpSU. Try to commute SU if no other source operands
          // are live below.
          bool DoCommute = true;
          for (unsigned k = 0; k < NumOps; ++k) {
            if (k != j) {
              OpN = SU->Node->getOperand(k).Val;
              OpSU = SUnitMap[OpN][SU->InstanceNo];
              if (OpSU && OperandSeen.count(OpSU) == 1) {
                DoCommute = false;
                break;
              }
            }
          }
          if (DoCommute)
            CommuteSet.insert(SU->Node);
        }

        // Only look at the first use&def node for now.
        break;
      }
    }

    // Record this node's non-chain predecessors as "seen below" for the
    // positions processed earlier in the (reverse) walk.
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl)
        OperandSeen.insert(I->Dep);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    PredSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (PredSU->NumSuccsLeft == 0) {
    // EntryToken has to go last!  Special case it here.
    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
      PredSU->isAvailable = true;
      AvailableQueue->push(PredSU);
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0)  {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
272void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) { 273 PredSU->CycleBound = 0; 274 for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end(); 275 I != E; ++I) { 276 if (I->Dep == SU) 277 continue; 278 PredSU->CycleBound = std::max(PredSU->CycleBound, 279 I->Dep->Cycle + PredSU->Latency); 280 } 281 282 if (PredSU->isAvailable) { 283 PredSU->isAvailable = false; 284 if (!PredSU->isPending) 285 AvailableQueue->remove(PredSU); 286 } 287 288 ++PredSU->NumSuccsLeft; 289} 290 291/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 292/// its predecessor states to reflect the change. 293void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 294 DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; 295 DEBUG(SU->dump(&DAG)); 296 297 AvailableQueue->UnscheduledNode(SU); 298 299 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 300 I != E; ++I) { 301 CapturePred(I->Dep, SU, I->isCtrl); 302 if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) { 303 LiveRegs.erase(I->Reg); 304 assert(LiveRegDefs[I->Reg] == I->Dep && 305 "Physical register dependency violated?"); 306 LiveRegDefs[I->Reg] = NULL; 307 LiveRegCycles[I->Reg] = 0; 308 } 309 } 310 311 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 312 I != E; ++I) { 313 if (I->Cost < 0) { 314 if (LiveRegs.insert(I->Reg)) { 315 assert(!LiveRegDefs[I->Reg] && 316 "Physical register dependency violated?"); 317 LiveRegDefs[I->Reg] = SU; 318 } 319 if (I->Dep->Cycle < LiveRegCycles[I->Reg]) 320 LiveRegCycles[I->Reg] = I->Dep->Cycle; 321 } 322 } 323 324 SU->Cycle = 0; 325 SU->isScheduled = false; 326 SU->isAvailable = true; 327 AvailableQueue->push(SU); 328} 329 330// FIXME: This is probably too slow! 
331static void isReachable(SUnit *SU, SUnit *TargetSU, 332 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 333 if (Reached) return; 334 if (SU == TargetSU) { 335 Reached = true; 336 return; 337 } 338 if (!Visited.insert(SU)) return; 339 340 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 341 ++I) 342 isReachable(I->Dep, TargetSU, Visited, Reached); 343} 344 345static bool isReachable(SUnit *SU, SUnit *TargetSU) { 346 SmallPtrSet<SUnit*, 32> Visited; 347 bool Reached = false; 348 isReachable(SU, TargetSU, Visited, Reached); 349 return Reached; 350} 351 352/// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 353/// create a cycle. 354static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 355 if (isReachable(TargetSU, SU)) 356 return true; 357 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 358 I != E; ++I) 359 if (I->Cost < 0 && isReachable(TargetSU, I->Dep)) 360 return true; 361 return false; 362} 363 364/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 365/// BTCycle in order to schedule a specific node. Returns the last unscheduled 366/// SUnit. Also returns if a successor is unscheduled in the process. 367void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 368 unsigned &CurCycle) { 369 SUnit *OldSU = NULL; 370 while (CurCycle > BtCycle) { 371 OldSU = Sequence.back(); 372 Sequence.pop_back(); 373 if (SU->isSucc(OldSU)) 374 // Don't try to remove SU from AvailableQueue. 375 SU->isAvailable = false; 376 UnscheduleNodeBottomUp(OldSU); 377 --CurCycle; 378 } 379 380 381 if (SU->isSucc(OldSU)) { 382 assert(false && "Something is wrong!"); 383 abort(); 384 } 385 386 ++NumBacktracks; 387} 388 389/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 390/// successors to the newly created node. 
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  // Cannot duplicate a node that drags flagged nodes along with it.
  if (SU->FlaggedNodes.size())
    return NULL;

  SDNode *N = SU->Node;
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  // A node that produces or consumes a Flag cannot be duplicated at all; a
  // node that produces a chain (MVT::Other) may be a folded load we can try
  // to unfold instead of cloning.
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT::ValueType VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    // Ask the target to split the folded memory operand back into a separate
    // load plus the computation node.
    SmallVector<SDNode*, 4> NewNodes;
    if (!MRI->unfoldMemoryOperand(DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    std::vector<SDNode*> Deleted;
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->Node->getNumValues();
    // Rewrite all uses of the old node's values to the new computation node,
    // and the old chain result (last value) to the new load's chain.
    for (unsigned i = 0; i != NumVals; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i),
                                    SDOperand(N, i), Deleted);
    DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1),
                                  SDOperand(LoadNode, 1), Deleted);

    SUnit *LoadSU = NewSUnit(LoadNode);
    // NOTE: this NewSU deliberately shadows the outer NewSU declared above;
    // all uses inside the unfold path refer to this one.
    SUnit *NewSU = NewSUnit(N);
    SUnitMap[LoadNode].push_back(LoadSU);
    SUnitMap[N].push_back(NewSU);
    // Derive the two-address / commutable flags of the new SUnits from the
    // target instruction descriptors.
    const TargetInstrDescriptor *TID = &TII->get(LoadNode->getTargetOpcode());
    for (unsigned i = 0; i != TID->numOperands; ++i) {
      if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
        LoadSU->isTwoAddress = true;
        break;
      }
    }
    if (TID->Flags & M_COMMUTABLE)
      LoadSU->isCommutable = true;

    TID = &TII->get(N->getTargetOpcode());
    for (unsigned i = 0; i != TID->numOperands; ++i) {
      if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID->Flags & M_COMMUTABLE)
      NewSU->isCommutable = true;

    // FIXME: Calculate height / depth and propagate the changes?
    LoadSU->Depth = NewSU->Depth = SU->Depth;
    LoadSU->Height = NewSU->Height = SU->Height;
    ComputeLatency(LoadSU);
    ComputeLatency(NewSU);

    // Partition SU's edges: the chain pred goes to the load, data preds go to
    // whichever new node consumes them, chain succs follow the load, and data
    // succs follow the computation node.
    SUnit *ChainPred = NULL;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainPred = I->Dep;
      else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode))
        LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
      else
        NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                  I->isCtrl, I->isSpecial));
      else
        NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                 I->isCtrl, I->isSpecial));
    }

    // Re-wire the partitioned edges onto LoadSU / NewSU.
    // NOTE(review): ChainPred is assumed non-NULL here (a folded load is
    // expected to have a chain predecessor) — verify against BuildSchedUnits.
    SU->removePred(ChainPred, true, false);
    LoadSU->addPred(ChainPred, true, false);
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      SDep *Pred = &LoadPreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                      Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      SDep *Pred = &NodePreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      NewSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                     Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep *Succ = &NodeSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial,
                         Succ->Reg, Succ->Cost);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep *Succ = &ChainSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial,
                         Succ->Reg, Succ->Cost);
    }
    // The computation node depends on the load it was unfolded from.
    NewSU->addPred(LoadSU, false, false);

    AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    // If the unfolded computation node has no remaining successors it can be
    // returned directly; otherwise fall through and clone it as well.
    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    } else
      SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isSpecial) {
      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
    }

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  // Removal is deferred so the iteration above is not invalidated.
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
567void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 568 const TargetRegisterClass *DestRC, 569 const TargetRegisterClass *SrcRC, 570 SmallVector<SUnit*, 2> &Copies) { 571 SUnit *CopyFromSU = NewSUnit(NULL); 572 CopyFromSU->CopySrcRC = SrcRC; 573 CopyFromSU->CopyDstRC = DestRC; 574 CopyFromSU->Depth = SU->Depth; 575 CopyFromSU->Height = SU->Height; 576 577 SUnit *CopyToSU = NewSUnit(NULL); 578 CopyToSU->CopySrcRC = DestRC; 579 CopyToSU->CopyDstRC = SrcRC; 580 581 // Only copy scheduled successors. Cut them from old node's successor 582 // list and move them over. 583 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 584 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 585 I != E; ++I) { 586 if (I->isSpecial) 587 continue; 588 if (I->Dep->isScheduled) { 589 CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1); 590 I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost); 591 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 592 } 593 } 594 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 595 SUnit *Succ = DelDeps[i].first; 596 bool isCtrl = DelDeps[i].second; 597 Succ->removePred(SU, isCtrl, false); 598 } 599 600 CopyFromSU->addPred(SU, false, false, Reg, -1); 601 CopyToSU->addPred(CopyFromSU, false, false, Reg, 1); 602 603 AvailableQueue->updateNode(SU); 604 AvailableQueue->addNode(CopyFromSU); 605 AvailableQueue->addNode(CopyToSU); 606 Copies.push_back(CopyFromSU); 607 Copies.push_back(CopyToSU); 608 609 ++NumCCCopies; 610} 611 612/// getPhysicalRegisterVT - Returns the ValueType of the physical register 613/// definition of the specified node. 614/// FIXME: Move to SelectionDAG? 
static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                            const TargetInstrInfo *TII) {
  const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  // Implicit defs follow the explicit results in the node's value list, so
  // count explicit results plus preceding implicit defs to find Reg's value.
  unsigned NumRes = TID.numDefs;
  for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (LiveRegs.empty())
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  // A physreg pred edge (Cost < 0) conflicts if its register (or an alias)
  // is live with a different defining node.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  // Likewise, the implicit defs of SU (and its flagged nodes) must not
  // clobber a register that is live with a different definition.
  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    // Pop candidates until one is within its cycle bound and free of live
    // physreg conflicts; the rest are parked in NotReady.
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the nodes that produces these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
        if (!NewDef) {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            MRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false && "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    // A null entry in Sequence represents a stall (noop) cycle.
    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}


/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    // Pop candidates until one is within its cycle bound; park the rest.
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    // A null entry in Sequence represents a stall (noop) cycle.
    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    CurCycle++;
  }


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}



//===----------------------------------------------------------------------===//
//  RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.  Each functor holds a back
  /// pointer to its owning queue so the comparison can consult queue state.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// isCopyFromLiveIn - Return true if SU is a CopyFromReg whose source is not
/// flag-glued to another node (i.e. it reads a live-in register).
static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->Node;
  return N && N->getOpcode() == ISD::CopyFromReg &&
    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;

  public:
RegReductionPriorityQueue() : 990 Queue(SF(this)) {} 991 992 virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 993 std::vector<SUnit> &sunits) {} 994 995 virtual void addNode(const SUnit *SU) {} 996 997 virtual void updateNode(const SUnit *SU) {} 998 999 virtual void releaseState() {} 1000 1001 virtual unsigned getNodePriority(const SUnit *SU) const { 1002 return 0; 1003 } 1004 1005 unsigned size() const { return Queue.size(); } 1006 1007 bool empty() const { return Queue.empty(); } 1008 1009 void push(SUnit *U) { 1010 Queue.push(U); 1011 } 1012 void push_all(const std::vector<SUnit *> &Nodes) { 1013 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1014 Queue.push(Nodes[i]); 1015 } 1016 1017 SUnit *pop() { 1018 if (empty()) return NULL; 1019 SUnit *V = Queue.top(); 1020 Queue.pop(); 1021 return V; 1022 } 1023 1024 /// remove - This is a really inefficient way to remove a node from a 1025 /// priority queue. We should roll our own heap to make this better or 1026 /// something. 1027 void remove(SUnit *SU) { 1028 std::vector<SUnit*> Temp; 1029 1030 assert(!Queue.empty() && "Not in queue!"); 1031 while (Queue.top() != SU) { 1032 Temp.push_back(Queue.top()); 1033 Queue.pop(); 1034 assert(!Queue.empty() && "Not in queue!"); 1035 } 1036 1037 // Remove the node from the PQ. 1038 Queue.pop(); 1039 1040 // Add all the other nodes back. 1041 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 1042 Queue.push(Temp[i]); 1043 } 1044 }; 1045 1046 template<class SF> 1047 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 1048 : public RegReductionPriorityQueue<SF> { 1049 // SUnitMap SDNode to SUnit mapping (n -> n). 1050 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1051 1052 // SUnits - The SUnits for the current graph. 1053 const std::vector<SUnit> *SUnits; 1054 1055 // SethiUllmanNumbers - The SethiUllman number for each node. 
1056 std::vector<unsigned> SethiUllmanNumbers; 1057 1058 const TargetInstrInfo *TII; 1059 public: 1060 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii) 1061 : TII(tii) {} 1062 1063 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1064 std::vector<SUnit> &sunits) { 1065 SUnitMap = &sumap; 1066 SUnits = &sunits; 1067 // Add pseudo dependency edges for two-address nodes. 1068 AddPseudoTwoAddrDeps(); 1069 // Calculate node priorities. 1070 CalculateSethiUllmanNumbers(); 1071 } 1072 1073 void addNode(const SUnit *SU) { 1074 SethiUllmanNumbers.resize(SUnits->size(), 0); 1075 CalcNodeSethiUllmanNumber(SU); 1076 } 1077 1078 void updateNode(const SUnit *SU) { 1079 SethiUllmanNumbers[SU->NodeNum] = 0; 1080 CalcNodeSethiUllmanNumber(SU); 1081 } 1082 1083 void releaseState() { 1084 SUnits = 0; 1085 SethiUllmanNumbers.clear(); 1086 } 1087 1088 unsigned getNodePriority(const SUnit *SU) const { 1089 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1090 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1091 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1092 // CopyFromReg should be close to its def because it restricts 1093 // allocation choices. But if it is a livein then perhaps we want it 1094 // closer to its uses so it can be coalesced. 1095 return 0xffff; 1096 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1097 // CopyToReg should be close to its uses to facilitate coalescing and 1098 // avoid spilling. 1099 return 0; 1100 else if (SU->NumSuccs == 0) 1101 // If SU does not have a use, i.e. it doesn't produce a value that would 1102 // be consumed (e.g. store), then it terminates a chain of computation. 1103 // Give it a large SethiUllman number so it will be scheduled right 1104 // before its predecessors that it doesn't lengthen their live ranges. 1105 return 0xffff; 1106 else if (SU->NumPreds == 0) 1107 // If SU does not have a def, schedule it close to its uses because it 1108 // does not lengthen any live ranges. 
1109 return 0; 1110 else 1111 return SethiUllmanNumbers[SU->NodeNum]; 1112 } 1113 1114 private: 1115 bool canClobber(SUnit *SU, SUnit *Op); 1116 void AddPseudoTwoAddrDeps(); 1117 void CalculateSethiUllmanNumbers(); 1118 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1119 }; 1120 1121 1122 template<class SF> 1123 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 1124 : public RegReductionPriorityQueue<SF> { 1125 // SUnitMap SDNode to SUnit mapping (n -> n). 1126 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1127 1128 // SUnits - The SUnits for the current graph. 1129 const std::vector<SUnit> *SUnits; 1130 1131 // SethiUllmanNumbers - The SethiUllman number for each node. 1132 std::vector<unsigned> SethiUllmanNumbers; 1133 1134 public: 1135 TDRegReductionPriorityQueue() {} 1136 1137 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1138 std::vector<SUnit> &sunits) { 1139 SUnitMap = &sumap; 1140 SUnits = &sunits; 1141 // Calculate node priorities. 1142 CalculateSethiUllmanNumbers(); 1143 } 1144 1145 void addNode(const SUnit *SU) { 1146 SethiUllmanNumbers.resize(SUnits->size(), 0); 1147 CalcNodeSethiUllmanNumber(SU); 1148 } 1149 1150 void updateNode(const SUnit *SU) { 1151 SethiUllmanNumbers[SU->NodeNum] = 0; 1152 CalcNodeSethiUllmanNumber(SU); 1153 } 1154 1155 void releaseState() { 1156 SUnits = 0; 1157 SethiUllmanNumbers.clear(); 1158 } 1159 1160 unsigned getNodePriority(const SUnit *SU) const { 1161 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1162 return SethiUllmanNumbers[SU->NodeNum]; 1163 } 1164 1165 private: 1166 void CalculateSethiUllmanNumbers(); 1167 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1168 }; 1169} 1170 1171/// closestSucc - Returns the scheduled cycle of the successor which is 1172/// closet to the current cycle. 
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxCycle = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned Cycle = I->Dep->Cycle;
    // If there are bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg)
      Cycle = closestSucc(I->Dep)+1;
    if (Cycle > MaxCycle)
      MaxCycle = Cycle;
  }
  return MaxCycle;
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  // There used to be a special tie breaker here that looked for
  // two-address instructions and preferred the instruction with a
  // def&use operand.  The special case triggered diagnostics when
  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
  // ordering that priority_queue requires. It didn't help much anyway
  // because AddPseudoTwoAddrDeps already covers many of the cases
  // where it would have applied.  In addition, it's counter-intuitive
  // that a tie breaker would be the first thing attempted.  There's a
  // "real" tie breaker below that is the operation of last resort.
  // The fact that the "special tie breaker" would trigger when there
  // wasn't otherwise a tie is what broke the strict weak ordering
  // constraint.

  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  // Larger priority value means scheduled later, so returning true here
  // orders 'left' after 'right' in the priority queue.
  if (LPriority > RPriority)
    return true;
  else if (LPriority == RPriority) {
    // Try schedule def + use closer when Sethi-Ullman numbers are the same.
    // e.g.
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // and the following instructions are both ready.
    // t2 = op c3
    // t4 = op c4
    //
    // Then schedule t2 = op first.
    // i.e.
    // t4 = op c4
    // t2 = op c3
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // This creates more short live intervals.
    unsigned LDist = closestSucc(left);
    unsigned RDist = closestSucc(right);
    if (LDist < RDist)
      return true;
    else if (LDist == RDist) {
      // Last-resort tie breakers: height, then depth, then cycle bound.
      if (left->Height > right->Height)
        return true;
      else if (left->Height == right->Height)
        if (left->Depth < right->Depth)
          return true;
        else if (left->Depth == right->Depth)
          if (left->CycleBound > right->CycleBound)
            return true;
    }
  }
  return false;
}

/// canClobber - Return true if SU is a two-address instruction and one of
/// its tied def&use operands is produced by Op (i.e. SU's def would
/// overwrite the value Op computes).
template<class SF>
bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
    for (unsigned i = 0; i != NumOps; ++i) {
      // Is use operand i tied to one of the results?
      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(i).Val;
        if (Op == (*SUnitMap)[DU][SU->InstanceNo])
          return true;
      }
    }
  }
  return false;
}


/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // only look at value (non-chain) edges
    SUnit *SuccSU = I->Dep;
    if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand. Add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = (SUnit *)&((*SUnits)[i]);
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->Node;
    // Skip non-machine nodes and flagged (glued) groups.
    if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0)
      continue;

    unsigned Opc = Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(Node);
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
        // DU is the node whose value SU both reads and overwrites; DUSU is
        // its scheduling unit.
        SDNode *DU = SU->Node->getOperand(j).Val;
        SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
        if (!DUSU) continue;
        // Examine DUSU's other value users: they must run before SU or
        // they would read a clobbered value.
        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
             I != E; ++I) {
          if (I->isCtrl) continue;
          SUnit *SuccSU = I->Dep;
          // Don't constrain nodes with implicit defs. It can create cycles
          // plus it may increase register pressures.
          if (SuccSU == SU || SuccSU->hasPhysRegDefs)
            continue;
          // Be conservative. Ignore if nodes aren't at the same depth.
          if (SuccSU->Depth != SU->Depth)
            continue;
          // Add the edge only if it cannot create a dependency cycle.
          if ((!canClobber(SuccSU, DUSU) ||
               (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
               (!SU->isCommutable && SuccSU->isCommutable)) &&
              !isReachable(SuccSU, SU)) {
            DOUT << "Adding an edge from SU # " << SU->NodeNum
                 << " to SU #" << SuccSU->NodeNum << "\n";
            // Pseudo edge: SuccSU becomes a (control, special) pred of SU,
            // so bottom-up scheduling emits SU first (lower in schedule).
            SU->addPred(SuccSU, true, true);
          }
        }
      }
    }
  }
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// Smaller number is the higher priority.
template<class SF>
unsigned BURegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  // Memoized: a nonzero entry is a previously computed result.
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  // Take the max over value predecessors; Extra counts how many preds tie
  // with the current max (each tie raises register pressure by one).
  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    SUnit *PredSU = I->Dep;
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
      // NOTE(review): the !I->isCtrl test is always true here — ctrl preds
      // were skipped by the continue above.
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  // Leaves get 1, so 0 can keep meaning "not yet computed".
  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  // Reset to 0 ("not computed") before recomputing every node.
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

/// SumOfUnscheduledPredsOfSuccs - Sum, over all successors of SU, the number
/// of unscheduled predecessors of each successor (predecessors shared by
/// several successors are counted once per successor).
static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    SUnit *SuccSU = I->Dep;
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->Dep;
      if (!PredSU->isScheduled)
        ++Sum;
    }
  }

  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->Node && left->Node->isTargetOpcode();
  bool RIsTarget = right->Node && right->Node->isTargetOpcode();
  // A "floater" is a machine node with no predecessors; it can be placed
  // anywhere, so it gets a bonus penalty (scheduled later).
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  // Favor a node that is the last unscheduled pred of some successor.
  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;

  // Nodes with no successors sort ahead of nodes that have some.
  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // Special tie breaker: if two nodes share a operand, the one that use it
  // as a def&use operand is preferred.
  if (LIsTarget && RIsTarget) {
    if (left->isTwoAddress && !right->isTwoAddress) {
      SDNode *DUNode = left->Node->getOperand(0).Val;
      if (DUNode->isOperand(right->Node))
        RBonus += 2;
    }
    if (!left->isTwoAddress && right->isTwoAddress) {
      SDNode *DUNode = right->Node->getOperand(0).Val;
      if (DUNode->isOperand(left->Node))
        LBonus += 2;
    }
  }
  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  // NOTE(review): the first comparison uses the bonus-adjusted sums while
  // the equality tie-break below compares the raw priorities — confirm this
  // asymmetry is intentional.
  if (LPriority+LBonus < RPriority+RBonus)
    return true;
  else if (LPriority == RPriority)
    if (left->Depth < right->Depth)
      return true;
    else if (left->Depth == right->Depth)
      if (left->NumSuccsLeft > right->NumSuccsLeft)
        return true;
      else if (left->NumSuccsLeft == right->NumSuccsLeft)
        if (left->CycleBound > right->CycleBound)
          return true;
  return false;
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number.
/// Smaller number is the higher priority.
template<class SF>
unsigned TDRegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  // Memoized: a nonzero entry is a previously computed result.
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    SethiUllmanNumber = 0xffff;
  else if (SU->NumSuccsLeft == 0)
    // If SU does not have a use, i.e. it doesn't produce a value that would
    // be consumed (e.g. store), then it terminates a chain of computation.
    // Give it a small SethiUllman number so it will be scheduled right before
    // its predecessors that it doesn't lengthen their live ranges.
    SethiUllmanNumber = 0;
  else if (SU->NumPredsLeft == 0 &&
           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
    SethiUllmanNumber = 0xffff;
  else {
    // Max over value predecessors, plus one per pred that ties the max.
    int Extra = 0;
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl) continue;  // ignore chain preds
      SUnit *PredSU = I->Dep;
      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
      if (PredSethiUllman > SethiUllmanNumber) {
        SethiUllmanNumber = PredSethiUllman;
        Extra = 0;
      } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
        // NOTE(review): the !I->isCtrl test is always true here — ctrl preds
        // were skipped by the continue above.
        ++Extra;
    }

    SethiUllmanNumber += Extra;
  }

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
1478template<class SF> 1479void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1480 SethiUllmanNumbers.assign(SUnits->size(), 0); 1481 1482 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1483 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1484} 1485 1486//===----------------------------------------------------------------------===// 1487// Public Constructor Functions 1488//===----------------------------------------------------------------------===// 1489 1490llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1491 SelectionDAG *DAG, 1492 MachineBasicBlock *BB) { 1493 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1494 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1495 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); 1496} 1497 1498llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1499 SelectionDAG *DAG, 1500 MachineBasicBlock *BB) { 1501 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1502 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1503} 1504 1505