ScheduleDAGRRList.cpp revision 6f0d024a534af18d9e60b3ea757376cd8a3a980e
//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Counters reported with -stats; bumped by the backtracking / unfolding /
// duplication / cross-class-copy paths below.
STATISTIC(NumBacktracks, "Number of times scheduler backtraced");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

// Register the two scheduler variants with -pre-RA-sched=list-burr / list-tdrr.
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       " Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       " Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  /// Owned by this object; deleted in the destructor.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modifies the registers can be scheduled. LiveRegDefs / LiveRegCycles are
  /// indexed by register number (sized to TRI->getNumRegs() in Schedule()).
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
106void ScheduleDAGRRList::Schedule() { 107 DOUT << "********** List Scheduling **********\n"; 108 109 LiveRegDefs.resize(TRI->getNumRegs(), NULL); 110 LiveRegCycles.resize(TRI->getNumRegs(), 0); 111 112 // Build scheduling units. 113 BuildSchedUnits(); 114 115 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 116 SUnits[su].dumpAll(&DAG)); 117 CalculateDepths(); 118 CalculateHeights(); 119 120 AvailableQueue->initNodes(SUnitMap, SUnits); 121 122 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 123 if (isBottomUp) 124 ListScheduleBottomUp(); 125 else 126 ListScheduleTopDown(); 127 128 AvailableQueue->releaseState(); 129 130 CommuteNodesToReducePressure(); 131 132 DOUT << "*** Final schedule ***\n"; 133 DEBUG(dumpSchedule()); 134 DOUT << "\n"; 135 136 // Emit in scheduled order 137 EmitSchedule(); 138} 139 140/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 141/// it is not the last use of its first operand, add it to the CommuteSet if 142/// possible. It will be commuted when it is translated to a MI. 143void ScheduleDAGRRList::CommuteNodesToReducePressure() { 144 SmallPtrSet<SUnit*, 4> OperandSeen; 145 for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. 146 SUnit *SU = Sequence[i]; 147 if (!SU || !SU->Node) continue; 148 if (SU->isCommutable) { 149 unsigned Opc = SU->Node->getTargetOpcode(); 150 const TargetInstrDesc &TID = TII->get(Opc); 151 unsigned NumRes = TID.getNumDefs(); 152 unsigned NumOps = CountOperands(SU->Node); 153 for (unsigned j = 0; j != NumOps; ++j) { 154 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1) 155 continue; 156 157 SDNode *OpN = SU->Node->getOperand(j).Val; 158 SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; 159 if (OpSU && OperandSeen.count(OpSU) == 1) { 160 // Ok, so SU is not the last use of OpSU, but SU is two-address so 161 // it will clobber OpSU. 
Try to commute SU if no other source operands 162 // are live below. 163 bool DoCommute = true; 164 for (unsigned k = 0; k < NumOps; ++k) { 165 if (k != j) { 166 OpN = SU->Node->getOperand(k).Val; 167 OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; 168 if (OpSU && OperandSeen.count(OpSU) == 1) { 169 DoCommute = false; 170 break; 171 } 172 } 173 } 174 if (DoCommute) 175 CommuteSet.insert(SU->Node); 176 } 177 178 // Only look at the first use&def node for now. 179 break; 180 } 181 } 182 183 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 184 I != E; ++I) { 185 if (!I->isCtrl) 186 OperandSeen.insert(I->Dep); 187 } 188 } 189} 190 191//===----------------------------------------------------------------------===// 192// Bottom-Up Scheduling 193//===----------------------------------------------------------------------===// 194 195/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 196/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 197void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 198 unsigned CurCycle) { 199 // FIXME: the distance between two nodes is not always == the predecessor's 200 // latency. For example, the reader can very well read the register written 201 // by the predecessor later than the issue cycle. It also depends on the 202 // interrupt model (drain vs. freeze). 203 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); 204 205 --PredSU->NumSuccsLeft; 206 207#ifndef NDEBUG 208 if (PredSU->NumSuccsLeft < 0) { 209 cerr << "*** List scheduling failed! ***\n"; 210 PredSU->dump(&DAG); 211 cerr << " has been released too many times!\n"; 212 assert(0); 213 } 214#endif 215 216 if (PredSU->NumSuccsLeft == 0) { 217 // EntryToken has to go last! Special case it here. 
218 if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) { 219 PredSU->isAvailable = true; 220 AvailableQueue->push(PredSU); 221 } 222 } 223} 224 225/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 226/// count of its predecessors. If a predecessor pending count is zero, add it to 227/// the Available queue. 228void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 229 DOUT << "*** Scheduling [" << CurCycle << "]: "; 230 DEBUG(SU->dump(&DAG)); 231 SU->Cycle = CurCycle; 232 233 AvailableQueue->ScheduledNode(SU); 234 235 // Bottom up: release predecessors 236 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 237 I != E; ++I) { 238 ReleasePred(I->Dep, I->isCtrl, CurCycle); 239 if (I->Cost < 0) { 240 // This is a physical register dependency and it's impossible or 241 // expensive to copy the register. Make sure nothing that can 242 // clobber the register is scheduled between the predecessor and 243 // this node. 244 if (LiveRegs.insert(I->Reg)) { 245 LiveRegDefs[I->Reg] = I->Dep; 246 LiveRegCycles[I->Reg] = CurCycle; 247 } 248 } 249 } 250 251 // Release all the implicit physical register defs that are live. 252 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 253 I != E; ++I) { 254 if (I->Cost < 0) { 255 if (LiveRegCycles[I->Reg] == I->Dep->Cycle) { 256 LiveRegs.erase(I->Reg); 257 assert(LiveRegDefs[I->Reg] == SU && 258 "Physical register dependency violated?"); 259 LiveRegDefs[I->Reg] = NULL; 260 LiveRegCycles[I->Reg] = 0; 261 } 262 } 263 } 264 265 SU->isScheduled = true; 266} 267 268/// CapturePred - This does the opposite of ReleasePred. Since SU is being 269/// unscheduled, incrcease the succ left count of its predecessors. Remove 270/// them from AvailableQueue if necessary. 
271void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) { 272 PredSU->CycleBound = 0; 273 for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end(); 274 I != E; ++I) { 275 if (I->Dep == SU) 276 continue; 277 PredSU->CycleBound = std::max(PredSU->CycleBound, 278 I->Dep->Cycle + PredSU->Latency); 279 } 280 281 if (PredSU->isAvailable) { 282 PredSU->isAvailable = false; 283 if (!PredSU->isPending) 284 AvailableQueue->remove(PredSU); 285 } 286 287 ++PredSU->NumSuccsLeft; 288} 289 290/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 291/// its predecessor states to reflect the change. 292void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 293 DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; 294 DEBUG(SU->dump(&DAG)); 295 296 AvailableQueue->UnscheduledNode(SU); 297 298 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 299 I != E; ++I) { 300 CapturePred(I->Dep, SU, I->isCtrl); 301 if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) { 302 LiveRegs.erase(I->Reg); 303 assert(LiveRegDefs[I->Reg] == I->Dep && 304 "Physical register dependency violated?"); 305 LiveRegDefs[I->Reg] = NULL; 306 LiveRegCycles[I->Reg] = 0; 307 } 308 } 309 310 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 311 I != E; ++I) { 312 if (I->Cost < 0) { 313 if (LiveRegs.insert(I->Reg)) { 314 assert(!LiveRegDefs[I->Reg] && 315 "Physical register dependency violated?"); 316 LiveRegDefs[I->Reg] = SU; 317 } 318 if (I->Dep->Cycle < LiveRegCycles[I->Reg]) 319 LiveRegCycles[I->Reg] = I->Dep->Cycle; 320 } 321 } 322 323 SU->Cycle = 0; 324 SU->isScheduled = false; 325 SU->isAvailable = true; 326 AvailableQueue->push(SU); 327} 328 329// FIXME: This is probably too slow! 
330static void isReachable(SUnit *SU, SUnit *TargetSU, 331 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 332 if (Reached) return; 333 if (SU == TargetSU) { 334 Reached = true; 335 return; 336 } 337 if (!Visited.insert(SU)) return; 338 339 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 340 ++I) 341 isReachable(I->Dep, TargetSU, Visited, Reached); 342} 343 344static bool isReachable(SUnit *SU, SUnit *TargetSU) { 345 SmallPtrSet<SUnit*, 32> Visited; 346 bool Reached = false; 347 isReachable(SU, TargetSU, Visited, Reached); 348 return Reached; 349} 350 351/// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 352/// create a cycle. 353static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 354 if (isReachable(TargetSU, SU)) 355 return true; 356 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 357 I != E; ++I) 358 if (I->Cost < 0 && isReachable(TargetSU, I->Dep)) 359 return true; 360 return false; 361} 362 363/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 364/// BTCycle in order to schedule a specific node. Returns the last unscheduled 365/// SUnit. Also returns if a successor is unscheduled in the process. 366void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 367 unsigned &CurCycle) { 368 SUnit *OldSU = NULL; 369 while (CurCycle > BtCycle) { 370 OldSU = Sequence.back(); 371 Sequence.pop_back(); 372 if (SU->isSucc(OldSU)) 373 // Don't try to remove SU from AvailableQueue. 374 SU->isAvailable = false; 375 UnscheduleNodeBottomUp(OldSU); 376 --CurCycle; 377 } 378 379 380 if (SU->isSucc(OldSU)) { 381 assert(false && "Something is wrong!"); 382 abort(); 383 } 384 385 ++NumBacktracks; 386} 387 388/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 389/// successors to the newly created node. 
390SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 391 if (SU->FlaggedNodes.size()) 392 return NULL; 393 394 SDNode *N = SU->Node; 395 if (!N) 396 return NULL; 397 398 SUnit *NewSU; 399 bool TryUnfold = false; 400 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 401 MVT::ValueType VT = N->getValueType(i); 402 if (VT == MVT::Flag) 403 return NULL; 404 else if (VT == MVT::Other) 405 TryUnfold = true; 406 } 407 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 408 const SDOperand &Op = N->getOperand(i); 409 MVT::ValueType VT = Op.Val->getValueType(Op.ResNo); 410 if (VT == MVT::Flag) 411 return NULL; 412 } 413 414 if (TryUnfold) { 415 SmallVector<SDNode*, 4> NewNodes; 416 if (!TII->unfoldMemoryOperand(DAG, N, NewNodes)) 417 return NULL; 418 419 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 420 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 421 422 N = NewNodes[1]; 423 SDNode *LoadNode = NewNodes[0]; 424 unsigned NumVals = N->getNumValues(); 425 unsigned OldNumVals = SU->Node->getNumValues(); 426 for (unsigned i = 0; i != NumVals; ++i) 427 DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), SDOperand(N, i)); 428 DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1), 429 SDOperand(LoadNode, 1)); 430 431 SUnit *NewSU = NewSUnit(N); 432 SUnitMap[N].push_back(NewSU); 433 const TargetInstrDesc &TID = TII->get(N->getTargetOpcode()); 434 for (unsigned i = 0; i != TID.getNumOperands(); ++i) { 435 if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { 436 NewSU->isTwoAddress = true; 437 break; 438 } 439 } 440 if (TID.isCommutable()) 441 NewSU->isCommutable = true; 442 // FIXME: Calculate height / depth and propagate the changes? 443 NewSU->Depth = SU->Depth; 444 NewSU->Height = SU->Height; 445 ComputeLatency(NewSU); 446 447 // LoadNode may already exist. 
This can happen when there is another 448 // load from the same location and producing the same type of value 449 // but it has different alignment or volatileness. 450 bool isNewLoad = true; 451 SUnit *LoadSU; 452 DenseMap<SDNode*, std::vector<SUnit*> >::iterator SMI = 453 SUnitMap.find(LoadNode); 454 if (SMI != SUnitMap.end()) { 455 LoadSU = SMI->second.front(); 456 isNewLoad = false; 457 } else { 458 LoadSU = NewSUnit(LoadNode); 459 SUnitMap[LoadNode].push_back(LoadSU); 460 461 LoadSU->Depth = SU->Depth; 462 LoadSU->Height = SU->Height; 463 ComputeLatency(LoadSU); 464 } 465 466 SUnit *ChainPred = NULL; 467 SmallVector<SDep, 4> ChainSuccs; 468 SmallVector<SDep, 4> LoadPreds; 469 SmallVector<SDep, 4> NodePreds; 470 SmallVector<SDep, 4> NodeSuccs; 471 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 472 I != E; ++I) { 473 if (I->isCtrl) 474 ChainPred = I->Dep; 475 else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode)) 476 LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); 477 else 478 NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); 479 } 480 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 481 I != E; ++I) { 482 if (I->isCtrl) 483 ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, 484 I->isCtrl, I->isSpecial)); 485 else 486 NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, 487 I->isCtrl, I->isSpecial)); 488 } 489 490 SU->removePred(ChainPred, true, false); 491 if (isNewLoad) 492 LoadSU->addPred(ChainPred, true, false); 493 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 494 SDep *Pred = &LoadPreds[i]; 495 SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); 496 if (isNewLoad) 497 LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, 498 Pred->Reg, Pred->Cost); 499 } 500 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 501 SDep *Pred = &NodePreds[i]; 502 SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); 503 NewSU->addPred(Pred->Dep, Pred->isCtrl, 
Pred->isSpecial, 504 Pred->Reg, Pred->Cost); 505 } 506 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 507 SDep *Succ = &NodeSuccs[i]; 508 Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); 509 Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial, 510 Succ->Reg, Succ->Cost); 511 } 512 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 513 SDep *Succ = &ChainSuccs[i]; 514 Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); 515 if (isNewLoad) 516 Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial, 517 Succ->Reg, Succ->Cost); 518 } 519 if (isNewLoad) 520 NewSU->addPred(LoadSU, false, false); 521 522 if (isNewLoad) 523 AvailableQueue->addNode(LoadSU); 524 AvailableQueue->addNode(NewSU); 525 526 ++NumUnfolds; 527 528 if (NewSU->NumSuccsLeft == 0) { 529 NewSU->isAvailable = true; 530 return NewSU; 531 } 532 SU = NewSU; 533 } 534 535 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 536 NewSU = Clone(SU); 537 538 // New SUnit has the exact same predecessors. 539 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 540 I != E; ++I) 541 if (!I->isSpecial) { 542 NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost); 543 NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1); 544 } 545 546 // Only copy scheduled successors. Cut them from old node's successor 547 // list and move them over. 
548 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 549 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 550 I != E; ++I) { 551 if (I->isSpecial) 552 continue; 553 if (I->Dep->isScheduled) { 554 NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1); 555 I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost); 556 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 557 } 558 } 559 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 560 SUnit *Succ = DelDeps[i].first; 561 bool isCtrl = DelDeps[i].second; 562 Succ->removePred(SU, isCtrl, false); 563 } 564 565 AvailableQueue->updateNode(SU); 566 AvailableQueue->addNode(NewSU); 567 568 ++NumDups; 569 return NewSU; 570} 571 572/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 573/// and move all scheduled successors of the given SUnit to the last copy. 574void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 575 const TargetRegisterClass *DestRC, 576 const TargetRegisterClass *SrcRC, 577 SmallVector<SUnit*, 2> &Copies) { 578 SUnit *CopyFromSU = NewSUnit(NULL); 579 CopyFromSU->CopySrcRC = SrcRC; 580 CopyFromSU->CopyDstRC = DestRC; 581 CopyFromSU->Depth = SU->Depth; 582 CopyFromSU->Height = SU->Height; 583 584 SUnit *CopyToSU = NewSUnit(NULL); 585 CopyToSU->CopySrcRC = DestRC; 586 CopyToSU->CopyDstRC = SrcRC; 587 588 // Only copy scheduled successors. Cut them from old node's successor 589 // list and move them over. 
590 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 591 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 592 I != E; ++I) { 593 if (I->isSpecial) 594 continue; 595 if (I->Dep->isScheduled) { 596 CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1); 597 I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost); 598 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 599 } 600 } 601 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 602 SUnit *Succ = DelDeps[i].first; 603 bool isCtrl = DelDeps[i].second; 604 Succ->removePred(SU, isCtrl, false); 605 } 606 607 CopyFromSU->addPred(SU, false, false, Reg, -1); 608 CopyToSU->addPred(CopyFromSU, false, false, Reg, 1); 609 610 AvailableQueue->updateNode(SU); 611 AvailableQueue->addNode(CopyFromSU); 612 AvailableQueue->addNode(CopyToSU); 613 Copies.push_back(CopyFromSU); 614 Copies.push_back(CopyToSU); 615 616 ++NumCCCopies; 617} 618 619/// getPhysicalRegisterVT - Returns the ValueType of the physical register 620/// definition of the specified node. 621/// FIXME: Move to SelectionDAG? 622static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg, 623 const TargetInstrInfo *TII) { 624 const TargetInstrDesc &TID = TII->get(N->getTargetOpcode()); 625 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 626 unsigned NumRes = TID.getNumDefs(); 627 for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { 628 if (Reg == *ImpDef) 629 break; 630 ++NumRes; 631 } 632 return N->getValueType(NumRes); 633} 634 635/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 636/// scheduling of the given node to satisfy live physical register dependencies. 637/// If the specific node is the last one that's available to schedule, do 638/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (LiveRegs.empty())
    return false;

  // RegAdded guards against reporting the same register twice in LRegs.
  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  // Check both the register of each physical-reg pred edge and its aliases.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  // Also check the implicit defs of this node and of all nodes flagged
  // (glued) to it, since they are scheduled as one unit.
  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = TRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    // Pop until we find a node that is actually schedulable this cycle;
    // everything popped along the way is parked in NotReady.
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        // Delayed by a live physical register; remember which ones.
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the nodes that produce these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
        if (!NewDef) {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            TRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false && "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    // A NULL entry in Sequence represents a stall (noop) cycle.
    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}


/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty() && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    // A NULL entry in Sequence represents a stall (noop) cycle.
    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    CurCycle++;
  }


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}



//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
966 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 967 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 968 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 969 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 970 971 bool operator()(const SUnit* left, const SUnit* right) const; 972 }; 973 974 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 975 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 976 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 977 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 978 979 bool operator()(const SUnit* left, const SUnit* right) const; 980 }; 981} // end anonymous namespace 982 983static inline bool isCopyFromLiveIn(const SUnit *SU) { 984 SDNode *N = SU->Node; 985 return N && N->getOpcode() == ISD::CopyFromReg && 986 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 987} 988 989namespace { 990 template<class SF> 991 class VISIBILITY_HIDDEN RegReductionPriorityQueue 992 : public SchedulingPriorityQueue { 993 std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; 994 995 public: 996 RegReductionPriorityQueue() : 997 Queue(SF(this)) {} 998 999 virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1000 std::vector<SUnit> &sunits) {} 1001 1002 virtual void addNode(const SUnit *SU) {} 1003 1004 virtual void updateNode(const SUnit *SU) {} 1005 1006 virtual void releaseState() {} 1007 1008 virtual unsigned getNodePriority(const SUnit *SU) const { 1009 return 0; 1010 } 1011 1012 unsigned size() const { return Queue.size(); } 1013 1014 bool empty() const { return Queue.empty(); } 1015 1016 void push(SUnit *U) { 1017 Queue.push(U); 1018 } 1019 void push_all(const std::vector<SUnit *> &Nodes) { 1020 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1021 Queue.push(Nodes[i]); 1022 } 1023 1024 SUnit *pop() { 1025 if (empty()) return NULL; 1026 SUnit *V = Queue.top(); 1027 Queue.pop(); 1028 
return V; 1029 } 1030 1031 /// remove - This is a really inefficient way to remove a node from a 1032 /// priority queue. We should roll our own heap to make this better or 1033 /// something. 1034 void remove(SUnit *SU) { 1035 std::vector<SUnit*> Temp; 1036 1037 assert(!Queue.empty() && "Not in queue!"); 1038 while (Queue.top() != SU) { 1039 Temp.push_back(Queue.top()); 1040 Queue.pop(); 1041 assert(!Queue.empty() && "Not in queue!"); 1042 } 1043 1044 // Remove the node from the PQ. 1045 Queue.pop(); 1046 1047 // Add all the other nodes back. 1048 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 1049 Queue.push(Temp[i]); 1050 } 1051 }; 1052 1053 template<class SF> 1054 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 1055 : public RegReductionPriorityQueue<SF> { 1056 // SUnitMap SDNode to SUnit mapping (n -> n). 1057 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1058 1059 // SUnits - The SUnits for the current graph. 1060 const std::vector<SUnit> *SUnits; 1061 1062 // SethiUllmanNumbers - The SethiUllman number for each node. 1063 std::vector<unsigned> SethiUllmanNumbers; 1064 1065 const TargetInstrInfo *TII; 1066 const TargetRegisterInfo *TRI; 1067 public: 1068 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii, 1069 const TargetRegisterInfo *tri) 1070 : TII(tii), TRI(tri) {} 1071 1072 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1073 std::vector<SUnit> &sunits) { 1074 SUnitMap = &sumap; 1075 SUnits = &sunits; 1076 // Add pseudo dependency edges for two-address nodes. 1077 AddPseudoTwoAddrDeps(); 1078 // Calculate node priorities. 
1079 CalculateSethiUllmanNumbers(); 1080 } 1081 1082 void addNode(const SUnit *SU) { 1083 SethiUllmanNumbers.resize(SUnits->size(), 0); 1084 CalcNodeSethiUllmanNumber(SU); 1085 } 1086 1087 void updateNode(const SUnit *SU) { 1088 SethiUllmanNumbers[SU->NodeNum] = 0; 1089 CalcNodeSethiUllmanNumber(SU); 1090 } 1091 1092 void releaseState() { 1093 SUnits = 0; 1094 SethiUllmanNumbers.clear(); 1095 } 1096 1097 unsigned getNodePriority(const SUnit *SU) const { 1098 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1099 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1100 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1101 // CopyFromReg should be close to its def because it restricts 1102 // allocation choices. But if it is a livein then perhaps we want it 1103 // closer to its uses so it can be coalesced. 1104 return 0xffff; 1105 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1106 // CopyToReg should be close to its uses to facilitate coalescing and 1107 // avoid spilling. 1108 return 0; 1109 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1110 Opc == TargetInstrInfo::INSERT_SUBREG) 1111 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1112 // facilitate coalescing. 1113 return 0; 1114 else if (SU->NumSuccs == 0) 1115 // If SU does not have a use, i.e. it doesn't produce a value that would 1116 // be consumed (e.g. store), then it terminates a chain of computation. 1117 // Give it a large SethiUllman number so it will be scheduled right 1118 // before its predecessors that it doesn't lengthen their live ranges. 1119 return 0xffff; 1120 else if (SU->NumPreds == 0) 1121 // If SU does not have a def, schedule it close to its uses because it 1122 // does not lengthen any live ranges. 
1123 return 0; 1124 else 1125 return SethiUllmanNumbers[SU->NodeNum]; 1126 } 1127 1128 private: 1129 bool canClobber(SUnit *SU, SUnit *Op); 1130 void AddPseudoTwoAddrDeps(); 1131 void CalculateSethiUllmanNumbers(); 1132 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1133 }; 1134 1135 1136 template<class SF> 1137 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 1138 : public RegReductionPriorityQueue<SF> { 1139 // SUnitMap SDNode to SUnit mapping (n -> n). 1140 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1141 1142 // SUnits - The SUnits for the current graph. 1143 const std::vector<SUnit> *SUnits; 1144 1145 // SethiUllmanNumbers - The SethiUllman number for each node. 1146 std::vector<unsigned> SethiUllmanNumbers; 1147 1148 public: 1149 TDRegReductionPriorityQueue() {} 1150 1151 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1152 std::vector<SUnit> &sunits) { 1153 SUnitMap = &sumap; 1154 SUnits = &sunits; 1155 // Calculate node priorities. 1156 CalculateSethiUllmanNumbers(); 1157 } 1158 1159 void addNode(const SUnit *SU) { 1160 SethiUllmanNumbers.resize(SUnits->size(), 0); 1161 CalcNodeSethiUllmanNumber(SU); 1162 } 1163 1164 void updateNode(const SUnit *SU) { 1165 SethiUllmanNumbers[SU->NodeNum] = 0; 1166 CalcNodeSethiUllmanNumber(SU); 1167 } 1168 1169 void releaseState() { 1170 SUnits = 0; 1171 SethiUllmanNumbers.clear(); 1172 } 1173 1174 unsigned getNodePriority(const SUnit *SU) const { 1175 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1176 return SethiUllmanNumbers[SU->NodeNum]; 1177 } 1178 1179 private: 1180 void CalculateSethiUllmanNumbers(); 1181 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1182 }; 1183} 1184 1185/// closestSucc - Returns the scheduled cycle of the successor which is 1186/// closet to the current cycle. 
1187static unsigned closestSucc(const SUnit *SU) { 1188 unsigned MaxCycle = 0; 1189 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1190 I != E; ++I) { 1191 unsigned Cycle = I->Dep->Cycle; 1192 // If there are bunch of CopyToRegs stacked up, they should be considered 1193 // to be at the same position. 1194 if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg) 1195 Cycle = closestSucc(I->Dep)+1; 1196 if (Cycle > MaxCycle) 1197 MaxCycle = Cycle; 1198 } 1199 return MaxCycle; 1200} 1201 1202/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1203/// for scratch registers. Live-in operands and live-out results don't count 1204/// since they are "fixed". 1205static unsigned calcMaxScratches(const SUnit *SU) { 1206 unsigned Scratches = 0; 1207 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1208 I != E; ++I) { 1209 if (I->isCtrl) continue; // ignore chain preds 1210 if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyFromReg) 1211 Scratches++; 1212 } 1213 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1214 I != E; ++I) { 1215 if (I->isCtrl) continue; // ignore chain succs 1216 if (!I->Dep->Node || I->Dep->Node->getOpcode() != ISD::CopyToReg) 1217 Scratches += 10; 1218 } 1219 return Scratches; 1220} 1221 1222// Bottom up 1223bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1224 // There used to be a special tie breaker here that looked for 1225 // two-address instructions and preferred the instruction with a 1226 // def&use operand. The special case triggered diagnostics when 1227 // _GLIBCXX_DEBUG was enabled because it broke the strict weak 1228 // ordering that priority_queue requires. It didn't help much anyway 1229 // because AddPseudoTwoAddrDeps already covers many of the cases 1230 // where it would have applied. In addition, it's counter-intuitive 1231 // that a tie breaker would be the first thing attempted. 
There's a 1232 // "real" tie breaker below that is the operation of last resort. 1233 // The fact that the "special tie breaker" would trigger when there 1234 // wasn't otherwise a tie is what broke the strict weak ordering 1235 // constraint. 1236 1237 unsigned LPriority = SPQ->getNodePriority(left); 1238 unsigned RPriority = SPQ->getNodePriority(right); 1239 if (LPriority > RPriority) 1240 return true; 1241 else if (LPriority == RPriority) { 1242 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1243 // e.g. 1244 // t1 = op t2, c1 1245 // t3 = op t4, c2 1246 // 1247 // and the following instructions are both ready. 1248 // t2 = op c3 1249 // t4 = op c4 1250 // 1251 // Then schedule t2 = op first. 1252 // i.e. 1253 // t4 = op c4 1254 // t2 = op c3 1255 // t1 = op t2, c1 1256 // t3 = op t4, c2 1257 // 1258 // This creates more short live intervals. 1259 unsigned LDist = closestSucc(left); 1260 unsigned RDist = closestSucc(right); 1261 if (LDist < RDist) 1262 return true; 1263 else if (LDist == RDist) { 1264 // Intuitively, it's good to push down instructions whose results are 1265 // liveout so their long live ranges won't conflict with other values 1266 // which are needed inside the BB. Further prioritize liveout instructions 1267 // by the number of operands which are calculated within the BB. 
1268 unsigned LScratch = calcMaxScratches(left); 1269 unsigned RScratch = calcMaxScratches(right); 1270 if (LScratch > RScratch) 1271 return true; 1272 else if (LScratch == RScratch) 1273 if (left->Height > right->Height) 1274 return true; 1275 else if (left->Height == right->Height) 1276 if (left->Depth < right->Depth) 1277 return true; 1278 else if (left->Depth == right->Depth) 1279 if (left->CycleBound > right->CycleBound) 1280 return true; 1281 } 1282 } 1283 return false; 1284} 1285 1286template<class SF> 1287bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) { 1288 if (SU->isTwoAddress) { 1289 unsigned Opc = SU->Node->getTargetOpcode(); 1290 const TargetInstrDesc &TID = TII->get(Opc); 1291 unsigned NumRes = TID.getNumDefs(); 1292 unsigned NumOps = ScheduleDAG::CountOperands(SU->Node); 1293 for (unsigned i = 0; i != NumOps; ++i) { 1294 if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { 1295 SDNode *DU = SU->Node->getOperand(i).Val; 1296 if ((*SUnitMap).find(DU) != (*SUnitMap).end() && 1297 Op == (*SUnitMap)[DU][SU->InstanceNo]) 1298 return true; 1299 } 1300 } 1301 } 1302 return false; 1303} 1304 1305 1306/// hasCopyToRegUse - Return true if SU has a value successor that is a 1307/// CopyToReg node. 1308static bool hasCopyToRegUse(SUnit *SU) { 1309 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1310 I != E; ++I) { 1311 if (I->isCtrl) continue; 1312 SUnit *SuccSU = I->Dep; 1313 if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg) 1314 return true; 1315 } 1316 return false; 1317} 1318 1319/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1320/// physical register def. 
1321static bool canClobberPhysRegDefs(SUnit *SuccSU, SUnit *SU, 1322 const TargetInstrInfo *TII, 1323 const TargetRegisterInfo *TRI) { 1324 SDNode *N = SuccSU->Node; 1325 unsigned NumDefs = TII->get(N->getTargetOpcode()).getNumDefs(); 1326 const unsigned *ImpDefs = TII->get(N->getTargetOpcode()).getImplicitDefs(); 1327 if (!ImpDefs) 1328 return false; 1329 const unsigned *SUImpDefs = 1330 TII->get(SU->Node->getTargetOpcode()).getImplicitDefs(); 1331 if (!SUImpDefs) 1332 return false; 1333 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1334 MVT::ValueType VT = N->getValueType(i); 1335 if (VT == MVT::Flag || VT == MVT::Other) 1336 continue; 1337 unsigned Reg = ImpDefs[i - NumDefs]; 1338 for (;*SUImpDefs; ++SUImpDefs) { 1339 unsigned SUReg = *SUImpDefs; 1340 if (TRI->regsOverlap(Reg, SUReg)) 1341 return true; 1342 } 1343 } 1344 return false; 1345} 1346 1347/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1348/// it as a def&use operand. Add a pseudo control edge from it to the other 1349/// node (if it won't create a cycle) so the two-address one will be scheduled 1350/// first (lower in the schedule). If both nodes are two-address, favor the 1351/// one that has a CopyToReg use (more likely to be a loop induction update). 1352/// If both are two-address, but one is commutable while the other is not 1353/// commutable, favor the one that's not commutable. 
1354template<class SF> 1355void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1356 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1357 SUnit *SU = (SUnit *)&((*SUnits)[i]); 1358 if (!SU->isTwoAddress) 1359 continue; 1360 1361 SDNode *Node = SU->Node; 1362 if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0) 1363 continue; 1364 1365 unsigned Opc = Node->getTargetOpcode(); 1366 const TargetInstrDesc &TID = TII->get(Opc); 1367 unsigned NumRes = TID.getNumDefs(); 1368 unsigned NumOps = ScheduleDAG::CountOperands(Node); 1369 for (unsigned j = 0; j != NumOps; ++j) { 1370 if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) != -1) { 1371 SDNode *DU = SU->Node->getOperand(j).Val; 1372 if ((*SUnitMap).find(DU) == (*SUnitMap).end()) 1373 continue; 1374 SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; 1375 if (!DUSU) continue; 1376 for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); 1377 I != E; ++I) { 1378 if (I->isCtrl) continue; 1379 SUnit *SuccSU = I->Dep; 1380 if (SuccSU == SU) 1381 continue; 1382 // Be conservative. Ignore if nodes aren't at roughly the same 1383 // depth and height. 1384 if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) 1385 continue; 1386 if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode()) 1387 continue; 1388 // Don't constrain nodes with physical register defs if the 1389 // predecessor can clobber them. 1390 if (SuccSU->hasPhysRegDefs) { 1391 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 1392 continue; 1393 } 1394 // Don't constraint extract_subreg / insert_subreg these may be 1395 // coalesced away. We don't them close to their uses. 
1396 unsigned SuccOpc = SuccSU->Node->getTargetOpcode(); 1397 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1398 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1399 continue; 1400 if ((!canClobber(SuccSU, DUSU) || 1401 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1402 (!SU->isCommutable && SuccSU->isCommutable)) && 1403 !isReachable(SuccSU, SU)) { 1404 DOUT << "Adding an edge from SU # " << SU->NodeNum 1405 << " to SU #" << SuccSU->NodeNum << "\n"; 1406 SU->addPred(SuccSU, true, true); 1407 } 1408 } 1409 } 1410 } 1411 } 1412} 1413 1414/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1415/// Smaller number is the higher priority. 1416template<class SF> 1417unsigned BURegReductionPriorityQueue<SF>:: 1418CalcNodeSethiUllmanNumber(const SUnit *SU) { 1419 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1420 if (SethiUllmanNumber != 0) 1421 return SethiUllmanNumber; 1422 1423 unsigned Extra = 0; 1424 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1425 I != E; ++I) { 1426 if (I->isCtrl) continue; // ignore chain preds 1427 SUnit *PredSU = I->Dep; 1428 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1429 if (PredSethiUllman > SethiUllmanNumber) { 1430 SethiUllmanNumber = PredSethiUllman; 1431 Extra = 0; 1432 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1433 ++Extra; 1434 } 1435 1436 SethiUllmanNumber += Extra; 1437 1438 if (SethiUllmanNumber == 0) 1439 SethiUllmanNumber = 1; 1440 1441 return SethiUllmanNumber; 1442} 1443 1444/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1445/// scheduling units. 
1446template<class SF> 1447void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1448 SethiUllmanNumbers.assign(SUnits->size(), 0); 1449 1450 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1451 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1452} 1453 1454static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { 1455 unsigned Sum = 0; 1456 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1457 I != E; ++I) { 1458 SUnit *SuccSU = I->Dep; 1459 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1460 EE = SuccSU->Preds.end(); II != EE; ++II) { 1461 SUnit *PredSU = II->Dep; 1462 if (!PredSU->isScheduled) 1463 ++Sum; 1464 } 1465 } 1466 1467 return Sum; 1468} 1469 1470 1471// Top down 1472bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1473 unsigned LPriority = SPQ->getNodePriority(left); 1474 unsigned RPriority = SPQ->getNodePriority(right); 1475 bool LIsTarget = left->Node && left->Node->isTargetOpcode(); 1476 bool RIsTarget = right->Node && right->Node->isTargetOpcode(); 1477 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1478 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1479 unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; 1480 unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; 1481 1482 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1483 return false; 1484 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1485 return true; 1486 1487 // Special tie breaker: if two nodes share a operand, the one that use it 1488 // as a def&use operand is preferred. 
1489 if (LIsTarget && RIsTarget) { 1490 if (left->isTwoAddress && !right->isTwoAddress) { 1491 SDNode *DUNode = left->Node->getOperand(0).Val; 1492 if (DUNode->isOperand(right->Node)) 1493 RBonus += 2; 1494 } 1495 if (!left->isTwoAddress && right->isTwoAddress) { 1496 SDNode *DUNode = right->Node->getOperand(0).Val; 1497 if (DUNode->isOperand(left->Node)) 1498 LBonus += 2; 1499 } 1500 } 1501 if (LIsFloater) 1502 LBonus -= 2; 1503 if (RIsFloater) 1504 RBonus -= 2; 1505 if (left->NumSuccs == 1) 1506 LBonus += 2; 1507 if (right->NumSuccs == 1) 1508 RBonus += 2; 1509 1510 if (LPriority+LBonus < RPriority+RBonus) 1511 return true; 1512 else if (LPriority == RPriority) 1513 if (left->Depth < right->Depth) 1514 return true; 1515 else if (left->Depth == right->Depth) 1516 if (left->NumSuccsLeft > right->NumSuccsLeft) 1517 return true; 1518 else if (left->NumSuccsLeft == right->NumSuccsLeft) 1519 if (left->CycleBound > right->CycleBound) 1520 return true; 1521 return false; 1522} 1523 1524/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1525/// Smaller number is the higher priority. 1526template<class SF> 1527unsigned TDRegReductionPriorityQueue<SF>:: 1528CalcNodeSethiUllmanNumber(const SUnit *SU) { 1529 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1530 if (SethiUllmanNumber != 0) 1531 return SethiUllmanNumber; 1532 1533 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1534 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1535 SethiUllmanNumber = 0xffff; 1536 else if (SU->NumSuccsLeft == 0) 1537 // If SU does not have a use, i.e. it doesn't produce a value that would 1538 // be consumed (e.g. store), then it terminates a chain of computation. 1539 // Give it a small SethiUllman number so it will be scheduled right before 1540 // its predecessors that it doesn't lengthen their live ranges. 
1541 SethiUllmanNumber = 0; 1542 else if (SU->NumPredsLeft == 0 && 1543 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 1544 SethiUllmanNumber = 0xffff; 1545 else { 1546 int Extra = 0; 1547 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1548 I != E; ++I) { 1549 if (I->isCtrl) continue; // ignore chain preds 1550 SUnit *PredSU = I->Dep; 1551 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1552 if (PredSethiUllman > SethiUllmanNumber) { 1553 SethiUllmanNumber = PredSethiUllman; 1554 Extra = 0; 1555 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1556 ++Extra; 1557 } 1558 1559 SethiUllmanNumber += Extra; 1560 } 1561 1562 return SethiUllmanNumber; 1563} 1564 1565/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1566/// scheduling units. 1567template<class SF> 1568void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1569 SethiUllmanNumbers.assign(SUnits->size(), 0); 1570 1571 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1572 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1573} 1574 1575//===----------------------------------------------------------------------===// 1576// Public Constructor Functions 1577//===----------------------------------------------------------------------===// 1578 1579llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1580 SelectionDAG *DAG, 1581 MachineBasicBlock *BB) { 1582 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1583 const TargetRegisterInfo *TRI = DAG->getTarget().getRegisterInfo(); 1584 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1585 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII, TRI)); 1586} 1587 1588llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1589 SelectionDAG *DAG, 1590 MachineBasicBlock *BB) { 1591 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1592 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1593} 1594 1595