ScheduleDAGRRList.cpp revision 7cead1c297f367069a91935694ae50eb5093c25b
1//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Evan Cheng and is distributed under the 6// University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements bottom-up and top-down register pressure reduction list 11// schedulers, using standard algorithms. The basic approach uses a priority 12// queue of available nodes to schedule. One at a time, nodes are taken from 13// the priority queue (thus in priority order), checked for legality to 14// schedule, and emitted if legal. 15// 16//===----------------------------------------------------------------------===// 17 18#define DEBUG_TYPE "pre-RA-sched" 19#include "llvm/CodeGen/ScheduleDAG.h" 20#include "llvm/CodeGen/SchedulerRegistry.h" 21#include "llvm/CodeGen/SSARegMap.h" 22#include "llvm/Target/MRegisterInfo.h" 23#include "llvm/Target/TargetData.h" 24#include "llvm/Target/TargetMachine.h" 25#include "llvm/Target/TargetInstrInfo.h" 26#include "llvm/Support/Debug.h" 27#include "llvm/Support/Compiler.h" 28#include "llvm/ADT/SmallPtrSet.h" 29#include "llvm/ADT/SmallSet.h" 30#include "llvm/ADT/Statistic.h" 31#include <climits> 32#include <queue> 33#include "llvm/Support/CommandLine.h" 34using namespace llvm; 35 36STATISTIC(NumBacktracks, "Number of times scheduler backtraced"); 37STATISTIC(NumUnfolds, "Number of nodes unfolded"); 38STATISTIC(NumDups, "Number of duplicated nodes"); 39STATISTIC(NumCCCopies, "Number of cross class copies"); 40 41static RegisterScheduler 42 burrListDAGScheduler("list-burr", 43 " Bottom-up register reduction list scheduling", 44 createBURRListDAGScheduler); 45static RegisterScheduler 46 tdrListrDAGScheduler("list-tdrr", 47 " Top-down register reduction list scheduling", 48 createTDRRListDAGScheduler); 49 50namespace { 51//===----------------------------------------------------------------------===// 52/// ScheduleDAGRRList - The actual register reduction list scheduler 53/// implementation. This supports both top-down and bottom-up scheduling. 54/// 55class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG { 56private: 57 /// isBottomUp - This is true if the scheduling problem is bottom-up, false if 58 /// it is top-down. 59 bool isBottomUp; 60 61 /// AvailableQueue - The priority queue to use for the available SUnits. 62 ///a 63 SchedulingPriorityQueue *AvailableQueue; 64 65 /// LiveRegs / LiveRegDefs - A set of physical registers and their definition 66 /// that are "live". These nodes must be scheduled before any other nodes that 67 /// modifies the registers can be scheduled. 68 SmallSet<unsigned, 4> LiveRegs; 69 std::vector<SUnit*> LiveRegDefs; 70 std::vector<unsigned> LiveRegCycles; 71 72public: 73 ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb, 74 const TargetMachine &tm, bool isbottomup, 75 SchedulingPriorityQueue *availqueue) 76 : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), 77 AvailableQueue(availqueue) { 78 } 79 80 ~ScheduleDAGRRList() { 81 delete AvailableQueue; 82 } 83 84 void Schedule(); 85 86private: 87 void ReleasePred(SUnit*, bool, unsigned); 88 void ReleaseSucc(SUnit*, bool isChain, unsigned); 89 void CapturePred(SUnit*, SUnit*, bool); 90 void ScheduleNodeBottomUp(SUnit*, unsigned); 91 void ScheduleNodeTopDown(SUnit*, unsigned); 92 void UnscheduleNodeBottomUp(SUnit*); 93 void BacktrackBottomUp(SUnit*, unsigned, unsigned&); 94 SUnit *CopyAndMoveSuccessors(SUnit*); 95 void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, 96 const TargetRegisterClass*, 97 const TargetRegisterClass*, 98 SmallVector<SUnit*, 2>&); 99 bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); 100 void ListScheduleTopDown(); 101 void ListScheduleBottomUp(); 102 void CommuteNodesToReducePressure(); 103}; 104} // end anonymous namespace 105 106 107/// Schedule - Schedule the DAG using list scheduling. 108void ScheduleDAGRRList::Schedule() { 109 DOUT << "********** List Scheduling **********\n"; 110 111 LiveRegDefs.resize(MRI->getNumRegs(), NULL); 112 LiveRegCycles.resize(MRI->getNumRegs(), 0); 113 114 // Build scheduling units. 115 BuildSchedUnits(); 116 117 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 118 SUnits[su].dumpAll(&DAG)); 119 CalculateDepths(); 120 CalculateHeights(); 121 122 AvailableQueue->initNodes(SUnitMap, SUnits); 123 124 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 125 if (isBottomUp) 126 ListScheduleBottomUp(); 127 else 128 ListScheduleTopDown(); 129 130 AvailableQueue->releaseState(); 131 132 CommuteNodesToReducePressure(); 133 134 DOUT << "*** Final schedule ***\n"; 135 DEBUG(dumpSchedule()); 136 DOUT << "\n"; 137 138 // Emit in scheduled order 139 EmitSchedule(); 140} 141 142/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 143/// it is not the last use of its first operand, add it to the CommuteSet if 144/// possible. It will be commuted when it is translated to a MI. 145void ScheduleDAGRRList::CommuteNodesToReducePressure() { 146 SmallPtrSet<SUnit*, 4> OperandSeen; 147 for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. 148 SUnit *SU = Sequence[i]; 149 if (!SU || !SU->Node) continue; 150 if (SU->isCommutable) { 151 unsigned Opc = SU->Node->getTargetOpcode(); 152 unsigned NumRes = TII->getNumDefs(Opc); 153 unsigned NumOps = CountOperands(SU->Node); 154 for (unsigned j = 0; j != NumOps; ++j) { 155 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1) 156 continue; 157 158 SDNode *OpN = SU->Node->getOperand(j).Val; 159 SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; 160 if (OpSU && OperandSeen.count(OpSU) == 1) { 161 // Ok, so SU is not the last use of OpSU, but SU is two-address so 162 // it will clobber OpSU. Try to commute SU if no other source operands 163 // are live below. 164 bool DoCommute = true; 165 for (unsigned k = 0; k < NumOps; ++k) { 166 if (k != j) { 167 OpN = SU->Node->getOperand(k).Val; 168 OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; 169 if (OpSU && OperandSeen.count(OpSU) == 1) { 170 DoCommute = false; 171 break; 172 } 173 } 174 } 175 if (DoCommute) 176 CommuteSet.insert(SU->Node); 177 } 178 179 // Only look at the first use&def node for now. 180 break; 181 } 182 } 183 184 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 185 I != E; ++I) { 186 if (!I->isCtrl) 187 OperandSeen.insert(I->Dep); 188 } 189 } 190} 191 192//===----------------------------------------------------------------------===// 193// Bottom-Up Scheduling 194//===----------------------------------------------------------------------===// 195 196/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 197/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 198void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 199 unsigned CurCycle) { 200 // FIXME: the distance between two nodes is not always == the predecessor's 201 // latency. For example, the reader can very well read the register written 202 // by the predecessor later than the issue cycle. It also depends on the 203 // interrupt model (drain vs. freeze). 204 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); 205 206 --PredSU->NumSuccsLeft; 207 208#ifndef NDEBUG 209 if (PredSU->NumSuccsLeft < 0) { 210 cerr << "*** List scheduling failed! ***\n"; 211 PredSU->dump(&DAG); 212 cerr << " has been released too many times!\n"; 213 assert(0); 214 } 215#endif 216 217 if (PredSU->NumSuccsLeft == 0) { 218 // EntryToken has to go last! Special case it here. 219 if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) { 220 PredSU->isAvailable = true; 221 AvailableQueue->push(PredSU); 222 } 223 } 224} 225 226/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending 227/// count of its predecessors. If a predecessor pending count is zero, add it to 228/// the Available queue. 229void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { 230 DOUT << "*** Scheduling [" << CurCycle << "]: "; 231 DEBUG(SU->dump(&DAG)); 232 SU->Cycle = CurCycle; 233 234 AvailableQueue->ScheduledNode(SU); 235 236 // Bottom up: release predecessors 237 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 238 I != E; ++I) { 239 ReleasePred(I->Dep, I->isCtrl, CurCycle); 240 if (I->Cost < 0) { 241 // This is a physical register dependency and it's impossible or 242 // expensive to copy the register. Make sure nothing that can 243 // clobber the register is scheduled between the predecessor and 244 // this node. 245 if (LiveRegs.insert(I->Reg)) { 246 LiveRegDefs[I->Reg] = I->Dep; 247 LiveRegCycles[I->Reg] = CurCycle; 248 } 249 } 250 } 251 252 // Release all the implicit physical register defs that are live. 253 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 254 I != E; ++I) { 255 if (I->Cost < 0) { 256 if (LiveRegCycles[I->Reg] == I->Dep->Cycle) { 257 LiveRegs.erase(I->Reg); 258 assert(LiveRegDefs[I->Reg] == SU && 259 "Physical register dependency violated?"); 260 LiveRegDefs[I->Reg] = NULL; 261 LiveRegCycles[I->Reg] = 0; 262 } 263 } 264 } 265 266 SU->isScheduled = true; 267} 268 269/// CapturePred - This does the opposite of ReleasePred. Since SU is being 270/// unscheduled, incrcease the succ left count of its predecessors. Remove 271/// them from AvailableQueue if necessary. 272void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) { 273 PredSU->CycleBound = 0; 274 for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end(); 275 I != E; ++I) { 276 if (I->Dep == SU) 277 continue; 278 PredSU->CycleBound = std::max(PredSU->CycleBound, 279 I->Dep->Cycle + PredSU->Latency); 280 } 281 282 if (PredSU->isAvailable) { 283 PredSU->isAvailable = false; 284 if (!PredSU->isPending) 285 AvailableQueue->remove(PredSU); 286 } 287 288 ++PredSU->NumSuccsLeft; 289} 290 291/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and 292/// its predecessor states to reflect the change. 293void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { 294 DOUT << "*** Unscheduling [" << SU->Cycle << "]: "; 295 DEBUG(SU->dump(&DAG)); 296 297 AvailableQueue->UnscheduledNode(SU); 298 299 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 300 I != E; ++I) { 301 CapturePred(I->Dep, SU, I->isCtrl); 302 if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) { 303 LiveRegs.erase(I->Reg); 304 assert(LiveRegDefs[I->Reg] == I->Dep && 305 "Physical register dependency violated?"); 306 LiveRegDefs[I->Reg] = NULL; 307 LiveRegCycles[I->Reg] = 0; 308 } 309 } 310 311 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 312 I != E; ++I) { 313 if (I->Cost < 0) { 314 if (LiveRegs.insert(I->Reg)) { 315 assert(!LiveRegDefs[I->Reg] && 316 "Physical register dependency violated?"); 317 LiveRegDefs[I->Reg] = SU; 318 } 319 if (I->Dep->Cycle < LiveRegCycles[I->Reg]) 320 LiveRegCycles[I->Reg] = I->Dep->Cycle; 321 } 322 } 323 324 SU->Cycle = 0; 325 SU->isScheduled = false; 326 SU->isAvailable = true; 327 AvailableQueue->push(SU); 328} 329 330// FIXME: This is probably too slow! 331static void isReachable(SUnit *SU, SUnit *TargetSU, 332 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 333 if (Reached) return; 334 if (SU == TargetSU) { 335 Reached = true; 336 return; 337 } 338 if (!Visited.insert(SU)) return; 339 340 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 341 ++I) 342 isReachable(I->Dep, TargetSU, Visited, Reached); 343} 344 345static bool isReachable(SUnit *SU, SUnit *TargetSU) { 346 SmallPtrSet<SUnit*, 32> Visited; 347 bool Reached = false; 348 isReachable(SU, TargetSU, Visited, Reached); 349 return Reached; 350} 351 352/// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 353/// create a cycle. 354static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 355 if (isReachable(TargetSU, SU)) 356 return true; 357 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 358 I != E; ++I) 359 if (I->Cost < 0 && isReachable(TargetSU, I->Dep)) 360 return true; 361 return false; 362} 363 364/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 365/// BTCycle in order to schedule a specific node. Returns the last unscheduled 366/// SUnit. Also returns if a successor is unscheduled in the process. 367void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 368 unsigned &CurCycle) { 369 SUnit *OldSU = NULL; 370 while (CurCycle > BtCycle) { 371 OldSU = Sequence.back(); 372 Sequence.pop_back(); 373 if (SU->isSucc(OldSU)) 374 // Don't try to remove SU from AvailableQueue. 375 SU->isAvailable = false; 376 UnscheduleNodeBottomUp(OldSU); 377 --CurCycle; 378 } 379 380 381 if (SU->isSucc(OldSU)) { 382 assert(false && "Something is wrong!"); 383 abort(); 384 } 385 386 ++NumBacktracks; 387} 388 389/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 390/// successors to the newly created node. 391SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { 392 if (SU->FlaggedNodes.size()) 393 return NULL; 394 395 SDNode *N = SU->Node; 396 if (!N) 397 return NULL; 398 399 SUnit *NewSU; 400 bool TryUnfold = false; 401 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 402 MVT::ValueType VT = N->getValueType(i); 403 if (VT == MVT::Flag) 404 return NULL; 405 else if (VT == MVT::Other) 406 TryUnfold = true; 407 } 408 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 409 const SDOperand &Op = N->getOperand(i); 410 MVT::ValueType VT = Op.Val->getValueType(Op.ResNo); 411 if (VT == MVT::Flag) 412 return NULL; 413 } 414 415 if (TryUnfold) { 416 SmallVector<SDNode*, 4> NewNodes; 417 if (!MRI->unfoldMemoryOperand(DAG, N, NewNodes)) 418 return NULL; 419 420 DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; 421 assert(NewNodes.size() == 2 && "Expected a load folding node!"); 422 423 N = NewNodes[1]; 424 SDNode *LoadNode = NewNodes[0]; 425 unsigned NumVals = N->getNumValues(); 426 unsigned OldNumVals = SU->Node->getNumValues(); 427 for (unsigned i = 0; i != NumVals; ++i) 428 DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), SDOperand(N, i)); 429 DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1), 430 SDOperand(LoadNode, 1)); 431 432 SUnit *NewSU = NewSUnit(N); 433 SUnitMap[N].push_back(NewSU); 434 const TargetInstrDescriptor *TID = &TII->get(N->getTargetOpcode()); 435 for (unsigned i = 0; i != TID->numOperands; ++i) { 436 if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) { 437 NewSU->isTwoAddress = true; 438 break; 439 } 440 } 441 if (TID->Flags & M_COMMUTABLE) 442 NewSU->isCommutable = true; 443 // FIXME: Calculate height / depth and propagate the changes? 444 NewSU->Depth = SU->Depth; 445 NewSU->Height = SU->Height; 446 ComputeLatency(NewSU); 447 448 // LoadNode may already exist. This can happen when there is another 449 // load from the same location and producing the same type of value 450 // but it has different alignment or volatileness. 451 bool isNewLoad = true; 452 SUnit *LoadSU; 453 DenseMap<SDNode*, std::vector<SUnit*> >::iterator SMI = 454 SUnitMap.find(LoadNode); 455 if (SMI != SUnitMap.end()) { 456 LoadSU = SMI->second.front(); 457 isNewLoad = false; 458 } else { 459 LoadSU = NewSUnit(LoadNode); 460 SUnitMap[LoadNode].push_back(LoadSU); 461 462 LoadSU->Depth = SU->Depth; 463 LoadSU->Height = SU->Height; 464 ComputeLatency(LoadSU); 465 } 466 467 SUnit *ChainPred = NULL; 468 SmallVector<SDep, 4> ChainSuccs; 469 SmallVector<SDep, 4> LoadPreds; 470 SmallVector<SDep, 4> NodePreds; 471 SmallVector<SDep, 4> NodeSuccs; 472 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 473 I != E; ++I) { 474 if (I->isCtrl) 475 ChainPred = I->Dep; 476 else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode)) 477 LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); 478 else 479 NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); 480 } 481 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 482 I != E; ++I) { 483 if (I->isCtrl) 484 ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, 485 I->isCtrl, I->isSpecial)); 486 else 487 NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, 488 I->isCtrl, I->isSpecial)); 489 } 490 491 SU->removePred(ChainPred, true, false); 492 if (isNewLoad) 493 LoadSU->addPred(ChainPred, true, false); 494 for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { 495 SDep *Pred = &LoadPreds[i]; 496 SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); 497 if (isNewLoad) 498 LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, 499 Pred->Reg, Pred->Cost); 500 } 501 for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { 502 SDep *Pred = &NodePreds[i]; 503 SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); 504 NewSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, 505 Pred->Reg, Pred->Cost); 506 } 507 for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 508 SDep *Succ = &NodeSuccs[i]; 509 Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); 510 Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial, 511 Succ->Reg, Succ->Cost); 512 } 513 for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { 514 SDep *Succ = &ChainSuccs[i]; 515 Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); 516 if (isNewLoad) 517 Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial, 518 Succ->Reg, Succ->Cost); 519 } 520 if (isNewLoad) 521 NewSU->addPred(LoadSU, false, false); 522 523 if (isNewLoad) 524 AvailableQueue->addNode(LoadSU); 525 AvailableQueue->addNode(NewSU); 526 527 ++NumUnfolds; 528 529 if (NewSU->NumSuccsLeft == 0) { 530 NewSU->isAvailable = true; 531 return NewSU; 532 } 533 SU = NewSU; 534 } 535 536 DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; 537 NewSU = Clone(SU); 538 539 // New SUnit has the exact same predecessors. 540 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 541 I != E; ++I) 542 if (!I->isSpecial) { 543 NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost); 544 NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1); 545 } 546 547 // Only copy scheduled successors. Cut them from old node's successor 548 // list and move them over. 549 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 550 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 551 I != E; ++I) { 552 if (I->isSpecial) 553 continue; 554 if (I->Dep->isScheduled) { 555 NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1); 556 I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost); 557 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 558 } 559 } 560 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 561 SUnit *Succ = DelDeps[i].first; 562 bool isCtrl = DelDeps[i].second; 563 Succ->removePred(SU, isCtrl, false); 564 } 565 566 AvailableQueue->updateNode(SU); 567 AvailableQueue->addNode(NewSU); 568 569 ++NumDups; 570 return NewSU; 571} 572 573/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies 574/// and move all scheduled successors of the given SUnit to the last copy. 575void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, 576 const TargetRegisterClass *DestRC, 577 const TargetRegisterClass *SrcRC, 578 SmallVector<SUnit*, 2> &Copies) { 579 SUnit *CopyFromSU = NewSUnit(NULL); 580 CopyFromSU->CopySrcRC = SrcRC; 581 CopyFromSU->CopyDstRC = DestRC; 582 CopyFromSU->Depth = SU->Depth; 583 CopyFromSU->Height = SU->Height; 584 585 SUnit *CopyToSU = NewSUnit(NULL); 586 CopyToSU->CopySrcRC = DestRC; 587 CopyToSU->CopyDstRC = SrcRC; 588 589 // Only copy scheduled successors. Cut them from old node's successor 590 // list and move them over. 591 SmallVector<std::pair<SUnit*, bool>, 4> DelDeps; 592 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 593 I != E; ++I) { 594 if (I->isSpecial) 595 continue; 596 if (I->Dep->isScheduled) { 597 CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1); 598 I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost); 599 DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl)); 600 } 601 } 602 for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { 603 SUnit *Succ = DelDeps[i].first; 604 bool isCtrl = DelDeps[i].second; 605 Succ->removePred(SU, isCtrl, false); 606 } 607 608 CopyFromSU->addPred(SU, false, false, Reg, -1); 609 CopyToSU->addPred(CopyFromSU, false, false, Reg, 1); 610 611 AvailableQueue->updateNode(SU); 612 AvailableQueue->addNode(CopyFromSU); 613 AvailableQueue->addNode(CopyToSU); 614 Copies.push_back(CopyFromSU); 615 Copies.push_back(CopyToSU); 616 617 ++NumCCCopies; 618} 619 620/// getPhysicalRegisterVT - Returns the ValueType of the physical register 621/// definition of the specified node. 622/// FIXME: Move to SelectionDAG? 623static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg, 624 const TargetInstrInfo *TII) { 625 const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode()); 626 assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); 627 unsigned NumRes = TID.numDefs; 628 for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) { 629 if (Reg == *ImpDef) 630 break; 631 ++NumRes; 632 } 633 return N->getValueType(NumRes); 634} 635 636/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay 637/// scheduling of the given node to satisfy live physical register dependencies. 638/// If the specific node is the last one that's available to schedule, do 639/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 640bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU, 641 SmallVector<unsigned, 4> &LRegs){ 642 if (LiveRegs.empty()) 643 return false; 644 645 SmallSet<unsigned, 4> RegAdded; 646 // If this node would clobber any "live" register, then it's not ready. 647 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 648 I != E; ++I) { 649 if (I->Cost < 0) { 650 unsigned Reg = I->Reg; 651 if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) { 652 if (RegAdded.insert(Reg)) 653 LRegs.push_back(Reg); 654 } 655 for (const unsigned *Alias = MRI->getAliasSet(Reg); 656 *Alias; ++Alias) 657 if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) { 658 if (RegAdded.insert(*Alias)) 659 LRegs.push_back(*Alias); 660 } 661 } 662 } 663 664 for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) { 665 SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1]; 666 if (!Node || !Node->isTargetOpcode()) 667 continue; 668 const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode()); 669 if (!TID.ImplicitDefs) 670 continue; 671 for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { 672 if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) { 673 if (RegAdded.insert(*Reg)) 674 LRegs.push_back(*Reg); 675 } 676 for (const unsigned *Alias = MRI->getAliasSet(*Reg); 677 *Alias; ++Alias) 678 if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) { 679 if (RegAdded.insert(*Alias)) 680 LRegs.push_back(*Alias); 681 } 682 } 683 } 684 return !LRegs.empty(); 685} 686 687 688/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up 689/// schedulers. 690void ScheduleDAGRRList::ListScheduleBottomUp() { 691 unsigned CurCycle = 0; 692 // Add root to Available queue. 693 SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front(); 694 RootSU->isAvailable = true; 695 AvailableQueue->push(RootSU); 696 697 // While Available queue is not empty, grab the node with the highest 698 // priority. If it is not ready put it back. Schedule the node. 699 SmallVector<SUnit*, 4> NotReady; 700 while (!AvailableQueue->empty()) { 701 bool Delayed = false; 702 DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; 703 SUnit *CurSU = AvailableQueue->pop(); 704 while (CurSU) { 705 if (CurSU->CycleBound <= CurCycle) { 706 SmallVector<unsigned, 4> LRegs; 707 if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) 708 break; 709 Delayed = true; 710 LRegsMap.insert(std::make_pair(CurSU, LRegs)); 711 } 712 713 CurSU->isPending = true; // This SU is not in AvailableQueue right now. 714 NotReady.push_back(CurSU); 715 CurSU = AvailableQueue->pop(); 716 } 717 718 // All candidates are delayed due to live physical reg dependencies. 719 // Try backtracking, code duplication, or inserting cross class copies 720 // to resolve it. 721 if (Delayed && !CurSU) { 722 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 723 SUnit *TrySU = NotReady[i]; 724 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 725 726 // Try unscheduling up to the point where it's safe to schedule 727 // this node. 728 unsigned LiveCycle = CurCycle; 729 for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { 730 unsigned Reg = LRegs[j]; 731 unsigned LCycle = LiveRegCycles[Reg]; 732 LiveCycle = std::min(LiveCycle, LCycle); 733 } 734 SUnit *OldSU = Sequence[LiveCycle]; 735 if (!WillCreateCycle(TrySU, OldSU)) { 736 BacktrackBottomUp(TrySU, LiveCycle, CurCycle); 737 // Force the current node to be scheduled before the node that 738 // requires the physical reg dep. 739 if (OldSU->isAvailable) { 740 OldSU->isAvailable = false; 741 AvailableQueue->remove(OldSU); 742 } 743 TrySU->addPred(OldSU, true, true); 744 // If one or more successors has been unscheduled, then the current 745 // node is no longer avaialable. Schedule a successor that's now 746 // available instead. 747 if (!TrySU->isAvailable) 748 CurSU = AvailableQueue->pop(); 749 else { 750 CurSU = TrySU; 751 TrySU->isPending = false; 752 NotReady.erase(NotReady.begin()+i); 753 } 754 break; 755 } 756 } 757 758 if (!CurSU) { 759 // Can't backtrace. Try duplicating the nodes that produces these 760 // "expensive to copy" values to break the dependency. In case even 761 // that doesn't work, insert cross class copies. 762 SUnit *TrySU = NotReady[0]; 763 SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; 764 assert(LRegs.size() == 1 && "Can't handle this yet!"); 765 unsigned Reg = LRegs[0]; 766 SUnit *LRDef = LiveRegDefs[Reg]; 767 SUnit *NewDef = CopyAndMoveSuccessors(LRDef); 768 if (!NewDef) { 769 // Issue expensive cross register class copies. 770 MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII); 771 const TargetRegisterClass *RC = 772 MRI->getPhysicalRegisterRegClass(VT, Reg); 773 const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC); 774 if (!DestRC) { 775 assert(false && "Don't know how to copy this physical register!"); 776 abort(); 777 } 778 SmallVector<SUnit*, 2> Copies; 779 InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); 780 DOUT << "Adding an edge from SU # " << TrySU->NodeNum 781 << " to SU #" << Copies.front()->NodeNum << "\n"; 782 TrySU->addPred(Copies.front(), true, true); 783 NewDef = Copies.back(); 784 } 785 786 DOUT << "Adding an edge from SU # " << NewDef->NodeNum 787 << " to SU #" << TrySU->NodeNum << "\n"; 788 LiveRegDefs[Reg] = NewDef; 789 NewDef->addPred(TrySU, true, true); 790 TrySU->isAvailable = false; 791 CurSU = NewDef; 792 } 793 794 if (!CurSU) { 795 assert(false && "Unable to resolve live physical register dependencies!"); 796 abort(); 797 } 798 } 799 800 // Add the nodes that aren't ready back onto the available list. 801 for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { 802 NotReady[i]->isPending = false; 803 // May no longer be available due to backtracking. 804 if (NotReady[i]->isAvailable) 805 AvailableQueue->push(NotReady[i]); 806 } 807 NotReady.clear(); 808 809 if (!CurSU) 810 Sequence.push_back(0); 811 else { 812 ScheduleNodeBottomUp(CurSU, CurCycle); 813 Sequence.push_back(CurSU); 814 } 815 ++CurCycle; 816 } 817 818 // Add entry node last 819 if (DAG.getEntryNode().Val != DAG.getRoot().Val) { 820 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front(); 821 Sequence.push_back(Entry); 822 } 823 824 // Reverse the order if it is bottom up. 825 std::reverse(Sequence.begin(), Sequence.end()); 826 827 828#ifndef NDEBUG 829 // Verify that all SUnits were scheduled. 830 bool AnyNotSched = false; 831 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 832 if (SUnits[i].NumSuccsLeft != 0) { 833 if (!AnyNotSched) 834 cerr << "*** List scheduling failed! ***\n"; 835 SUnits[i].dump(&DAG); 836 cerr << "has not been scheduled!\n"; 837 AnyNotSched = true; 838 } 839 } 840 assert(!AnyNotSched); 841#endif 842} 843 844//===----------------------------------------------------------------------===// 845// Top-Down Scheduling 846//===----------------------------------------------------------------------===// 847 848/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to 849/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 850void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain, 851 unsigned CurCycle) { 852 // FIXME: the distance between two nodes is not always == the predecessor's 853 // latency. For example, the reader can very well read the register written 854 // by the predecessor later than the issue cycle. It also depends on the 855 // interrupt model (drain vs. freeze). 856 SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency); 857 858 --SuccSU->NumPredsLeft; 859 860#ifndef NDEBUG 861 if (SuccSU->NumPredsLeft < 0) { 862 cerr << "*** List scheduling failed! ***\n"; 863 SuccSU->dump(&DAG); 864 cerr << " has been released too many times!\n"; 865 assert(0); 866 } 867#endif 868 869 if (SuccSU->NumPredsLeft == 0) { 870 SuccSU->isAvailable = true; 871 AvailableQueue->push(SuccSU); 872 } 873} 874 875 876/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending 877/// count of its successors. If a successor pending count is zero, add it to 878/// the Available queue. 879void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { 880 DOUT << "*** Scheduling [" << CurCycle << "]: "; 881 DEBUG(SU->dump(&DAG)); 882 SU->Cycle = CurCycle; 883 884 AvailableQueue->ScheduledNode(SU); 885 886 // Top down: release successors 887 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 888 I != E; ++I) 889 ReleaseSucc(I->Dep, I->isCtrl, CurCycle); 890 SU->isScheduled = true; 891} 892 893/// ListScheduleTopDown - The main loop of list scheduling for top-down 894/// schedulers. 895void ScheduleDAGRRList::ListScheduleTopDown() { 896 unsigned CurCycle = 0; 897 SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front(); 898 899 // All leaves to Available queue. 900 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 901 // It is available if it has no predecessors. 902 if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) { 903 AvailableQueue->push(&SUnits[i]); 904 SUnits[i].isAvailable = true; 905 } 906 } 907 908 // Emit the entry node first. 909 ScheduleNodeTopDown(Entry, CurCycle); 910 Sequence.push_back(Entry); 911 ++CurCycle; 912 913 // While Available queue is not empty, grab the node with the highest 914 // priority. If it is not ready put it back. Schedule the node. 915 std::vector<SUnit*> NotReady; 916 while (!AvailableQueue->empty()) { 917 SUnit *CurSU = AvailableQueue->pop(); 918 while (CurSU && CurSU->CycleBound > CurCycle) { 919 NotReady.push_back(CurSU); 920 CurSU = AvailableQueue->pop(); 921 } 922 923 // Add the nodes that aren't ready back onto the available list. 924 AvailableQueue->push_all(NotReady); 925 NotReady.clear(); 926 927 if (!CurSU) 928 Sequence.push_back(0); 929 else { 930 ScheduleNodeTopDown(CurSU, CurCycle); 931 Sequence.push_back(CurSU); 932 } 933 CurCycle++; 934 } 935 936 937#ifndef NDEBUG 938 // Verify that all SUnits were scheduled. 939 bool AnyNotSched = false; 940 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { 941 if (!SUnits[i].isScheduled) { 942 if (!AnyNotSched) 943 cerr << "*** List scheduling failed! ***\n"; 944 SUnits[i].dump(&DAG); 945 cerr << "has not been scheduled!\n"; 946 AnyNotSched = true; 947 } 948 } 949 assert(!AnyNotSched); 950#endif 951} 952 953 954 955//===----------------------------------------------------------------------===// 956// RegReductionPriorityQueue Implementation 957//===----------------------------------------------------------------------===// 958// 959// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers 960// to reduce register pressure. 961// 962namespace { 963 template<class SF> 964 class RegReductionPriorityQueue; 965 966 /// Sorting functions for the Available queue. 967 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 968 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 969 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 970 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 971 972 bool operator()(const SUnit* left, const SUnit* right) const; 973 }; 974 975 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 976 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 977 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 978 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 979 980 bool operator()(const SUnit* left, const SUnit* right) const; 981 }; 982} // end anonymous namespace 983 984static inline bool isCopyFromLiveIn(const SUnit *SU) { 985 SDNode *N = SU->Node; 986 return N && N->getOpcode() == ISD::CopyFromReg && 987 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 988} 989 990namespace { 991 template<class SF> 992 class VISIBILITY_HIDDEN RegReductionPriorityQueue 993 : public SchedulingPriorityQueue { 994 std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; 995 996 public: 997 RegReductionPriorityQueue() : 998 Queue(SF(this)) {} 999 1000 virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1001 std::vector<SUnit> &sunits) {} 1002 1003 virtual void addNode(const SUnit *SU) {} 1004 1005 virtual void updateNode(const SUnit *SU) {} 1006 1007 virtual void releaseState() {} 1008 1009 virtual unsigned getNodePriority(const SUnit *SU) const { 1010 return 0; 1011 } 1012 1013 unsigned size() const { return Queue.size(); } 1014 1015 bool empty() const { return Queue.empty(); } 1016 1017 void push(SUnit *U) { 1018 Queue.push(U); 1019 } 1020 void push_all(const std::vector<SUnit *> &Nodes) { 1021 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1022 Queue.push(Nodes[i]); 1023 } 1024 1025 SUnit *pop() { 1026 if (empty()) return NULL; 1027 SUnit *V = Queue.top(); 1028 Queue.pop(); 1029 return V; 1030 } 1031 1032 /// remove - This is a really inefficient way to remove a node from a 1033 /// priority queue. We should roll our own heap to make this better or 1034 /// something. 1035 void remove(SUnit *SU) { 1036 std::vector<SUnit*> Temp; 1037 1038 assert(!Queue.empty() && "Not in queue!"); 1039 while (Queue.top() != SU) { 1040 Temp.push_back(Queue.top()); 1041 Queue.pop(); 1042 assert(!Queue.empty() && "Not in queue!"); 1043 } 1044 1045 // Remove the node from the PQ. 1046 Queue.pop(); 1047 1048 // Add all the other nodes back. 1049 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 1050 Queue.push(Temp[i]); 1051 } 1052 }; 1053 1054 template<class SF> 1055 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 1056 : public RegReductionPriorityQueue<SF> { 1057 // SUnitMap SDNode to SUnit mapping (n -> n). 1058 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1059 1060 // SUnits - The SUnits for the current graph. 1061 const std::vector<SUnit> *SUnits; 1062 1063 // SethiUllmanNumbers - The SethiUllman number for each node. 1064 std::vector<unsigned> SethiUllmanNumbers; 1065 1066 const TargetInstrInfo *TII; 1067 public: 1068 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii) 1069 : TII(tii) {} 1070 1071 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1072 std::vector<SUnit> &sunits) { 1073 SUnitMap = &sumap; 1074 SUnits = &sunits; 1075 // Add pseudo dependency edges for two-address nodes. 1076 AddPseudoTwoAddrDeps(); 1077 // Calculate node priorities. 1078 CalculateSethiUllmanNumbers(); 1079 } 1080 1081 void addNode(const SUnit *SU) { 1082 SethiUllmanNumbers.resize(SUnits->size(), 0); 1083 CalcNodeSethiUllmanNumber(SU); 1084 } 1085 1086 void updateNode(const SUnit *SU) { 1087 SethiUllmanNumbers[SU->NodeNum] = 0; 1088 CalcNodeSethiUllmanNumber(SU); 1089 } 1090 1091 void releaseState() { 1092 SUnits = 0; 1093 SethiUllmanNumbers.clear(); 1094 } 1095 1096 unsigned getNodePriority(const SUnit *SU) const { 1097 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1098 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1099 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1100 // CopyFromReg should be close to its def because it restricts 1101 // allocation choices. But if it is a livein then perhaps we want it 1102 // closer to its uses so it can be coalesced. 1103 return 0xffff; 1104 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1105 // CopyToReg should be close to its uses to facilitate coalescing and 1106 // avoid spilling. 1107 return 0; 1108 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1109 Opc == TargetInstrInfo::INSERT_SUBREG) 1110 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1111 // facilitate coalescing. 1112 return 0; 1113 else if (SU->NumSuccs == 0) 1114 // If SU does not have a use, i.e. it doesn't produce a value that would 1115 // be consumed (e.g. store), then it terminates a chain of computation. 1116 // Give it a large SethiUllman number so it will be scheduled right 1117 // before its predecessors that it doesn't lengthen their live ranges. 1118 return 0xffff; 1119 else if (SU->NumPreds == 0) 1120 // If SU does not have a def, schedule it close to its uses because it 1121 // does not lengthen any live ranges. 1122 return 0; 1123 else 1124 return SethiUllmanNumbers[SU->NodeNum]; 1125 } 1126 1127 private: 1128 bool canClobber(SUnit *SU, SUnit *Op); 1129 void AddPseudoTwoAddrDeps(); 1130 void CalculateSethiUllmanNumbers(); 1131 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1132 }; 1133 1134 1135 template<class SF> 1136 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 1137 : public RegReductionPriorityQueue<SF> { 1138 // SUnitMap SDNode to SUnit mapping (n -> n). 1139 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1140 1141 // SUnits - The SUnits for the current graph. 1142 const std::vector<SUnit> *SUnits; 1143 1144 // SethiUllmanNumbers - The SethiUllman number for each node. 1145 std::vector<unsigned> SethiUllmanNumbers; 1146 1147 public: 1148 TDRegReductionPriorityQueue() {} 1149 1150 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1151 std::vector<SUnit> &sunits) { 1152 SUnitMap = &sumap; 1153 SUnits = &sunits; 1154 // Calculate node priorities. 1155 CalculateSethiUllmanNumbers(); 1156 } 1157 1158 void addNode(const SUnit *SU) { 1159 SethiUllmanNumbers.resize(SUnits->size(), 0); 1160 CalcNodeSethiUllmanNumber(SU); 1161 } 1162 1163 void updateNode(const SUnit *SU) { 1164 SethiUllmanNumbers[SU->NodeNum] = 0; 1165 CalcNodeSethiUllmanNumber(SU); 1166 } 1167 1168 void releaseState() { 1169 SUnits = 0; 1170 SethiUllmanNumbers.clear(); 1171 } 1172 1173 unsigned getNodePriority(const SUnit *SU) const { 1174 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1175 return SethiUllmanNumbers[SU->NodeNum]; 1176 } 1177 1178 private: 1179 void CalculateSethiUllmanNumbers(); 1180 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1181 }; 1182} 1183 1184/// closestSucc - Returns the scheduled cycle of the successor which is 1185/// closet to the current cycle. 1186static unsigned closestSucc(const SUnit *SU) { 1187 unsigned MaxCycle = 0; 1188 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1189 I != E; ++I) { 1190 unsigned Cycle = I->Dep->Cycle; 1191 // If there are bunch of CopyToRegs stacked up, they should be considered 1192 // to be at the same position. 1193 if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg) 1194 Cycle = closestSucc(I->Dep)+1; 1195 if (Cycle > MaxCycle) 1196 MaxCycle = Cycle; 1197 } 1198 return MaxCycle; 1199} 1200 1201/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1202/// for scratch registers. Live-in operands and live-out results don't count 1203/// since they are "fixed". 1204static unsigned calcMaxScratches(const SUnit *SU) { 1205 unsigned Scratches = 0; 1206 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1207 I != E; ++I) { 1208 if (I->isCtrl) continue; // ignore chain preds 1209 if (I->Dep->Node->getOpcode() != ISD::CopyFromReg) 1210 Scratches++; 1211 } 1212 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1213 I != E; ++I) { 1214 if (I->isCtrl) continue; // ignore chain succs 1215 if (I->Dep->Node->getOpcode() != ISD::CopyToReg) 1216 Scratches += 10; 1217 } 1218 return Scratches; 1219} 1220 1221// Bottom up 1222bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1223 // There used to be a special tie breaker here that looked for 1224 // two-address instructions and preferred the instruction with a 1225 // def&use operand. The special case triggered diagnostics when 1226 // _GLIBCXX_DEBUG was enabled because it broke the strict weak 1227 // ordering that priority_queue requires. It didn't help much anyway 1228 // because AddPseudoTwoAddrDeps already covers many of the cases 1229 // where it would have applied. In addition, it's counter-intuitive 1230 // that a tie breaker would be the first thing attempted. There's a 1231 // "real" tie breaker below that is the operation of last resort. 1232 // The fact that the "special tie breaker" would trigger when there 1233 // wasn't otherwise a tie is what broke the strict weak ordering 1234 // constraint. 1235 1236 unsigned LPriority = SPQ->getNodePriority(left); 1237 unsigned RPriority = SPQ->getNodePriority(right); 1238 if (LPriority > RPriority) 1239 return true; 1240 else if (LPriority == RPriority) { 1241 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1242 // e.g. 1243 // t1 = op t2, c1 1244 // t3 = op t4, c2 1245 // 1246 // and the following instructions are both ready. 1247 // t2 = op c3 1248 // t4 = op c4 1249 // 1250 // Then schedule t2 = op first. 1251 // i.e. 1252 // t4 = op c4 1253 // t2 = op c3 1254 // t1 = op t2, c1 1255 // t3 = op t4, c2 1256 // 1257 // This creates more short live intervals. 1258 unsigned LDist = closestSucc(left); 1259 unsigned RDist = closestSucc(right); 1260 if (LDist < RDist) 1261 return true; 1262 else if (LDist == RDist) { 1263 // Intuitively, it's good to push down instructions whose results are 1264 // liveout so their long live ranges won't conflict with other values 1265 // which are needed inside the BB. Further prioritize liveout instructions 1266 // by the number of operands which are calculated within the BB. 1267 unsigned LScratch = calcMaxScratches(left); 1268 unsigned RScratch = calcMaxScratches(right); 1269 if (LScratch > RScratch) 1270 return true; 1271 else if (LScratch == RScratch) 1272 if (left->Height > right->Height) 1273 return true; 1274 else if (left->Height == right->Height) 1275 if (left->Depth < right->Depth) 1276 return true; 1277 else if (left->Depth == right->Depth) 1278 if (left->CycleBound > right->CycleBound) 1279 return true; 1280 } 1281 } 1282 return false; 1283} 1284 1285template<class SF> 1286bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) { 1287 if (SU->isTwoAddress) { 1288 unsigned Opc = SU->Node->getTargetOpcode(); 1289 unsigned NumRes = TII->getNumDefs(Opc); 1290 unsigned NumOps = ScheduleDAG::CountOperands(SU->Node); 1291 for (unsigned i = 0; i != NumOps; ++i) { 1292 if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) { 1293 SDNode *DU = SU->Node->getOperand(i).Val; 1294 if ((*SUnitMap).find(DU) != (*SUnitMap).end() && 1295 Op == (*SUnitMap)[DU][SU->InstanceNo]) 1296 return true; 1297 } 1298 } 1299 } 1300 return false; 1301} 1302 1303 1304/// hasCopyToRegUse - Return true if SU has a value successor that is a 1305/// CopyToReg node. 1306static bool hasCopyToRegUse(SUnit *SU) { 1307 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1308 I != E; ++I) { 1309 if (I->isCtrl) continue; 1310 SUnit *SuccSU = I->Dep; 1311 if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg) 1312 return true; 1313 } 1314 return false; 1315} 1316 1317/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1318/// it as a def&use operand. Add a pseudo control edge from it to the other 1319/// node (if it won't create a cycle) so the two-address one will be scheduled 1320/// first (lower in the schedule). If both nodes are two-address, favor the 1321/// one that has a CopyToReg use (more likely to be a loop induction update). 1322/// If both are two-address, but one is commutable while the other is not 1323/// commutable, favor the one that's not commutable. 1324template<class SF> 1325void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1326 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1327 SUnit *SU = (SUnit *)&((*SUnits)[i]); 1328 if (!SU->isTwoAddress) 1329 continue; 1330 1331 SDNode *Node = SU->Node; 1332 if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0) 1333 continue; 1334 1335 unsigned Opc = Node->getTargetOpcode(); 1336 unsigned NumRes = TII->getNumDefs(Opc); 1337 unsigned NumOps = ScheduleDAG::CountOperands(Node); 1338 for (unsigned j = 0; j != NumOps; ++j) { 1339 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { 1340 SDNode *DU = SU->Node->getOperand(j).Val; 1341 if ((*SUnitMap).find(DU) == (*SUnitMap).end()) 1342 continue; 1343 SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; 1344 if (!DUSU) continue; 1345 for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); 1346 I != E; ++I) { 1347 if (I->isCtrl) continue; 1348 SUnit *SuccSU = I->Dep; 1349 // Don't constrain nodes with implicit defs. It can create cycles 1350 // plus it may increase register pressures. 1351 if (SuccSU == SU || SuccSU->hasPhysRegDefs) 1352 continue; 1353 // Be conservative. Ignore if nodes aren't at roughly the same 1354 // depth and height. 1355 if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) 1356 continue; 1357 if (SuccSU->Depth > SU->Depth && (SuccSU->Depth - SU->Depth) > 1) 1358 continue; 1359 if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode()) 1360 continue; 1361 // Don't constraint extract_subreg / insert_subreg these may be 1362 // coalesced away. We don't them close to their uses. 1363 unsigned SuccOpc = SuccSU->Node->getTargetOpcode(); 1364 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1365 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1366 continue; 1367 if ((!canClobber(SuccSU, DUSU) || 1368 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1369 (!SU->isCommutable && SuccSU->isCommutable)) && 1370 !isReachable(SuccSU, SU)) { 1371 DOUT << "Adding an edge from SU # " << SU->NodeNum 1372 << " to SU #" << SuccSU->NodeNum << "\n"; 1373 SU->addPred(SuccSU, true, true); 1374 } 1375 } 1376 } 1377 } 1378 } 1379} 1380 1381/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1382/// Smaller number is the higher priority. 1383template<class SF> 1384unsigned BURegReductionPriorityQueue<SF>:: 1385CalcNodeSethiUllmanNumber(const SUnit *SU) { 1386 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1387 if (SethiUllmanNumber != 0) 1388 return SethiUllmanNumber; 1389 1390 unsigned Extra = 0; 1391 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1392 I != E; ++I) { 1393 if (I->isCtrl) continue; // ignore chain preds 1394 SUnit *PredSU = I->Dep; 1395 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1396 if (PredSethiUllman > SethiUllmanNumber) { 1397 SethiUllmanNumber = PredSethiUllman; 1398 Extra = 0; 1399 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1400 ++Extra; 1401 } 1402 1403 SethiUllmanNumber += Extra; 1404 1405 if (SethiUllmanNumber == 0) 1406 SethiUllmanNumber = 1; 1407 1408 return SethiUllmanNumber; 1409} 1410 1411/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1412/// scheduling units. 1413template<class SF> 1414void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1415 SethiUllmanNumbers.assign(SUnits->size(), 0); 1416 1417 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1418 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1419} 1420 1421static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { 1422 unsigned Sum = 0; 1423 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1424 I != E; ++I) { 1425 SUnit *SuccSU = I->Dep; 1426 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1427 EE = SuccSU->Preds.end(); II != EE; ++II) { 1428 SUnit *PredSU = II->Dep; 1429 if (!PredSU->isScheduled) 1430 ++Sum; 1431 } 1432 } 1433 1434 return Sum; 1435} 1436 1437 1438// Top down 1439bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1440 unsigned LPriority = SPQ->getNodePriority(left); 1441 unsigned RPriority = SPQ->getNodePriority(right); 1442 bool LIsTarget = left->Node && left->Node->isTargetOpcode(); 1443 bool RIsTarget = right->Node && right->Node->isTargetOpcode(); 1444 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1445 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1446 unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; 1447 unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; 1448 1449 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1450 return false; 1451 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1452 return true; 1453 1454 // Special tie breaker: if two nodes share a operand, the one that use it 1455 // as a def&use operand is preferred. 1456 if (LIsTarget && RIsTarget) { 1457 if (left->isTwoAddress && !right->isTwoAddress) { 1458 SDNode *DUNode = left->Node->getOperand(0).Val; 1459 if (DUNode->isOperand(right->Node)) 1460 RBonus += 2; 1461 } 1462 if (!left->isTwoAddress && right->isTwoAddress) { 1463 SDNode *DUNode = right->Node->getOperand(0).Val; 1464 if (DUNode->isOperand(left->Node)) 1465 LBonus += 2; 1466 } 1467 } 1468 if (LIsFloater) 1469 LBonus -= 2; 1470 if (RIsFloater) 1471 RBonus -= 2; 1472 if (left->NumSuccs == 1) 1473 LBonus += 2; 1474 if (right->NumSuccs == 1) 1475 RBonus += 2; 1476 1477 if (LPriority+LBonus < RPriority+RBonus) 1478 return true; 1479 else if (LPriority == RPriority) 1480 if (left->Depth < right->Depth) 1481 return true; 1482 else if (left->Depth == right->Depth) 1483 if (left->NumSuccsLeft > right->NumSuccsLeft) 1484 return true; 1485 else if (left->NumSuccsLeft == right->NumSuccsLeft) 1486 if (left->CycleBound > right->CycleBound) 1487 return true; 1488 return false; 1489} 1490 1491/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1492/// Smaller number is the higher priority. 1493template<class SF> 1494unsigned TDRegReductionPriorityQueue<SF>:: 1495CalcNodeSethiUllmanNumber(const SUnit *SU) { 1496 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1497 if (SethiUllmanNumber != 0) 1498 return SethiUllmanNumber; 1499 1500 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1501 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1502 SethiUllmanNumber = 0xffff; 1503 else if (SU->NumSuccsLeft == 0) 1504 // If SU does not have a use, i.e. it doesn't produce a value that would 1505 // be consumed (e.g. store), then it terminates a chain of computation. 1506 // Give it a small SethiUllman number so it will be scheduled right before 1507 // its predecessors that it doesn't lengthen their live ranges. 1508 SethiUllmanNumber = 0; 1509 else if (SU->NumPredsLeft == 0 && 1510 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 1511 SethiUllmanNumber = 0xffff; 1512 else { 1513 int Extra = 0; 1514 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1515 I != E; ++I) { 1516 if (I->isCtrl) continue; // ignore chain preds 1517 SUnit *PredSU = I->Dep; 1518 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1519 if (PredSethiUllman > SethiUllmanNumber) { 1520 SethiUllmanNumber = PredSethiUllman; 1521 Extra = 0; 1522 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1523 ++Extra; 1524 } 1525 1526 SethiUllmanNumber += Extra; 1527 } 1528 1529 return SethiUllmanNumber; 1530} 1531 1532/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1533/// scheduling units. 1534template<class SF> 1535void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1536 SethiUllmanNumbers.assign(SUnits->size(), 0); 1537 1538 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1539 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1540} 1541 1542//===----------------------------------------------------------------------===// 1543// Public Constructor Functions 1544//===----------------------------------------------------------------------===// 1545 1546llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1547 SelectionDAG *DAG, 1548 MachineBasicBlock *BB) { 1549 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1550 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1551 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII)); 1552} 1553 1554llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1555 SelectionDAG *DAG, 1556 MachineBasicBlock *BB) { 1557 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1558 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1559} 1560 1561