ScheduleDAGRRList.cpp revision 081ce940e7351e90fff829320b7dc6738a6b3815
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "  Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "  Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
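
// Both schedulers are reached through the scheduler registry entries above.
// For example (assuming the standard llc driver, which exposes the registry
// through the -pre-RA-sched option):
//
//   llc -pre-RA-sched=list-burr foo.bc
//
// selects the bottom-up register reduction list scheduler.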

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their
  /// definitions that are "live".  These nodes must be scheduled before any
  /// other nodes that modify the registers can be scheduled.
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  LiveRegDefs.resize(MRI->getNumRegs(), NULL);
  LiveRegCycles.resize(MRI->getNumRegs(), 0);

  // Build scheduling units.
  BuildSchedUnits();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(&DAG));
  CalculateDepths();
  CalculateHeights();

  AvailableQueue->initNodes(SUnitMap, SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();

  CommuteNodesToReducePressure();

  DOUT << "*** Final schedule ***\n";
  DEBUG(dumpSchedule());
  DOUT << "\n";

  // Emit in scheduled order.
  EmitSchedule();
}

/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
/// it is not the last use of its first operand, add it to the CommuteSet if
/// possible.  It will be commuted when it is translated to a MI.
void ScheduleDAGRRList::CommuteNodesToReducePressure() {
  SmallPtrSet<SUnit*, 4> OperandSeen;
  for (unsigned i = Sequence.size()-1; i != 0; --i) {  // Ignore first node.
    SUnit *SU = Sequence[i];
    if (!SU || !SU->Node) continue;
    if (SU->isCommutable) {
      unsigned Opc = SU->Node->getTargetOpcode();
      unsigned NumRes = TII->getNumDefs(Opc);
      unsigned NumOps = CountOperands(SU->Node);
      for (unsigned j = 0; j != NumOps; ++j) {
        if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1)
          continue;

        SDNode *OpN = SU->Node->getOperand(j).Val;
        SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo];
        if (OpSU && OperandSeen.count(OpSU) == 1) {
          // Ok, so SU is not the last use of OpSU, but SU is two-address so
          // it will clobber OpSU.  Try to commute SU if no other source
          // operands are live below.
          bool DoCommute = true;
          for (unsigned k = 0; k < NumOps; ++k) {
            if (k != j) {
              OpN = SU->Node->getOperand(k).Val;
              OpSU = isPassiveNode(OpN) ? NULL :
                SUnitMap[OpN][SU->InstanceNo];
              if (OpSU && OperandSeen.count(OpSU) == 1) {
                DoCommute = false;
                break;
              }
            }
          }
          if (DoCommute)
            CommuteSet.insert(SU->Node);
        }

        // Only look at the first use&def node for now.
        break;
      }
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl)
        OperandSeen.insert(I->Dep);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency);

  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    PredSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (PredSU->NumSuccsLeft == 0) {
    // EntryToken has to go last!  Special case it here.
    if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) {
      PredSU->isAvailable = true;
      AvailableQueue->push(PredSU);
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it
/// to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}
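
// Physical register liveness, illustrated (a hypothetical x86-like example):
// suppose a compare defines EFLAGS and a conditional branch reads it.  When
// the branch is scheduled bottom-up, EFLAGS enters LiveRegs with the compare
// recorded in LiveRegDefs[EFLAGS]; any node that would clobber EFLAGS is then
// delayed (see DelayForLiveRegsBottomUp below) until the compare itself has
// been scheduled and the register is released by the code above.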

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
  PredSU->CycleBound = 0;
  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
       I != E; ++I) {
    if (I->Dep == SU)
      continue;
    PredSU->CycleBound = std::max(PredSU->CycleBound,
                                  I->Dep->Cycle + PredSU->Latency);
  }

  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
  DEBUG(SU->dump(&DAG));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(I->Dep, SU, I->isCtrl);
    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
      LiveRegs.erase(I->Reg);
      assert(LiveRegDefs[I->Reg] == I->Dep &&
             "Physical register dependency violated?");
      LiveRegDefs[I->Reg] = NULL;
      LiveRegCycles[I->Reg] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegs.insert(I->Reg)) {
        assert(!LiveRegDefs[I->Reg] &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = SU;
      }
      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
        LiveRegCycles[I->Reg] = I->Dep->Cycle;
    }
  }

  SU->Cycle = 0;
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

// FIXME: This is probably too slow!
static void isReachable(SUnit *SU, SUnit *TargetSU,
                        SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) {
  if (Reached) return;
  if (SU == TargetSU) {
    Reached = true;
    return;
  }
  if (!Visited.insert(SU)) return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E;
       ++I)
    isReachable(I->Dep, TargetSU, Visited, Reached);
}

static bool isReachable(SUnit *SU, SUnit *TargetSU) {
  SmallPtrSet<SUnit*, 32> Visited;
  bool Reached = false;
  isReachable(SU, TargetSU, Visited, Reached);
  return Reached;
}

/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
/// create a cycle.
static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
  if (isReachable(TargetSU, SU))
    return true;
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (I->Cost < 0 && isReachable(TargetSU, I->Dep))
      return true;
  return false;
}
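
// For example, given the pred chain A <- B <- C (A is a pred of B, and B is a
// pred of C), WillCreateCycle(A, C) returns true: walking C's preds reaches A,
// so making C a new pred of A would close a dependency loop.  The extra loop
// over SU's physical register preds catches cycles that would form through an
// existing phys-reg dependency rather than through SU itself.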

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.  CurCycle is decremented as
/// nodes are unscheduled; if a successor of SU is unscheduled in the process,
/// SU is marked unavailable.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  if (SU->isSucc(OldSU)) {
    assert(false && "Something is wrong!");
    abort();
  }

  ++NumBacktracks;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->FlaggedNodes.size())
    return NULL;

  SDNode *N = SU->Node;
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT::ValueType VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 4> NewNodes;
    if (!MRI->unfoldMemoryOperand(DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->Node->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), SDOperand(N, i));
    DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1),
                                  SDOperand(LoadNode, 1));

    SUnit *NewSU = NewSUnit(N);
    SUnitMap[N].push_back(NewSU);
    const TargetInstrDescriptor *TID = &TII->get(N->getTargetOpcode());
    for (unsigned i = 0; i != TID->numOperands; ++i) {
      if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID->Flags & M_COMMUTABLE)
      NewSU->isCommutable = true;
    // FIXME: Calculate height / depth and propagate the changes?
    NewSU->Depth = SU->Depth;
    NewSU->Height = SU->Height;
    ComputeLatency(NewSU);

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but with a different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    DenseMap<SDNode*, std::vector<SUnit*> >::iterator SMI =
      SUnitMap.find(LoadNode);
    if (SMI != SUnitMap.end()) {
      LoadSU = SMI->second.front();
      isNewLoad = false;
    } else {
      LoadSU = NewSUnit(LoadNode);
      SUnitMap[LoadNode].push_back(LoadSU);

      LoadSU->Depth = SU->Depth;
      LoadSU->Height = SU->Height;
      ComputeLatency(LoadSU);
    }

    SUnit *ChainPred = NULL;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainPred = I->Dep;
      else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode))
        LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
      else
        NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                  I->isCtrl, I->isSpecial));
      else
        NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                 I->isCtrl, I->isSpecial));
    }

    SU->removePred(ChainPred, true, false);
    if (isNewLoad)
      LoadSU->addPred(ChainPred, true, false);
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      SDep *Pred = &LoadPreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      if (isNewLoad)
        LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                        Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      SDep *Pred = &NodePreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      NewSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                     Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep *Succ = &NodeSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial,
                         Succ->Reg, Succ->Cost);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep *Succ = &ChainSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      if (isNewLoad)
        Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial,
                           Succ->Reg, Succ->Cost);
    }
    if (isNewLoad)
      NewSU->addPred(LoadSU, false, false);

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isSpecial) {
      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
    }

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                             const TargetRegisterClass *DestRC,
                                             const TargetRegisterClass *SrcRC,
                                             SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = NewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;
  CopyFromSU->Depth = SU->Depth;
  CopyFromSU->Height = SU->Height;

  SUnit *CopyToSU = NewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1);
      I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  CopyFromSU->addPred(SU, false, false, Reg, -1);
  CopyToSU->addPred(CopyFromSU, false, false, Reg, 1);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumCCCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                            const TargetInstrInfo *TII) {
  const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.numDefs;
  for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
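
// For example, if a node has one explicit def and the implicit def list
// {EAX, EDX} (a hypothetical x86-like instruction), then for Reg == EDX the
// loop above counts past result 0 (the explicit def) and result 1 (EAX) and
// returns the value type of result #2.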

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register
/// dependencies. If the specified node is the last one that's available to
/// schedule, do whatever is necessary (i.e. backtracking or cloning) to make
/// it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                               SmallVector<unsigned, 4> &LRegs){
  if (LiveRegs.empty())
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the nodes that produce these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
        if (!NewDef) {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            MRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false &&
               "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last.
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}
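
// A scenario that exercises the fallback logic above (illustrative only):
// two instructions both implicitly define the same physical register, say
// two divides that each clobber EDX on an x86-like target, while a use of
// the first divide's EDX value is still pending.  Scheduling the second
// divide would clobber the live EDX, so the scheduler first tries to
// backtrack to the cycle where EDX became live, then to unfold or clone the
// defining node, and as a last resort inserts cross-register-class copies
// to carry the value in a register of another class.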

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}


/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    CurCycle++;
  }


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}



//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->Node;
  return N && N->getOpcode() == ISD::CopyFromReg &&
         N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
    : public SchedulingPriorityQueue {
    std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue;

  public:
    RegReductionPriorityQueue() :
      Queue(SF(this)) {}

    virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                           std::vector<SUnit> &sunits) {}

    virtual void addNode(const SUnit *SU) {}

    virtual void updateNode(const SUnit *SU) {}

    virtual void releaseState() {}

    virtual unsigned getNodePriority(const SUnit *SU) const {
      return 0;
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      Queue.push(U);
    }
    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        Queue.push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      return V;
    }

    /// remove - This is a really inefficient way to remove a node from a
    /// priority queue. We should roll our own heap to make this better or
    /// something.
    void remove(SUnit *SU) {
      std::vector<SUnit*> Temp;

      assert(!Queue.empty() && "Not in queue!");
      while (Queue.top() != SU) {
        Temp.push_back(Queue.top());
        Queue.pop();
        assert(!Queue.empty() && "Not in queue!");
      }

      // Remove the node from the PQ.
      Queue.pop();

      // Add all the other nodes back.
      for (unsigned i = 0, e = Temp.size(); i != e; ++i)
        Queue.push(Temp[i]);
    }
  };
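
  // Note on remove() above: popping elements until SU surfaces and then
  // pushing them back costs O(k log n) heap operations per removal.  A
  // cheaper scheme (a sketch only, not what this file implements) would be
  // lazy deletion: mark the SUnit dead via a hypothetical flag, leave it in
  // the heap, and have pop() discard dead entries, trading a little memory
  // and a per-pop check for much cheaper removal.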

  template<class SF>
  class VISIBILITY_HIDDEN BURegReductionPriorityQueue
    : public RegReductionPriorityQueue<SF> {
    // SUnitMap - SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    const TargetInstrInfo *TII;
    const MRegisterInfo *MRI;
  public:
    explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii,
                                         const MRegisterInfo *mri)
      : TII(tii), MRI(mri) {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
        // CopyFromReg should be close to its def because it restricts
        // allocation choices. But if it is a livein then perhaps we want it
        // closer to its uses so it can be coalesced.
        return 0xffff;
      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      else if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
               Opc == TargetInstrInfo::INSERT_SUBREG)
        // EXTRACT_SUBREG / INSERT_SUBREG should be close to their uses to
        // facilitate coalescing.
        return 0;
      else if (SU->NumSuccs == 0)
        // If SU does not have a use, i.e. it doesn't produce a value that
        // would be consumed (e.g. store), then it terminates a chain of
        // computation.  Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't
        // lengthen their live ranges.
        return 0xffff;
      else if (SU->NumPreds == 0)
        // If SU does not have a def, schedule it close to its uses because it
        // does not lengthen any live ranges.
        return 0;
      else
        return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    bool canClobber(SUnit *SU, SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };


  template<class SF>
  class VISIBILITY_HIDDEN TDRegReductionPriorityQueue
    : public RegReductionPriorityQueue<SF> {
    // SUnitMap - SDNode to SUnit mapping (n -> n).
    DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap;

    // SUnits - The SUnits for the current graph.
    const std::vector<SUnit> *SUnits;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    TDRegReductionPriorityQueue() {}

    void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap,
                   std::vector<SUnit> &sunits) {
      SUnitMap = &sumap;
      SUnits = &sunits;
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      SethiUllmanNumbers.resize(SUnits->size(), 0);
      CalcNodeSethiUllmanNumber(SU);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      return SethiUllmanNumbers[SU->NodeNum];
    }

  private:
    void CalculateSethiUllmanNumbers();
    unsigned CalcNodeSethiUllmanNumber(const SUnit *SU);
  };
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxCycle = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned Cycle = I->Dep->Cycle;
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg)
      Cycle = closestSucc(I->Dep)+1;
    if (Cycle > MaxCycle)
      MaxCycle = Cycle;
  }
  return MaxCycle;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers. Live-in operands and live-out results don't count
/// since they are "fixed".
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    if (I->Dep->Node->getOpcode() != ISD::CopyFromReg)
      Scratches++;
  }
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain succs
    if (I->Dep->Node->getOpcode() != ISD::CopyToReg)
      Scratches += 10;
  }
  return Scratches;
}
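
// Note the asymmetric weights in calcMaxScratches: a data operand that is not
// produced by a CopyFromReg costs 1, while a result consumed by anything
// other than a CopyToReg costs 10, so the estimate is dominated by how many
// of the node's results are needed inside the basic block.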

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  // There used to be a special tie breaker here that looked for
  // two-address instructions and preferred the instruction with a
  // def&use operand.  The special case triggered diagnostics when
  // _GLIBCXX_DEBUG was enabled because it broke the strict weak
  // ordering that priority_queue requires.  It didn't help much anyway
  // because AddPseudoTwoAddrDeps already covers many of the cases
  // where it would have applied.  In addition, it's counter-intuitive
  // that a tie breaker would be the first thing attempted.  There's a
  // "real" tie breaker below that is the operation of last resort.
  // The fact that the "special tie breaker" would trigger when there
  // wasn't otherwise a tie is what broke the strict weak ordering
  // constraint.

  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority > RPriority)
    return true;
  else if (LPriority == RPriority) {
    // Try schedule def + use closer when Sethi-Ullman numbers are the same.
    // e.g.
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // and the following instructions are both ready.
    // t2 = op c3
    // t4 = op c4
    //
    // Then schedule t2 = op first.
    // i.e.
    // t4 = op c4
    // t2 = op c3
    // t1 = op t2, c1
    // t3 = op t4, c2
    //
    // This creates more short live intervals.
    unsigned LDist = closestSucc(left);
    unsigned RDist = closestSucc(right);
    if (LDist < RDist)
      return true;
    else if (LDist == RDist) {
      // Intuitively, it's good to push down instructions whose results are
      // liveout so their long live ranges won't conflict with other values
      // which are needed inside the BB.  Further prioritize liveout
      // instructions by the number of operands which are calculated within
      // the BB.
      unsigned LScratch = calcMaxScratches(left);
      unsigned RScratch = calcMaxScratches(right);
      if (LScratch > RScratch)
        return true;
      else if (LScratch == RScratch)
        if (left->Height > right->Height)
          return true;
        else if (left->Height == right->Height)
          if (left->Depth < right->Depth)
            return true;
          else if (left->Depth == right->Depth)
            if (left->CycleBound > right->CycleBound)
              return true;
    }
  }
  return false;
}

template<class SF>
bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(SU->Node);
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(i).Val;
        if ((*SUnitMap).find(DU) != (*SUnitMap).end() &&
            Op == (*SUnitMap)[DU][SU->InstanceNo])
          return true;
      }
    }
  }
  return false;
}


/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl) continue;
    SUnit *SuccSU = I->Dep;
    if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(SUnit *SuccSU, SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const MRegisterInfo *MRI) {
  SDNode *N = SuccSU->Node;
  unsigned NumDefs = TII->getNumDefs(N->getTargetOpcode());
  const unsigned *ImpDefs = TII->getImplicitDefs(N->getTargetOpcode());
  if (!ImpDefs)
    return false;
  const unsigned *SUImpDefs = TII->getImplicitDefs(SU->Node->getTargetOpcode());
  if (!SUImpDefs)
    return false;
  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    MVT::ValueType VT = N->getValueType(i);
    if (VT == MVT::Flag || VT == MVT::Other)
      continue;
    unsigned Reg = ImpDefs[i - NumDefs];
    for (;*SUImpDefs; ++SUImpDefs) {
      unsigned SUReg = *SUImpDefs;
      if (MRI->regsOverlap(Reg, SUReg))
        return true;
    }
  }
  return false;
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule).  If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = (SUnit *)&((*SUnits)[i]);
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->Node;
    if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0)
      continue;

    unsigned Opc = Node->getTargetOpcode();
    unsigned NumRes = TII->getNumDefs(Opc);
    unsigned NumOps = ScheduleDAG::CountOperands(Node);
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->Node->getOperand(j).Val;
        if ((*SUnitMap).find(DU) == (*SUnitMap).end())
          continue;
        SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo];
        if (!DUSU) continue;
        for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end();
             I != E; ++I) {
          if (I->isCtrl) continue;
          SUnit *SuccSU = I->Dep;
          if (SuccSU == SU)
            continue;
          // Be conservative. Ignore if nodes aren't at roughly the same
          // depth and height.
          if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1)
            continue;
          if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode())
            continue;
          // Don't constrain nodes with physical register defs if the
          // predecessor can clobber them.
          if (SuccSU->hasPhysRegDefs) {
            if (canClobberPhysRegDefs(SuccSU, SU, TII, MRI))
              continue;
          }
          // Don't constrain extract_subreg / insert_subreg; these may be
          // coalesced away.  We want them close to their uses.
          unsigned SuccOpc = SuccSU->Node->getTargetOpcode();
          if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
              SuccOpc == TargetInstrInfo::INSERT_SUBREG)
            continue;
          if ((!canClobber(SuccSU, DUSU) ||
               (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
               (!SU->isCommutable && SuccSU->isCommutable)) &&
              !isReachable(SuccSU, SU)) {
            DOUT << "Adding an edge from SU # " << SU->NodeNum
                 << " to SU #" << SuccSU->NodeNum << "\n";
            SU->addPred(SuccSU, true, true);
          }
        }
      }
    }
  }
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi-Ullman number.
/// Smaller number is the higher priority.
template<class SF>
unsigned BURegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl) continue;  // ignore chain preds
    SUnit *PredSU = I->Dep;
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
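
// A worked example of the numbering above, for a (simplified) expression tree
// (a+b)*(c+d) where a, b, c, d are leaf nodes, ignoring chain and address
// operands:
//   each leaf has no data preds, so its number is clamped from 0 up to 1;
//   each '+' sees two preds with equal number 1: max 1, plus 1 Extra, is 2;
//   the '*' sees two preds with equal number 2: max 2, plus 1 Extra, is 3.
// This matches the classical Sethi-Ullman count of registers needed to
// evaluate the tree without spilling.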

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    SUnit *SuccSU = I->Dep;
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->Dep;
      if (!PredSU->isScheduled)
        ++Sum;
    }
  }

  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->Node && left->Node->isTargetOpcode();
  bool RIsTarget = right->Node && right->Node->isTargetOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0;
  unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  // Special tie breaker: if two nodes share an operand, the one that uses it
  // as a def&use operand is preferred.
  if (LIsTarget && RIsTarget) {
    if (left->isTwoAddress && !right->isTwoAddress) {
      SDNode *DUNode = left->Node->getOperand(0).Val;
      if (DUNode->isOperand(right->Node))
        RBonus += 2;
    }
    if (!left->isTwoAddress && right->isTwoAddress) {
      SDNode *DUNode = right->Node->getOperand(0).Val;
      if (DUNode->isOperand(left->Node))
        LBonus += 2;
    }
  }
  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus < RPriority+RBonus)
    return true;
  else if (LPriority == RPriority)
    if (left->Depth < right->Depth)
      return true;
    else if (left->Depth == right->Depth)
      if (left->NumSuccsLeft > right->NumSuccsLeft)
        return true;
      else if (left->NumSuccsLeft == right->NumSuccsLeft)
        if (left->CycleBound > right->CycleBound)
          return true;
  return false;
}

/// CalcNodeSethiUllmanNumber - Priority is the Sethi-Ullman number.
/// Smaller number is the higher priority.
template<class SF>
unsigned TDRegReductionPriorityQueue<SF>::
CalcNodeSethiUllmanNumber(const SUnit *SU) {
  unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    SethiUllmanNumber = 0xffff;
  else if (SU->NumSuccsLeft == 0)
    // If SU does not have a use, i.e. it doesn't produce a value that would
    // be consumed (e.g. store), then it terminates a chain of computation.
    // Give it a small SethiUllman number so it will be scheduled right before
    // its predecessors, so that it doesn't lengthen their live ranges.
    SethiUllmanNumber = 0;
  else if (SU->NumPredsLeft == 0 &&
           (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU)))
    SethiUllmanNumber = 0xffff;
  else {
    int Extra = 0;
    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl) continue;  // ignore chain preds
      SUnit *PredSU = I->Dep;
      unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU);
      if (PredSethiUllman > SethiUllmanNumber) {
        SethiUllmanNumber = PredSethiUllman;
        Extra = 0;
      } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl)
        ++Extra;
    }

    SethiUllmanNumber += Extra;
  }

  return SethiUllmanNumber;
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i]);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo();
  const MRegisterInfo *MRI = DAG->getTarget().getRegisterInfo();
  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true,
                      new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII, MRI));
}

llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    MachineBasicBlock *BB) {
  return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false,
                              new TDRegReductionPriorityQueue<td_ls_rr_sort>());
}