ScheduleDAGRRList.cpp revision 1b98919de35bee879f414e9b97b38eeb9df287bc
//===----- ScheduleDAGList.cpp - Reg pressure reduction list scheduler ----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
#include <queue>
#include "llvm/Support/CommandLine.h"
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtraced");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

// Register the two flavors of this scheduler with the scheduler registry so
// they can be selected by name (-pre-RA-sched=list-burr / list-tdrr).
static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "  Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "  Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  /// Owned by this scheduler; deleted in the destructor.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegs / LiveRegDefs - A set of physical registers and their definition
  /// that are "live". These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.  LiveRegDefs and LiveRegCycles are
  /// indexed by physical register number; LiveRegCycles records the cycle at
  /// which each register became live.
  SmallSet<unsigned, 4> LiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

public:
  ScheduleDAGRRList(SelectionDAG &dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue) {
  }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

private:
  void ReleasePred(SUnit*, bool, unsigned);
  void ReleaseSucc(SUnit*, bool isChain, unsigned);
  void CapturePred(SUnit*, SUnit*, bool);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
107void ScheduleDAGRRList::Schedule() { 108 DOUT << "********** List Scheduling **********\n"; 109 110 LiveRegDefs.resize(MRI->getNumRegs(), NULL); 111 LiveRegCycles.resize(MRI->getNumRegs(), 0); 112 113 // Build scheduling units. 114 BuildSchedUnits(); 115 116 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 117 SUnits[su].dumpAll(&DAG)); 118 CalculateDepths(); 119 CalculateHeights(); 120 121 AvailableQueue->initNodes(SUnitMap, SUnits); 122 123 // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. 124 if (isBottomUp) 125 ListScheduleBottomUp(); 126 else 127 ListScheduleTopDown(); 128 129 AvailableQueue->releaseState(); 130 131 CommuteNodesToReducePressure(); 132 133 DOUT << "*** Final schedule ***\n"; 134 DEBUG(dumpSchedule()); 135 DOUT << "\n"; 136 137 // Emit in scheduled order 138 EmitSchedule(); 139} 140 141/// CommuteNodesToReducePressure - If a node is two-address and commutable, and 142/// it is not the last use of its first operand, add it to the CommuteSet if 143/// possible. It will be commuted when it is translated to a MI. 144void ScheduleDAGRRList::CommuteNodesToReducePressure() { 145 SmallPtrSet<SUnit*, 4> OperandSeen; 146 for (unsigned i = Sequence.size()-1; i != 0; --i) { // Ignore first node. 147 SUnit *SU = Sequence[i]; 148 if (!SU || !SU->Node) continue; 149 if (SU->isCommutable) { 150 unsigned Opc = SU->Node->getTargetOpcode(); 151 unsigned NumRes = TII->getNumDefs(Opc); 152 unsigned NumOps = CountOperands(SU->Node); 153 for (unsigned j = 0; j != NumOps; ++j) { 154 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) == -1) 155 continue; 156 157 SDNode *OpN = SU->Node->getOperand(j).Val; 158 SUnit *OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; 159 if (OpSU && OperandSeen.count(OpSU) == 1) { 160 // Ok, so SU is not the last use of OpSU, but SU is two-address so 161 // it will clobber OpSU. Try to commute SU if no other source operands 162 // are live below. 
163 bool DoCommute = true; 164 for (unsigned k = 0; k < NumOps; ++k) { 165 if (k != j) { 166 OpN = SU->Node->getOperand(k).Val; 167 OpSU = isPassiveNode(OpN) ? NULL : SUnitMap[OpN][SU->InstanceNo]; 168 if (OpSU && OperandSeen.count(OpSU) == 1) { 169 DoCommute = false; 170 break; 171 } 172 } 173 } 174 if (DoCommute) 175 CommuteSet.insert(SU->Node); 176 } 177 178 // Only look at the first use&def node for now. 179 break; 180 } 181 } 182 183 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 184 I != E; ++I) { 185 if (!I->isCtrl) 186 OperandSeen.insert(I->Dep); 187 } 188 } 189} 190 191//===----------------------------------------------------------------------===// 192// Bottom-Up Scheduling 193//===----------------------------------------------------------------------===// 194 195/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to 196/// the AvailableQueue if the count reaches zero. Also update its cycle bound. 197void ScheduleDAGRRList::ReleasePred(SUnit *PredSU, bool isChain, 198 unsigned CurCycle) { 199 // FIXME: the distance between two nodes is not always == the predecessor's 200 // latency. For example, the reader can very well read the register written 201 // by the predecessor later than the issue cycle. It also depends on the 202 // interrupt model (drain vs. freeze). 203 PredSU->CycleBound = std::max(PredSU->CycleBound, CurCycle + PredSU->Latency); 204 205 --PredSU->NumSuccsLeft; 206 207#ifndef NDEBUG 208 if (PredSU->NumSuccsLeft < 0) { 209 cerr << "*** List scheduling failed! ***\n"; 210 PredSU->dump(&DAG); 211 cerr << " has been released too many times!\n"; 212 assert(0); 213 } 214#endif 215 216 if (PredSU->NumSuccsLeft == 0) { 217 // EntryToken has to go last! Special case it here. 218 if (!PredSU->Node || PredSU->Node->getOpcode() != ISD::EntryToken) { 219 PredSU->isAvailable = true; 220 AvailableQueue->push(PredSU); 221 } 222 } 223} 224 225/// ScheduleNodeBottomUp - Add the node to the schedule. 
/// Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(I->Dep, I->isCtrl, CurCycle);
    if (I->Cost < 0)  {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      // NOTE: SmallSet::insert returns true only when the register was not
      // already in the set, so only the first such dep records the def.
      if (LiveRegs.insert(I->Reg)) {
        LiveRegDefs[I->Reg] = I->Dep;
        LiveRegCycles[I->Reg] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      // Only clear the reg if this scheduled cycle matches the cycle at
      // which it became live (i.e. SU is the def being waited on).
      if (LiveRegCycles[I->Reg] == I->Dep->Cycle) {
        LiveRegs.erase(I->Reg);
        assert(LiveRegDefs[I->Reg] == SU &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = NULL;
        LiveRegCycles[I->Reg] = 0;
      }
    }
  }

  SU->isScheduled = true;
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SUnit *PredSU, SUnit *SU, bool isChain) {
  // Recompute PredSU's cycle bound from its remaining *scheduled* successors,
  // excluding the SU that is being unscheduled.
  PredSU->CycleBound = 0;
  for (SUnit::succ_iterator I = PredSU->Succs.begin(), E = PredSU->Succs.end();
       I != E; ++I) {
    if (I->Dep == SU)
      continue;
    PredSU->CycleBound = std::max(PredSU->CycleBound,
                                  I->Dep->Cycle + PredSU->Latency);
  }

  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
/// its predecessor states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
  DEBUG(SU->dump(&DAG));

  AvailableQueue->UnscheduledNode(SU);

  // Undo what ScheduleNodeBottomUp did for each predecessor edge: restore
  // succ counts and clear any live-reg state this node's scheduling created.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(I->Dep, SU, I->isCtrl);
    if (I->Cost < 0 && SU->Cycle == LiveRegCycles[I->Reg]) {
      LiveRegs.erase(I->Reg);
      assert(LiveRegDefs[I->Reg] == I->Dep &&
             "Physical register dependency violated?");
      LiveRegDefs[I->Reg] = NULL;
      LiveRegCycles[I->Reg] = 0;
    }
  }

  // SU's physical-register defs become live again: its (still scheduled)
  // successors are once more waiting on them.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      if (LiveRegs.insert(I->Reg)) {
        assert(!LiveRegDefs[I->Reg] &&
               "Physical register dependency violated?");
        LiveRegDefs[I->Reg] = SU;
      }
      // The register is live from the earliest scheduled user onwards.
      if (I->Dep->Cycle < LiveRegCycles[I->Reg])
        LiveRegCycles[I->Reg] = I->Dep->Cycle;
    }
  }

  SU->Cycle = 0;
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

// FIXME: This is probably too slow!
330static void isReachable(SUnit *SU, SUnit *TargetSU, 331 SmallPtrSet<SUnit*, 32> &Visited, bool &Reached) { 332 if (Reached) return; 333 if (SU == TargetSU) { 334 Reached = true; 335 return; 336 } 337 if (!Visited.insert(SU)) return; 338 339 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; 340 ++I) 341 isReachable(I->Dep, TargetSU, Visited, Reached); 342} 343 344static bool isReachable(SUnit *SU, SUnit *TargetSU) { 345 SmallPtrSet<SUnit*, 32> Visited; 346 bool Reached = false; 347 isReachable(SU, TargetSU, Visited, Reached); 348 return Reached; 349} 350 351/// willCreateCycle - Returns true if adding an edge from SU to TargetSU will 352/// create a cycle. 353static bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { 354 if (isReachable(TargetSU, SU)) 355 return true; 356 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 357 I != E; ++I) 358 if (I->Cost < 0 && isReachable(TargetSU, I->Dep)) 359 return true; 360 return false; 361} 362 363/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in 364/// BTCycle in order to schedule a specific node. Returns the last unscheduled 365/// SUnit. Also returns if a successor is unscheduled in the process. 366void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, 367 unsigned &CurCycle) { 368 SUnit *OldSU = NULL; 369 while (CurCycle > BtCycle) { 370 OldSU = Sequence.back(); 371 Sequence.pop_back(); 372 if (SU->isSucc(OldSU)) 373 // Don't try to remove SU from AvailableQueue. 374 SU->isAvailable = false; 375 UnscheduleNodeBottomUp(OldSU); 376 --CurCycle; 377 } 378 379 380 if (SU->isSucc(OldSU)) { 381 assert(false && "Something is wrong!"); 382 abort(); 383 } 384 385 ++NumBacktracks; 386} 387 388/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled 389/// successors to the newly created node. 
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  // Cannot clone a node that has flagged (glued) companions.
  if (SU->FlaggedNodes.size())
    return NULL;

  SDNode *N = SU->Node;
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  // A Flag result or operand pins the node; a chain result means it may be a
  // folded load that can be unfolded instead of cloned.
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT::ValueType VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDOperand &Op = N->getOperand(i);
    MVT::ValueType VT = Op.Val->getValueType(Op.ResNo);
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    // Split the folded (load + op) node back into a separate load and op.
    SmallVector<SDNode*, 4> NewNodes;
    if (!MRI->unfoldMemoryOperand(DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->Node->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), SDOperand(N, i));
    // The old node's last result (its chain) is now produced by the load.
    DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1),
                                  SDOperand(LoadNode, 1));

    // NOTE(review): this declaration shadows the outer NewSU; the outer one
    // is only assigned later by Clone() if we fall through past TryUnfold.
    SUnit *NewSU = NewSUnit(N);
    SUnitMap[N].push_back(NewSU);
    const TargetInstrDescriptor *TID = &TII->get(N->getTargetOpcode());
    for (unsigned i = 0; i != TID->numOperands; ++i) {
      if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID->Flags & M_COMMUTABLE)
      NewSU->isCommutable = true;
    // FIXME: Calculate height / depth and propagate the changes?
    NewSU->Depth = SU->Depth;
    NewSU->Height = SU->Height;
    ComputeLatency(NewSU);

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatileness.
    bool isNewLoad = true;
    SUnit *LoadSU;
    DenseMap<SDNode*, std::vector<SUnit*> >::iterator SMI =
      SUnitMap.find(LoadNode);
    if (SMI != SUnitMap.end()) {
      LoadSU = SMI->second.front();
      isNewLoad = false;
    } else {
      LoadSU = NewSUnit(LoadNode);
      SUnitMap[LoadNode].push_back(LoadSU);

      LoadSU->Depth = SU->Depth;
      LoadSU->Height = SU->Height;
      ComputeLatency(LoadSU);
    }

    // Partition SU's edges: the chain pred goes to the load, data preds that
    // feed the load address go to the load, other data preds go to the new
    // op node; chain succs go to the load, data succs go to the op node.
    SUnit *ChainPred = NULL;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainPred = I->Dep;
      else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode))
        LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
      else
        NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false));
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl)
        ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                  I->isCtrl, I->isSpecial));
      else
        NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost,
                                 I->isCtrl, I->isSpecial));
    }

    // Rewire the collected edges onto LoadSU / NewSU.
    SU->removePred(ChainPred, true, false);
    if (isNewLoad)
      LoadSU->addPred(ChainPred, true, false);
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      SDep *Pred = &LoadPreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      if (isNewLoad)
        LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                        Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      SDep *Pred = &NodePreds[i];
      SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial);
      NewSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial,
                     Pred->Reg, Pred->Cost);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep *Succ = &NodeSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial,
                         Succ->Reg, Succ->Cost);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep *Succ = &ChainSuccs[i];
      Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial);
      if (isNewLoad)
        Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial,
                           Succ->Reg, Succ->Cost);
    }
    if (isNewLoad)
      // The new op node is chained after the load.
      NewSU->addPred(LoadSU, false, false);

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    // Otherwise fall through and clone the unfolded op node below.
    SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = Clone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isSpecial) {
      NewSU->addPred(I->Dep, I->isCtrl, false, I->Reg, I->Cost);
      NewSU->Depth = std::max(NewSU->Depth, I->Dep->Depth+1);
    }

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      NewSU->Height = std::max(NewSU->Height, I->Dep->Height+1);
      I->Dep->addPred(NewSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  // Deferred removal: removing while iterating SU->Succs would invalidate
  // the iterator above.
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVector<SUnit*, 2> &Copies) {
  // CopyFromSU copies Reg out of SrcRC into DestRC; CopyToSU copies it back.
  SUnit *CopyFromSU = NewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;
  CopyFromSU->Depth = SU->Depth;
  CopyFromSU->Height = SU->Height;

  SUnit *CopyToSU = NewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit*, bool>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isSpecial)
      continue;
    if (I->Dep->isScheduled) {
      CopyToSU->Height = std::max(CopyToSU->Height, I->Dep->Height+1);
      I->Dep->addPred(CopyToSU, I->isCtrl, false, I->Reg, I->Cost);
      DelDeps.push_back(std::make_pair(I->Dep, I->isCtrl));
    }
  }
  // Deferred removal to avoid invalidating the succ iterator above.
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    SUnit *Succ = DelDeps[i].first;
    bool isCtrl = DelDeps[i].second;
    Succ->removePred(SU, isCtrl, false);
  }

  CopyFromSU->addPred(SU, false, false, Reg, -1);
  CopyToSU->addPred(CopyFromSU, false, false, Reg, 1);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumCCCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT::ValueType getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                            const TargetInstrInfo *TII) {
  const TargetInstrDescriptor &TID = TII->get(N->getTargetOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  // Result values are laid out as explicit defs followed by implicit defs;
  // find Reg's position among the implicit defs.
  unsigned NumRes = TID.numDefs;
  for (const unsigned *ImpDef = TID.ImplicitDefs; *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (LiveRegs.empty())
    return false;

  // RegAdded prevents pushing the same register into LRegs twice.
  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->Cost < 0) {
      unsigned Reg = I->Reg;
      // The reg is a conflict unless the live def is exactly this edge's dep.
      if (LiveRegs.count(Reg) && LiveRegDefs[Reg] != I->Dep) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != I->Dep) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  // Also check implicit defs of SU itself and of all its flagged companions.
  for (unsigned i = 0, e = SU->FlaggedNodes.size()+1; i != e; ++i) {
    SDNode *Node = (i == 0) ? SU->Node : SU->FlaggedNodes[i-1];
    if (!Node || !Node->isTargetOpcode())
      continue;
    const TargetInstrDescriptor &TID = TII->get(Node->getTargetOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegs.count(*Reg) && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = MRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegs.count(*Alias) && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  SUnit *RootSU = SUnitMap[DAG.getRoot().Val].front();
  RootSU->isAvailable = true;
  AvailableQueue->push(RootSU);

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
    // Pop until we find a node that is actually schedulable this cycle;
    // nodes skipped over are parked in NotReady.
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      if (CurSU->CycleBound <= CurCycle) {
        SmallVector<unsigned, 4> LRegs;
        if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
          break;
        Delayed = true;
        LRegsMap.insert(std::make_pair(CurSU, LRegs));
      }

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          TrySU->addPred(OldSU, true, true);
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the nodes that produces these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
        if (!NewDef) {
          // Issue expensive cross register class copies.
          MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII);
          const TargetRegisterClass *RC =
            MRI->getPhysicalRegisterRegClass(VT, Reg);
          const TargetRegisterClass *DestRC = MRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          TrySU->addPred(Copies.front(), true, true);
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        NewDef->addPred(TrySU, true, true);
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false && "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    // A null entry in Sequence represents a stall (noop) cycle.
    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeBottomUp(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    ++CurCycle;
  }

  // Add entry node last
  if (DAG.getEntryNode().Val != DAG.getRoot().Val) {
    SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();
    Sequence.push_back(Entry);
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (SUnits[i].NumSuccsLeft != 0) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SuccSU, bool isChain,
                                    unsigned CurCycle) {
  // FIXME: the distance between two nodes is not always == the predecessor's
  // latency. For example, the reader can very well read the register written
  // by the predecessor later than the issue cycle. It also depends on the
  // interrupt model (drain vs. freeze).
  SuccSU->CycleBound = std::max(SuccSU->CycleBound, CurCycle + SuccSU->Latency);

  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** List scheduling failed! ***\n";
    SuccSU->dump(&DAG);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}


/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(&DAG));
  SU->Cycle = CurCycle;

  AvailableQueue->ScheduledNode(SU);

  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(I->Dep, I->isCtrl, CurCycle);
  SU->isScheduled = true;
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  SUnit *Entry = SUnitMap[DAG.getEntryNode().Val].front();

  // All leaves to Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.size() == 0 && &SUnits[i] != Entry) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // Emit the entry node first.
  ScheduleNodeTopDown(Entry, CurCycle);
  Sequence.push_back(Entry);
  ++CurCycle;

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  std::vector<SUnit*> NotReady;
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU && CurSU->CycleBound > CurCycle) {
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // Add the nodes that aren't ready back onto the available list.
    AvailableQueue->push_all(NotReady);
    NotReady.clear();

    // A null entry in Sequence represents a stall (noop) cycle.
    if (!CurSU)
      Sequence.push_back(0);
    else {
      ScheduleNodeTopDown(CurSU, CurCycle);
      Sequence.push_back(CurSU);
    }
    CurCycle++;
  }


#ifndef NDEBUG
  // Verify that all SUnits were scheduled.
  bool AnyNotSched = false;
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    if (!SUnits[i].isScheduled) {
      if (!AnyNotSched)
        cerr << "*** List scheduling failed! ***\n";
      SUnits[i].dump(&DAG);
      cerr << "has not been scheduled!\n";
      AnyNotSched = true;
    }
  }
  assert(!AnyNotSched);
#endif
}



//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
966 struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 967 RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; 968 bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} 969 bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 970 971 bool operator()(const SUnit* left, const SUnit* right) const; 972 }; 973 974 struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { 975 RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; 976 td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} 977 td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} 978 979 bool operator()(const SUnit* left, const SUnit* right) const; 980 }; 981} // end anonymous namespace 982 983static inline bool isCopyFromLiveIn(const SUnit *SU) { 984 SDNode *N = SU->Node; 985 return N && N->getOpcode() == ISD::CopyFromReg && 986 N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag; 987} 988 989namespace { 990 template<class SF> 991 class VISIBILITY_HIDDEN RegReductionPriorityQueue 992 : public SchedulingPriorityQueue { 993 std::priority_queue<SUnit*, std::vector<SUnit*>, SF> Queue; 994 995 public: 996 RegReductionPriorityQueue() : 997 Queue(SF(this)) {} 998 999 virtual void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1000 std::vector<SUnit> &sunits) {} 1001 1002 virtual void addNode(const SUnit *SU) {} 1003 1004 virtual void updateNode(const SUnit *SU) {} 1005 1006 virtual void releaseState() {} 1007 1008 virtual unsigned getNodePriority(const SUnit *SU) const { 1009 return 0; 1010 } 1011 1012 unsigned size() const { return Queue.size(); } 1013 1014 bool empty() const { return Queue.empty(); } 1015 1016 void push(SUnit *U) { 1017 Queue.push(U); 1018 } 1019 void push_all(const std::vector<SUnit *> &Nodes) { 1020 for (unsigned i = 0, e = Nodes.size(); i != e; ++i) 1021 Queue.push(Nodes[i]); 1022 } 1023 1024 SUnit *pop() { 1025 if (empty()) return NULL; 1026 SUnit *V = Queue.top(); 1027 Queue.pop(); 1028 
return V; 1029 } 1030 1031 /// remove - This is a really inefficient way to remove a node from a 1032 /// priority queue. We should roll our own heap to make this better or 1033 /// something. 1034 void remove(SUnit *SU) { 1035 std::vector<SUnit*> Temp; 1036 1037 assert(!Queue.empty() && "Not in queue!"); 1038 while (Queue.top() != SU) { 1039 Temp.push_back(Queue.top()); 1040 Queue.pop(); 1041 assert(!Queue.empty() && "Not in queue!"); 1042 } 1043 1044 // Remove the node from the PQ. 1045 Queue.pop(); 1046 1047 // Add all the other nodes back. 1048 for (unsigned i = 0, e = Temp.size(); i != e; ++i) 1049 Queue.push(Temp[i]); 1050 } 1051 }; 1052 1053 template<class SF> 1054 class VISIBILITY_HIDDEN BURegReductionPriorityQueue 1055 : public RegReductionPriorityQueue<SF> { 1056 // SUnitMap SDNode to SUnit mapping (n -> n). 1057 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1058 1059 // SUnits - The SUnits for the current graph. 1060 const std::vector<SUnit> *SUnits; 1061 1062 // SethiUllmanNumbers - The SethiUllman number for each node. 1063 std::vector<unsigned> SethiUllmanNumbers; 1064 1065 const TargetInstrInfo *TII; 1066 const MRegisterInfo *MRI; 1067 public: 1068 explicit BURegReductionPriorityQueue(const TargetInstrInfo *tii, 1069 const MRegisterInfo *mri) 1070 : TII(tii), MRI(mri) {} 1071 1072 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1073 std::vector<SUnit> &sunits) { 1074 SUnitMap = &sumap; 1075 SUnits = &sunits; 1076 // Add pseudo dependency edges for two-address nodes. 1077 AddPseudoTwoAddrDeps(); 1078 // Calculate node priorities. 
1079 CalculateSethiUllmanNumbers(); 1080 } 1081 1082 void addNode(const SUnit *SU) { 1083 SethiUllmanNumbers.resize(SUnits->size(), 0); 1084 CalcNodeSethiUllmanNumber(SU); 1085 } 1086 1087 void updateNode(const SUnit *SU) { 1088 SethiUllmanNumbers[SU->NodeNum] = 0; 1089 CalcNodeSethiUllmanNumber(SU); 1090 } 1091 1092 void releaseState() { 1093 SUnits = 0; 1094 SethiUllmanNumbers.clear(); 1095 } 1096 1097 unsigned getNodePriority(const SUnit *SU) const { 1098 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1099 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1100 if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU)) 1101 // CopyFromReg should be close to its def because it restricts 1102 // allocation choices. But if it is a livein then perhaps we want it 1103 // closer to its uses so it can be coalesced. 1104 return 0xffff; 1105 else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1106 // CopyToReg should be close to its uses to facilitate coalescing and 1107 // avoid spilling. 1108 return 0; 1109 else if (Opc == TargetInstrInfo::EXTRACT_SUBREG || 1110 Opc == TargetInstrInfo::INSERT_SUBREG) 1111 // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to 1112 // facilitate coalescing. 1113 return 0; 1114 else if (SU->NumSuccs == 0) 1115 // If SU does not have a use, i.e. it doesn't produce a value that would 1116 // be consumed (e.g. store), then it terminates a chain of computation. 1117 // Give it a large SethiUllman number so it will be scheduled right 1118 // before its predecessors that it doesn't lengthen their live ranges. 1119 return 0xffff; 1120 else if (SU->NumPreds == 0) 1121 // If SU does not have a def, schedule it close to its uses because it 1122 // does not lengthen any live ranges. 
1123 return 0; 1124 else 1125 return SethiUllmanNumbers[SU->NodeNum]; 1126 } 1127 1128 private: 1129 bool canClobber(SUnit *SU, SUnit *Op); 1130 void AddPseudoTwoAddrDeps(); 1131 void CalculateSethiUllmanNumbers(); 1132 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1133 }; 1134 1135 1136 template<class SF> 1137 class VISIBILITY_HIDDEN TDRegReductionPriorityQueue 1138 : public RegReductionPriorityQueue<SF> { 1139 // SUnitMap SDNode to SUnit mapping (n -> n). 1140 DenseMap<SDNode*, std::vector<SUnit*> > *SUnitMap; 1141 1142 // SUnits - The SUnits for the current graph. 1143 const std::vector<SUnit> *SUnits; 1144 1145 // SethiUllmanNumbers - The SethiUllman number for each node. 1146 std::vector<unsigned> SethiUllmanNumbers; 1147 1148 public: 1149 TDRegReductionPriorityQueue() {} 1150 1151 void initNodes(DenseMap<SDNode*, std::vector<SUnit*> > &sumap, 1152 std::vector<SUnit> &sunits) { 1153 SUnitMap = &sumap; 1154 SUnits = &sunits; 1155 // Calculate node priorities. 1156 CalculateSethiUllmanNumbers(); 1157 } 1158 1159 void addNode(const SUnit *SU) { 1160 SethiUllmanNumbers.resize(SUnits->size(), 0); 1161 CalcNodeSethiUllmanNumber(SU); 1162 } 1163 1164 void updateNode(const SUnit *SU) { 1165 SethiUllmanNumbers[SU->NodeNum] = 0; 1166 CalcNodeSethiUllmanNumber(SU); 1167 } 1168 1169 void releaseState() { 1170 SUnits = 0; 1171 SethiUllmanNumbers.clear(); 1172 } 1173 1174 unsigned getNodePriority(const SUnit *SU) const { 1175 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1176 return SethiUllmanNumbers[SU->NodeNum]; 1177 } 1178 1179 private: 1180 void CalculateSethiUllmanNumbers(); 1181 unsigned CalcNodeSethiUllmanNumber(const SUnit *SU); 1182 }; 1183} 1184 1185/// closestSucc - Returns the scheduled cycle of the successor which is 1186/// closet to the current cycle. 
1187static unsigned closestSucc(const SUnit *SU) { 1188 unsigned MaxCycle = 0; 1189 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1190 I != E; ++I) { 1191 unsigned Cycle = I->Dep->Cycle; 1192 // If there are bunch of CopyToRegs stacked up, they should be considered 1193 // to be at the same position. 1194 if (I->Dep->Node && I->Dep->Node->getOpcode() == ISD::CopyToReg) 1195 Cycle = closestSucc(I->Dep)+1; 1196 if (Cycle > MaxCycle) 1197 MaxCycle = Cycle; 1198 } 1199 return MaxCycle; 1200} 1201 1202/// calcMaxScratches - Returns an cost estimate of the worse case requirement 1203/// for scratch registers. Live-in operands and live-out results don't count 1204/// since they are "fixed". 1205static unsigned calcMaxScratches(const SUnit *SU) { 1206 unsigned Scratches = 0; 1207 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1208 I != E; ++I) { 1209 if (I->isCtrl) continue; // ignore chain preds 1210 if (I->Dep->Node->getOpcode() != ISD::CopyFromReg) 1211 Scratches++; 1212 } 1213 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1214 I != E; ++I) { 1215 if (I->isCtrl) continue; // ignore chain succs 1216 if (I->Dep->Node->getOpcode() != ISD::CopyToReg) 1217 Scratches += 10; 1218 } 1219 return Scratches; 1220} 1221 1222// Bottom up 1223bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1224 // There used to be a special tie breaker here that looked for 1225 // two-address instructions and preferred the instruction with a 1226 // def&use operand. The special case triggered diagnostics when 1227 // _GLIBCXX_DEBUG was enabled because it broke the strict weak 1228 // ordering that priority_queue requires. It didn't help much anyway 1229 // because AddPseudoTwoAddrDeps already covers many of the cases 1230 // where it would have applied. In addition, it's counter-intuitive 1231 // that a tie breaker would be the first thing attempted. 
There's a 1232 // "real" tie breaker below that is the operation of last resort. 1233 // The fact that the "special tie breaker" would trigger when there 1234 // wasn't otherwise a tie is what broke the strict weak ordering 1235 // constraint. 1236 1237 unsigned LPriority = SPQ->getNodePriority(left); 1238 unsigned RPriority = SPQ->getNodePriority(right); 1239 if (LPriority > RPriority) 1240 return true; 1241 else if (LPriority == RPriority) { 1242 // Try schedule def + use closer when Sethi-Ullman numbers are the same. 1243 // e.g. 1244 // t1 = op t2, c1 1245 // t3 = op t4, c2 1246 // 1247 // and the following instructions are both ready. 1248 // t2 = op c3 1249 // t4 = op c4 1250 // 1251 // Then schedule t2 = op first. 1252 // i.e. 1253 // t4 = op c4 1254 // t2 = op c3 1255 // t1 = op t2, c1 1256 // t3 = op t4, c2 1257 // 1258 // This creates more short live intervals. 1259 unsigned LDist = closestSucc(left); 1260 unsigned RDist = closestSucc(right); 1261 if (LDist < RDist) 1262 return true; 1263 else if (LDist == RDist) { 1264 // Intuitively, it's good to push down instructions whose results are 1265 // liveout so their long live ranges won't conflict with other values 1266 // which are needed inside the BB. Further prioritize liveout instructions 1267 // by the number of operands which are calculated within the BB. 
1268 unsigned LScratch = calcMaxScratches(left); 1269 unsigned RScratch = calcMaxScratches(right); 1270 if (LScratch > RScratch) 1271 return true; 1272 else if (LScratch == RScratch) 1273 if (left->Height > right->Height) 1274 return true; 1275 else if (left->Height == right->Height) 1276 if (left->Depth < right->Depth) 1277 return true; 1278 else if (left->Depth == right->Depth) 1279 if (left->CycleBound > right->CycleBound) 1280 return true; 1281 } 1282 } 1283 return false; 1284} 1285 1286template<class SF> 1287bool BURegReductionPriorityQueue<SF>::canClobber(SUnit *SU, SUnit *Op) { 1288 if (SU->isTwoAddress) { 1289 unsigned Opc = SU->Node->getTargetOpcode(); 1290 unsigned NumRes = TII->getNumDefs(Opc); 1291 unsigned NumOps = ScheduleDAG::CountOperands(SU->Node); 1292 for (unsigned i = 0; i != NumOps; ++i) { 1293 if (TII->getOperandConstraint(Opc, i+NumRes, TOI::TIED_TO) != -1) { 1294 SDNode *DU = SU->Node->getOperand(i).Val; 1295 if ((*SUnitMap).find(DU) != (*SUnitMap).end() && 1296 Op == (*SUnitMap)[DU][SU->InstanceNo]) 1297 return true; 1298 } 1299 } 1300 } 1301 return false; 1302} 1303 1304 1305/// hasCopyToRegUse - Return true if SU has a value successor that is a 1306/// CopyToReg node. 1307static bool hasCopyToRegUse(SUnit *SU) { 1308 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1309 I != E; ++I) { 1310 if (I->isCtrl) continue; 1311 SUnit *SuccSU = I->Dep; 1312 if (SuccSU->Node && SuccSU->Node->getOpcode() == ISD::CopyToReg) 1313 return true; 1314 } 1315 return false; 1316} 1317 1318/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 1319/// physical register def. 
1320static bool canClobberPhysRegDefs(SUnit *SuccSU, SUnit *SU, 1321 const TargetInstrInfo *TII, 1322 const MRegisterInfo *MRI) { 1323 SDNode *N = SuccSU->Node; 1324 unsigned NumDefs = TII->getNumDefs(N->getTargetOpcode()); 1325 const unsigned *ImpDefs = TII->getImplicitDefs(N->getTargetOpcode()); 1326 if (!ImpDefs) 1327 return false; 1328 const unsigned *SUImpDefs = TII->getImplicitDefs(SU->Node->getTargetOpcode()); 1329 if (!SUImpDefs) 1330 return false; 1331 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 1332 MVT::ValueType VT = N->getValueType(i); 1333 if (VT == MVT::Flag || VT == MVT::Other) 1334 continue; 1335 unsigned Reg = ImpDefs[i - NumDefs]; 1336 for (;*SUImpDefs; ++SUImpDefs) { 1337 unsigned SUReg = *SUImpDefs; 1338 if (MRI->regsOverlap(Reg, SUReg)) 1339 return true; 1340 } 1341 } 1342 return false; 1343} 1344 1345/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses 1346/// it as a def&use operand. Add a pseudo control edge from it to the other 1347/// node (if it won't create a cycle) so the two-address one will be scheduled 1348/// first (lower in the schedule). If both nodes are two-address, favor the 1349/// one that has a CopyToReg use (more likely to be a loop induction update). 1350/// If both are two-address, but one is commutable while the other is not 1351/// commutable, favor the one that's not commutable. 
1352template<class SF> 1353void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { 1354 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 1355 SUnit *SU = (SUnit *)&((*SUnits)[i]); 1356 if (!SU->isTwoAddress) 1357 continue; 1358 1359 SDNode *Node = SU->Node; 1360 if (!Node || !Node->isTargetOpcode() || SU->FlaggedNodes.size() > 0) 1361 continue; 1362 1363 unsigned Opc = Node->getTargetOpcode(); 1364 unsigned NumRes = TII->getNumDefs(Opc); 1365 unsigned NumOps = ScheduleDAG::CountOperands(Node); 1366 for (unsigned j = 0; j != NumOps; ++j) { 1367 if (TII->getOperandConstraint(Opc, j+NumRes, TOI::TIED_TO) != -1) { 1368 SDNode *DU = SU->Node->getOperand(j).Val; 1369 if ((*SUnitMap).find(DU) == (*SUnitMap).end()) 1370 continue; 1371 SUnit *DUSU = (*SUnitMap)[DU][SU->InstanceNo]; 1372 if (!DUSU) continue; 1373 for (SUnit::succ_iterator I = DUSU->Succs.begin(),E = DUSU->Succs.end(); 1374 I != E; ++I) { 1375 if (I->isCtrl) continue; 1376 SUnit *SuccSU = I->Dep; 1377 if (SuccSU == SU) 1378 continue; 1379 // Be conservative. Ignore if nodes aren't at roughly the same 1380 // depth and height. 1381 if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1) 1382 continue; 1383 if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode()) 1384 continue; 1385 // Don't constrain nodes with physical register defs if the 1386 // predecessor can cloober them. 1387 if (SuccSU->hasPhysRegDefs) { 1388 if (canClobberPhysRegDefs(SuccSU, SU, TII, MRI)) 1389 continue; 1390 } 1391 // Don't constraint extract_subreg / insert_subreg these may be 1392 // coalesced away. We don't them close to their uses. 
1393 unsigned SuccOpc = SuccSU->Node->getTargetOpcode(); 1394 if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG || 1395 SuccOpc == TargetInstrInfo::INSERT_SUBREG) 1396 continue; 1397 if ((!canClobber(SuccSU, DUSU) || 1398 (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || 1399 (!SU->isCommutable && SuccSU->isCommutable)) && 1400 !isReachable(SuccSU, SU)) { 1401 DOUT << "Adding an edge from SU # " << SU->NodeNum 1402 << " to SU #" << SuccSU->NodeNum << "\n"; 1403 SU->addPred(SuccSU, true, true); 1404 } 1405 } 1406 } 1407 } 1408 } 1409} 1410 1411/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1412/// Smaller number is the higher priority. 1413template<class SF> 1414unsigned BURegReductionPriorityQueue<SF>:: 1415CalcNodeSethiUllmanNumber(const SUnit *SU) { 1416 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1417 if (SethiUllmanNumber != 0) 1418 return SethiUllmanNumber; 1419 1420 unsigned Extra = 0; 1421 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1422 I != E; ++I) { 1423 if (I->isCtrl) continue; // ignore chain preds 1424 SUnit *PredSU = I->Dep; 1425 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1426 if (PredSethiUllman > SethiUllmanNumber) { 1427 SethiUllmanNumber = PredSethiUllman; 1428 Extra = 0; 1429 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1430 ++Extra; 1431 } 1432 1433 SethiUllmanNumber += Extra; 1434 1435 if (SethiUllmanNumber == 0) 1436 SethiUllmanNumber = 1; 1437 1438 return SethiUllmanNumber; 1439} 1440 1441/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1442/// scheduling units. 
1443template<class SF> 1444void BURegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1445 SethiUllmanNumbers.assign(SUnits->size(), 0); 1446 1447 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1448 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1449} 1450 1451static unsigned SumOfUnscheduledPredsOfSuccs(const SUnit *SU) { 1452 unsigned Sum = 0; 1453 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1454 I != E; ++I) { 1455 SUnit *SuccSU = I->Dep; 1456 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 1457 EE = SuccSU->Preds.end(); II != EE; ++II) { 1458 SUnit *PredSU = II->Dep; 1459 if (!PredSU->isScheduled) 1460 ++Sum; 1461 } 1462 } 1463 1464 return Sum; 1465} 1466 1467 1468// Top down 1469bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 1470 unsigned LPriority = SPQ->getNodePriority(left); 1471 unsigned RPriority = SPQ->getNodePriority(right); 1472 bool LIsTarget = left->Node && left->Node->isTargetOpcode(); 1473 bool RIsTarget = right->Node && right->Node->isTargetOpcode(); 1474 bool LIsFloater = LIsTarget && left->NumPreds == 0; 1475 bool RIsFloater = RIsTarget && right->NumPreds == 0; 1476 unsigned LBonus = (SumOfUnscheduledPredsOfSuccs(left) == 1) ? 2 : 0; 1477 unsigned RBonus = (SumOfUnscheduledPredsOfSuccs(right) == 1) ? 2 : 0; 1478 1479 if (left->NumSuccs == 0 && right->NumSuccs != 0) 1480 return false; 1481 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 1482 return true; 1483 1484 // Special tie breaker: if two nodes share a operand, the one that use it 1485 // as a def&use operand is preferred. 
1486 if (LIsTarget && RIsTarget) { 1487 if (left->isTwoAddress && !right->isTwoAddress) { 1488 SDNode *DUNode = left->Node->getOperand(0).Val; 1489 if (DUNode->isOperand(right->Node)) 1490 RBonus += 2; 1491 } 1492 if (!left->isTwoAddress && right->isTwoAddress) { 1493 SDNode *DUNode = right->Node->getOperand(0).Val; 1494 if (DUNode->isOperand(left->Node)) 1495 LBonus += 2; 1496 } 1497 } 1498 if (LIsFloater) 1499 LBonus -= 2; 1500 if (RIsFloater) 1501 RBonus -= 2; 1502 if (left->NumSuccs == 1) 1503 LBonus += 2; 1504 if (right->NumSuccs == 1) 1505 RBonus += 2; 1506 1507 if (LPriority+LBonus < RPriority+RBonus) 1508 return true; 1509 else if (LPriority == RPriority) 1510 if (left->Depth < right->Depth) 1511 return true; 1512 else if (left->Depth == right->Depth) 1513 if (left->NumSuccsLeft > right->NumSuccsLeft) 1514 return true; 1515 else if (left->NumSuccsLeft == right->NumSuccsLeft) 1516 if (left->CycleBound > right->CycleBound) 1517 return true; 1518 return false; 1519} 1520 1521/// CalcNodeSethiUllmanNumber - Priority is the Sethi Ullman number. 1522/// Smaller number is the higher priority. 1523template<class SF> 1524unsigned TDRegReductionPriorityQueue<SF>:: 1525CalcNodeSethiUllmanNumber(const SUnit *SU) { 1526 unsigned &SethiUllmanNumber = SethiUllmanNumbers[SU->NodeNum]; 1527 if (SethiUllmanNumber != 0) 1528 return SethiUllmanNumber; 1529 1530 unsigned Opc = SU->Node ? SU->Node->getOpcode() : 0; 1531 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1532 SethiUllmanNumber = 0xffff; 1533 else if (SU->NumSuccsLeft == 0) 1534 // If SU does not have a use, i.e. it doesn't produce a value that would 1535 // be consumed (e.g. store), then it terminates a chain of computation. 1536 // Give it a small SethiUllman number so it will be scheduled right before 1537 // its predecessors that it doesn't lengthen their live ranges. 
1538 SethiUllmanNumber = 0; 1539 else if (SU->NumPredsLeft == 0 && 1540 (Opc != ISD::CopyFromReg || isCopyFromLiveIn(SU))) 1541 SethiUllmanNumber = 0xffff; 1542 else { 1543 int Extra = 0; 1544 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1545 I != E; ++I) { 1546 if (I->isCtrl) continue; // ignore chain preds 1547 SUnit *PredSU = I->Dep; 1548 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU); 1549 if (PredSethiUllman > SethiUllmanNumber) { 1550 SethiUllmanNumber = PredSethiUllman; 1551 Extra = 0; 1552 } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl) 1553 ++Extra; 1554 } 1555 1556 SethiUllmanNumber += Extra; 1557 } 1558 1559 return SethiUllmanNumber; 1560} 1561 1562/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1563/// scheduling units. 1564template<class SF> 1565void TDRegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() { 1566 SethiUllmanNumbers.assign(SUnits->size(), 0); 1567 1568 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1569 CalcNodeSethiUllmanNumber(&(*SUnits)[i]); 1570} 1571 1572//===----------------------------------------------------------------------===// 1573// Public Constructor Functions 1574//===----------------------------------------------------------------------===// 1575 1576llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 1577 SelectionDAG *DAG, 1578 MachineBasicBlock *BB) { 1579 const TargetInstrInfo *TII = DAG->getTarget().getInstrInfo(); 1580 const MRegisterInfo *MRI = DAG->getTarget().getRegisterInfo(); 1581 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), true, 1582 new BURegReductionPriorityQueue<bu_ls_rr_sort>(TII, MRI)); 1583} 1584 1585llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 1586 SelectionDAG *DAG, 1587 MachineBasicBlock *BB) { 1588 return new ScheduleDAGRRList(*DAG, BB, DAG->getTarget(), false, 1589 new TDRegReductionPriorityQueue<td_ls_rr_sort>()); 1590} 1591 1592