MachineScheduler.cpp revision b717a5084722a5cad843444a8b1b4bf53f1c6325
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <queue>

using namespace llvm;

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

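// These cl::opts are handy when debugging the scheduler itself. For example
// (illustrative invocation; the exact driver flags depend on the build), a
// misschedule can be bisected by halting scheduling after N instructions:
//   llc -enable-misched -misched-cutoff=42 foo.ll
// while -view-misched-dags renders each region's DAG for inspection.
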
// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

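// A target or plugin can make its own strategy selectable above by
// registering a factory. Illustrative sketch (MySchedStrategy is
// hypothetical):
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MySchedStrategy());
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Run MySchedStrategy.", createMySched);
//
// The strategy is then selectable with -misched=my-sched.
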
/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);


/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
            << "\n  From: " << *I << "    To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMI::~ScheduleDAGMI() {
  delete DFSResult;
  DeleteContainerPointers(Mutations);
  delete SchedImpl;
}

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

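// Illustrative call site (a sketch; LoadClusterMutation below does this for
// real): a DAG mutation may add a weak cluster edge, which succeeds only if
// it cannot create a cycle:
//
//   if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
//     ; // SUa and SUb may now be scheduled back-to-back.
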
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Set up the register pressure trackers for the top and bottom scheduled
// regions.
void ScheduleDAGMI::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
    RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << " Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

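// Illustrative sketch of the extension point described above (names are
// hypothetical): a ScheduleDAGMI subclass overrides this virtual method to
// layer specialized state on top of the skeleton driver.
//
//   void MyScheduleDAG::schedule() {
//     ScheduleDAGMI::schedule();
//     updateMySpecializedState();
//   }
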
/// Build the DAG and set up three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

void ScheduleDAGMI::computeDFSResult() {
  if (!DFSResult)
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
}

void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                                          SmallVectorImpl<SUnit*> &BotRoots) {
  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU->biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft)
      TopRoots.push_back(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft)
      BotRoots.push_back(SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and set up scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = NULL;
  NextClusterPred = NULL;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SmallVectorImpl<SUnit*>::const_iterator
         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
    SchedImpl->releaseTopNode(*I);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  TopRPTracker.setPos(CurrentTop);

  CurrentBottom = RegionEnd;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    // Update top scheduled pressure.
    TopRPTracker.advance();
    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    // Update bottom scheduled pressure.
    BotRPTracker.recede();
    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  }
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;

  if (DFSResult) {
    unsigned SubtreeID = DFSResult->getSubtreeID(SU);
    if (!ScheduledTrees.test(SubtreeID)) {
      ScheduledTrees.set(SubtreeID);
      DFSResult->scheduleTree(SubtreeID);
      SchedImpl->scheduleTree(SubtreeID);
    }
  }

  // Notify the scheduling strategy after updating the DAG.
  SchedImpl->schedNode(SU, IsTopNode);
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
class LoadClusterMutation : public ScheduleDAGMutation {
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}
  };
  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
                           const LoadClusterMutation::LoadInfo &RHS);

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  virtual void apply(ScheduleDAGMI *DAG);
protected:
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous

bool LoadClusterMutation::LoadInfoLess(
  const LoadClusterMutation::LoadInfo &LHS,
  const LoadClusterMutation::LoadInfo &RHS) {
  if (LHS.BaseReg != RHS.BaseReg)
    return LHS.BaseReg < RHS.BaseReg;
  return LHS.Offset < RHS.Offset;
}

void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                  ScheduleDAGMI *DAG) {
  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    SUnit *SU = Loads[Idx];
    unsigned BaseReg;
    unsigned Offset;
    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  }
  if (LoadRecords.size() < 2)
    return;
  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
      ClusterLength = 1;
      continue;
    }

    SUnit *SUa = LoadRecords[Idx].SU;
    SUnit *SUb = LoadRecords[Idx+1].SU;
    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (SUnit::const_succ_iterator
             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
        if (SI->getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    }
    else
      ClusterLength = 1;
  }
}

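// Worked example (illustrative pseudo-assembly): given loads off a common
// base register,
//   ld r2 = [r1 + 8]; ld r3 = [r1 + 0]; ld r4 = [r1 + 4]
// LoadRecords sorts to offsets 0, 4, 8. If shouldClusterLoads accepts each
// adjacent pair, cluster edges bias the scheduler to emit the loads
// contiguously in offset order, which helps load combining and prefetching.
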
/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent loads.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->mayLoad())
      continue;
    unsigned ChainPredID = DAG->SUnits.size();
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->isCtrl()) {
        ChainPredID = PI->getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen
    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(SU);
  }
  // Iterate over the store chains.
  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}

//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  virtual void apply(ScheduleDAGMI *DAG);
};
} // anonymous

/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
  // For now, assume targets can only fuse with the branch.
  MachineInstr *Branch = DAG->ExitSU.getInstr();
  if (!Branch)
    return;

  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    SUnit *SU = &DAG->SUnits[--Idx];
    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
      continue;

    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
    // need to copy predecessor edges from ExitSU to SU, since top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    // of SU, we could create an artificial edge from the deepest root, but it
    // hasn't been needed yet.
    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    (void)Success;
    assert(Success && "No DAG nodes should be reachable from ExitSU");

    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    break;
  }
}

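// Worked example (illustrative): on processors with macro-op fusion, a
// target's shouldScheduleAdjacent typically accepts a flag-setting compare
// followed by the conditional branch, e.g. on x86:
//   cmp eax, ebx
//   je  target
// The weak cluster edge to ExitSU makes bottom-up scheduling place the
// compare immediately before the branch so the hardware can fuse the pair.
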
//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//

namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, SingleExcess, SingleCritical, Cluster,
    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
    NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by ConvergingScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
    : Policy(policy), SU(NULL), Reason(NoCand) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;

    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
    // Critical resource for the unscheduled zone.
    unsigned CritResIdx;
    // Number of micro-ops left to schedule.
    unsigned RemainingMicroOps;

    void reset() {
      CriticalPath = 0;
      RemainingCounts.clear();
      CritResIdx = 0;
      RemainingMicroOps = 0;
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);

    unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
      if (!SchedModel->hasInstrSchedModel())
        return 0;

      return std::max(
        RemainingMicroOps * SchedModel->getMicroOpFactor(),
        RemainingCounts[CritResIdx]);
    }
  };

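  // Worked example for getMaxRemainingCount (illustrative numbers): resource
  // factors put micro-ops and processor resources in comparable units.
  // Assuming a normalization factor of 4, a resource B with one unit (factor
  // 4), and an issue width of 4 (micro-op factor 1): 6 remaining cycles of B
  // count 6 * 4 = 24 units while 20 remaining micro-ops count 20 * 1 = 20
  // units, so resource B is the binding limit on the remainder.
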
  /// Each Scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    unsigned CurrCycle;
    unsigned IssueCount;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // Resources used in the scheduled zone beyond this boundary.
    SmallVector<unsigned, 16> ResourceCounts;

    // Cache the critical resources ID in this scheduled zone.
    unsigned CritResIdx;

    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

    unsigned ExpectedCount;

#ifndef NDEBUG
    // Remember the greatest min operand latency.
    unsigned MaxMinLatency;
#endif

    void reset() {
      // A new HazardRec is created for each DAG and owned by SchedBoundary.
      delete HazardRec;

      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      HazardRec = 0;
      CurrCycle = 0;
      IssueCount = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      ResourceCounts.resize(1);
      assert(!ResourceCounts[0] && "nonzero count for bad resource");
      CritResIdx = 0;
      IsResourceLimited = false;
      ExpectedCount = 0;
#ifndef NDEBUG
      MaxMinLatency = 0;
#endif
      // Reserve a zero-count for invalid CritResIdx.
      ResourceCounts.resize(1);
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
      HazardRec(0) {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == ConvergingScheduler::TopQID;
    }

    unsigned getUnscheduledLatency(SUnit *SU) const {
      if (isTop())
        return SU->getHeight();
      return SU->getDepth() + SU->Latency;
    }

    unsigned getCriticalCount() const {
      return ResourceCounts[CritResIdx];
    }

    bool checkHazard(SUnit *SU);

    void setLatencyPolicy(CandPolicy &Policy);

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle();

    void countResource(unsigned PIdx, unsigned Cycles);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();
  };

private:
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
  for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
       PIdx != PEnd; ++PIdx) {
    if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
        >= (int)SchedModel->getLatencyFactor()) {
      CritResIdx = PIdx;
    }
  }
}

void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;

  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  DAG->computeDFSResult();

  // Initialize resource counts.

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isWeak())
      continue;
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them just in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

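// Worked example (illustrative): with an issue width of 4, if 3 micro-ops
// have already issued this cycle (IssueCount == 3) and SU decodes to 2
// micro-ops, then 3 + 2 > 4, so SU is reported as a hazard and stays in the
// Pending queue until bumpCycle() advances the boundary.
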
/// Compute the remaining latency to determine whether ILP should be increased.
void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
  // FIXME: compile time. In all, we visit four queues here; we should only
  // need to visit the one that was last popped if we cache the result.
  unsigned RemLatency = 0;
  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
       I != E; ++I) {
    unsigned L = getUnscheduledLatency(*I);
    if (L > RemLatency)
      RemLatency = L;
  }
  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
       I != E; ++I) {
    unsigned L = getUnscheduledLatency(*I);
    if (L > RemLatency)
      RemLatency = L;
  }
  unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
  if (RemLatency + ExpectedLatency >= CriticalPathLimit
      && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
    Policy.ReduceLatency = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
  }
}

void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Add the given processor resource to this scheduled zone.
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << "  *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}

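// Worked example (illustrative): with a latency factor of 4, suppose the
// current critical resource has accumulated 12 normalized units in this
// zone. Once another resource's count reaches 16, the difference equals a
// full cycle's worth (4), and that resource becomes the new CritResIdx.
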
/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
1472void ConvergingScheduler::SchedBoundary::releasePending() {
1473  // If the available queue is empty, it is safe to reset MinReadyCycle.
1474  if (Available.empty())
1475    MinReadyCycle = UINT_MAX;
1476
1477  // Check to see if any of the pending instructions are ready to issue.  If
1478  // so, add them to the available queue.
1479  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1480    SUnit *SU = *(Pending.begin()+i);
1481    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1482
1483    if (ReadyCycle < MinReadyCycle)
1484      MinReadyCycle = ReadyCycle;
1485
1486    if (ReadyCycle > CurrCycle)
1487      continue;
1488
1489    if (checkHazard(SU))
1490      continue;
1491
1492    Available.push(SU);
1493    Pending.remove(Pending.begin()+i);
1494    --i; --e;
1495  }
1496  DEBUG(if (!Pending.empty()) Pending.dump());
1497  CheckPending = false;
1498}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  if (!CriticalZone.IsResourceLimited)
    return;
  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}
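
// Worked example of the two tests above (all numbers hypothetical): with
// LatencyFactor=4, a max remaining count of 40 against a remaining critical
// count of 30 gives 40 - 30 = 10 > 4, so the critical zone is asked to
// reduce its critical resource; and an opposite zone with ExpectedCount=20
// that has only counted 12 units of that resource gives 20 - 12 = 8 > 4, so
// it is asked to demand more of it.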

/// Determine if the scheduled zones exceed resource limits or critical path
/// and set each candidate's ReduceLatency and ReduceResIdx policies
/// accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  // Set ReduceLatency to true if needed.
  Bot.setLatencyPolicy(BotCand.Policy);
  Top.setLatencyPolicy(TopCand.Policy);

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause
    // CurrCycle to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}
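
// The ExpectedCount computation above folds cycles and scaled resource units
// into one comparable quantity. For example (hypothetical values,
// LatencyFactor=2): a zone with ExpectedLatency=10, CurrCycle=8, and
// getCriticalCount()=25 ends up with ExpectedCount = max(25, max(10, 8) * 2)
// = 25, i.e. its cost is dominated by the critical resource, not by latency.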

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

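// Sketch of how tryLess/tryGreater chain inside tryCandidate below (made-up
// values): for a "reduce" metric with TryVal=2 and CandVal=5, tryLess marks
// TryCand.Reason and the caller returns with TryCand winning; with TryVal=5
// and CandVal=2 it still returns true, but only downgrades Cand.Reason, so
// the caller keeps Cand. Only a tie falls through to the next heuristic.
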
static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}
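
// In summary, the hierarchy above prefers, in decreasing priority: staying
// under the target's pressure limit, not raising the scheduled region's
// critical pressure, clustering (including weak edges), reducing or
// demanding the critical resource, shortening the critical path when
// latency-limited, not raising the region's overall max pressure, extending
// def-use chains, and finally the original node order.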

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect
/// is more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case Cluster:        return "CLUSTER   ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << "     ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "        ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << "         ";
  Cand.SU->dump(DAG);
}
#endif
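
// Under "-debug-only=misched" the tracing above emits one line per improved
// candidate, of the (illustrative) form:
//   CLUSTER    <queue-name> <pset>:<inc> <resource> <N> cycles SU(7): ...
// where only the fields relevant to the winning reason are populated.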

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}
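
// Taken together, the cascade above is: forced single choices first (bottom
// then top), an immediate bottom-up pick on a SingleExcess/SingleCritical
// win, then a comparison of the two zones' best reasons, then their raw
// pressure deltas via compareRPDelta, and finally bottom-up node order as
// the tie-breaker.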

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}

static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);
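
// The registry above makes the strategy selectable on the llc command line,
// e.g. (illustrative invocation):
//   llc -enable-misched -misched=converge test.ll
// The ilpmax/ilpmin/shuffle registries below are selected the same way.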

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult;
  const BitVector *ScheduledTrees;
  bool MaximizeILP;

  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

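// Because the heap algorithms used below (std::make_heap, std::push_heap,
// std::pop_heap) maintain a max-heap, this less-than comparator surfaces the
// node judged greatest at the heap root. A minimal sketch of that
// interaction (assumes DFSResult and ScheduledTrees are already set up):
//   ILPOrder Cmp(/*MaxILP=*/true);
//   std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
//   std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
//   SUnit *Best = ReadyQ.back(); // highest-ILP node among ready trees
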
/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  /// In case all subtrees are eventually connected to a common root through
  /// data dependence (e.g. reduction), place an upper limit on their size.
  ///
  /// FIXME: A subtree limit is generally good, but in the situation commented
  /// above, where multiple similar subtrees feed a common root, we should
  /// only split at a point where the resulting subtrees will be balanced.
  /// (a motivating test case must be found).
  static const unsigned SubtreeLimit = 16;

  ScheduleDAGMI *DAG;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *dag) {
    DAG = dag;
    DAG->computeDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  virtual void registerRoots() {
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
          << *SU->getInstr()
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
    return SU;
  }

  /// \brief Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. Marking newly scheduled trees and
  /// resorting the priority Q are handled by the scheduleTree callback, so
  /// only check that scheduling proceeds bottom-up here.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMI.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {

  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return G->MF.getName();
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node) {
    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
  }

  static bool hasNodeAddressLabel(const SUnit *Node,
                                  const ScheduleDAG *Graph) {
    return false;
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    SS << "SU(" << SU->NodeNum << ')';
    return SS.str();
  }
  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  static std::string getNodeAttributes(const SUnit *N,
                                       const ScheduleDAG *Graph) {
    std::string Str("shape=Mrecord");
    const SchedDFSResult *DFS =
      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};
} // namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif  // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}

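// For instance, from a debugger on a debug build with Graphviz installed
// (an illustrative session; "DAG" here is a hypothetical local pointing at a
// ScheduleDAGMI):
//   (gdb) call DAG->viewGraph()
// renders the current region much like -view-misched-dags does.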