MachineScheduler.cpp revision 3aef70314b053a1df4f85ca4a6f3890d06ebbdd6
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// MachineScheduler schedules machine instructions after phi elimination. It
11// preserves LiveIntervals so it can be invoked before register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "misched"
16
17#include "llvm/CodeGen/MachineScheduler.h"
18#include "llvm/ADT/OwningPtr.h"
19#include "llvm/ADT/PriorityQueue.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/CodeGen/LiveIntervalAnalysis.h"
22#include "llvm/CodeGen/Passes.h"
23#include "llvm/CodeGen/RegisterClassInfo.h"
24#include "llvm/CodeGen/ScheduleDFS.h"
25#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/ErrorHandling.h"
29#include "llvm/Support/raw_ostream.h"
30#include <queue>
31
32using namespace llvm;
33
34namespace llvm {
35cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
36                           cl::desc("Force top-down list scheduling"));
37cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
38                            cl::desc("Force bottom-up list scheduling"));
39}
40
41#ifndef NDEBUG
42static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
43  cl::desc("Pop up a window to show MISched dags after they are processed"));
44
45static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
46  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
47#else
48static bool ViewMISchedDAGs = false;
49#endif // NDEBUG
50
51// Threshold to very roughly model an out-of-order processor's instruction
52// buffers. If the actual value of this threshold matters much in practice, then
53// it can be specified by the machine model. For now, it's an experimental
54// tuning knob to determine when and if it matters.
55static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
56  cl::desc("Allow expected latency to exceed the critical path by N cycles "
57           "before attempting to balance ILP"),
58  cl::init(10U));
59
60// Experimental heuristics
61static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
62  cl::desc("Enable load clustering."), cl::init(true));
63
64// Experimental heuristics
65static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
66  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
67
68//===----------------------------------------------------------------------===//
69// Machine Instruction Scheduling Pass and Registry
70//===----------------------------------------------------------------------===//
71
72MachineSchedContext::MachineSchedContext():
73    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
74  RegClassInfo = new RegisterClassInfo();
75}
76
77MachineSchedContext::~MachineSchedContext() {
78  delete RegClassInfo;
79}
80
81namespace {
82/// MachineScheduler runs after coalescing and before register allocation.
83class MachineScheduler : public MachineSchedContext,
84                         public MachineFunctionPass {
85public:
86  MachineScheduler();
87
88  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
89
90  virtual void releaseMemory() {}
91
92  virtual bool runOnMachineFunction(MachineFunction&);
93
94  virtual void print(raw_ostream &O, const Module* = 0) const;
95
96  static char ID; // Class identification, replacement for typeinfo
97};
98} // namespace
99
100char MachineScheduler::ID = 0;
101
102char &llvm::MachineSchedulerID = MachineScheduler::ID;
103
104INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
105                      "Machine Instruction Scheduler", false, false)
106INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
107INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
108INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
109INITIALIZE_PASS_END(MachineScheduler, "misched",
110                    "Machine Instruction Scheduler", false, false)
111
112MachineScheduler::MachineScheduler()
113: MachineFunctionPass(ID) {
114  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
115}
116
117void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
118  AU.setPreservesCFG();
119  AU.addRequiredID(MachineDominatorsID);
120  AU.addRequired<MachineLoopInfo>();
121  AU.addRequired<AliasAnalysis>();
122  AU.addRequired<TargetPassConfig>();
123  AU.addRequired<SlotIndexes>();
124  AU.addPreserved<SlotIndexes>();
125  AU.addRequired<LiveIntervals>();
126  AU.addPreserved<LiveIntervals>();
127  MachineFunctionPass::getAnalysisUsage(AU);
128}
129
130MachinePassRegistry MachineSchedRegistry::Registry;
131
132/// A dummy default scheduler factory indicates whether the scheduler
133/// is overridden on the command line.
134static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
135  return 0;
136}
137
138/// MachineSchedOpt allows command line selection of the scheduler.
139static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
140               RegisterPassParser<MachineSchedRegistry> >
141MachineSchedOpt("misched",
142                cl::init(&useDefaultMachineSched), cl::Hidden,
143                cl::desc("Machine instruction scheduler to use"));
144
145static MachineSchedRegistry
146DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
147                     useDefaultMachineSched);
148
149/// Forward declare the standard machine scheduler. This will be used as the
150/// default scheduler if the target does not set a default.
151static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
152
153
154/// Decrement this iterator until reaching the top or a non-debug instr.
155static MachineBasicBlock::iterator
156priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
157  assert(I != Beg && "reached the top of the region, cannot decrement");
158  while (--I != Beg) {
159    if (!I->isDebugValue())
160      break;
161  }
162  return I;
163}
164
165/// If this iterator is a debug value, increment until reaching the End or a
166/// non-debug instruction.
167static MachineBasicBlock::iterator
168nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
169  for(; I != End; ++I) {
170    if (!I->isDebugValue())
171      break;
172  }
173  return I;
174}
175
176/// Top-level MachineScheduler pass driver.
177///
178/// Visit blocks in function order. Divide each block into scheduling regions
179/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
180/// consistent with the DAG builder, which traverses the interior of the
181/// scheduling regions bottom-up.
182///
183/// This design avoids exposing scheduling boundaries to the DAG builder,
184/// simplifying the DAG builder's support for "special" target instructions.
185/// At the same time the design allows target schedulers to operate across
186/// scheduling boundaries, for example to bundle the boundary instructions
187/// without reordering them. This creates complexity, because the target
188/// scheduler must update the RegionBegin and RegionEnd positions cached by
189/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
190/// design would be to split blocks at scheduling boundaries, but LLVM has a
191/// general bias against block splitting purely for implementation simplicity.
192bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
193  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
194
195  // Initialize the context of the pass.
196  MF = &mf;
197  MLI = &getAnalysis<MachineLoopInfo>();
198  MDT = &getAnalysis<MachineDominatorTree>();
199  PassConfig = &getAnalysis<TargetPassConfig>();
200  AA = &getAnalysis<AliasAnalysis>();
201
202  LIS = &getAnalysis<LiveIntervals>();
203  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
204
205  RegClassInfo->runOnMachineFunction(*MF);
206
207  // Select the scheduler, or set the default.
208  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
209  if (Ctor == useDefaultMachineSched) {
210    // Get the default scheduler set by the target.
211    Ctor = MachineSchedRegistry::getDefault();
212    if (!Ctor) {
213      Ctor = createConvergingSched;
214      MachineSchedRegistry::setDefault(Ctor);
215    }
216  }
217  // Instantiate the selected scheduler.
218  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
219
220  // Visit all machine basic blocks.
221  //
222  // TODO: Visit blocks in global postorder or postorder within the bottom-up
223  // loop tree. Then we can optionally compute global RegPressure.
224  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
225       MBB != MBBEnd; ++MBB) {
226
227    Scheduler->startBlock(MBB);
228
229    // Break the block into scheduling regions [I, RegionEnd), and schedule each
230    // region as soon as it is discovered. RegionEnd points to the scheduling
231    // boundary at the bottom of the region. The DAG does not include RegionEnd,
232    // but the region does (i.e. the next RegionEnd is above the previous
233    // RegionBegin). If the current block has no terminator then RegionEnd ==
234    // MBB->end() for the bottom region.
235    //
236    // The Scheduler may insert instructions during either schedule() or
237    // exitRegion(), even for empty regions. So the local iterators 'I' and
238    // 'RegionEnd' are invalid across these calls.
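    //
    // For illustration only (hypothetical block, and assuming the target
    // reports calls as scheduling boundaries via isSchedulingBoundary): given
    // "%a = ...; %b = ...; CALL; %c = ...; %d = ...", the bottom-up walk first
    // forms the region [%c, end), then the region [%a, CALL), counting the
    // CALL itself only as a boundary instruction that is never reordered.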
239    unsigned RemainingInstrs = MBB->size();
240    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
241        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
242
243      // Avoid decrementing RegionEnd for blocks with no terminator.
244      if (RegionEnd != MBB->end()
245          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
246        --RegionEnd;
247        // Count the boundary instruction.
248        --RemainingInstrs;
249      }
250
251      // The next region starts above the previous region. Look backward in the
252      // instruction stream until we find the nearest boundary.
253      MachineBasicBlock::iterator I = RegionEnd;
254      for(;I != MBB->begin(); --I, --RemainingInstrs) {
255        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
256          break;
257      }
258      // Notify the scheduler of the region, even if we may skip scheduling
259      // it. Perhaps it still needs to be bundled.
260      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
261
262      // Skip empty scheduling regions (0 or 1 schedulable instructions).
263      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
264        // Close the current region. Bundle the terminator if needed.
265        // This invalidates 'RegionEnd' and 'I'.
266        Scheduler->exitRegion();
267        continue;
268      }
269      DEBUG(dbgs() << "********** MI Scheduling **********\n");
270      DEBUG(dbgs() << MF->getName()
271            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
272            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
273            else dbgs() << "End";
274            dbgs() << " Remaining: " << RemainingInstrs << "\n");
275
276      // Schedule a region: possibly reorder instructions.
277      // This invalidates 'RegionEnd' and 'I'.
278      Scheduler->schedule();
279
280      // Close the current region.
281      Scheduler->exitRegion();
282
283      // Scheduling has invalidated the current iterator 'I'. Ask the
284      // scheduler for the top of its scheduled region.
285      RegionEnd = Scheduler->begin();
286    }
287    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
288    Scheduler->finishBlock();
289  }
290  Scheduler->finalizeSchedule();
291  DEBUG(LIS->print(dbgs()));
292  return true;
293}
294
295void MachineScheduler::print(raw_ostream &O, const Module* m) const {
296  // unimplemented
297}
298
299#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
300void ReadyQueue::dump() {
301  dbgs() << Name << ": ";
302  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
303    dbgs() << Queue[i]->NodeNum << " ";
304  dbgs() << "\n";
305}
306#endif
307
308//===----------------------------------------------------------------------===//
309// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
310// preservation.
311//===----------------------------------------------------------------------===//
312
313bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
314  if (SuccSU != &ExitSU) {
315    // Do not use WillCreateCycle, it assumes SD scheduling.
316    // If Pred is reachable from Succ, then the edge creates a cycle.
317    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
318      return false;
319    Topo.AddPred(SuccSU, PredDep.getSUnit());
320  }
321  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
322  // Return true regardless of whether a new edge needed to be inserted.
323  return true;
324}
325
326/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
327/// NumPredsLeft reaches zero, release the successor node.
328///
329/// FIXME: Adjust SuccSU height based on MinLatency.
330void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
331  SUnit *SuccSU = SuccEdge->getSUnit();
332
333  if (SuccEdge->isWeak()) {
334    --SuccSU->WeakPredsLeft;
335    if (SuccEdge->isCluster())
336      NextClusterSucc = SuccSU;
337    return;
338  }
339#ifndef NDEBUG
340  if (SuccSU->NumPredsLeft == 0) {
341    dbgs() << "*** Scheduling failed! ***\n";
342    SuccSU->dump(this);
343    dbgs() << " has been released too many times!\n";
344    llvm_unreachable(0);
345  }
346#endif
347  --SuccSU->NumPredsLeft;
348  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
349    SchedImpl->releaseTopNode(SuccSU);
350}
351
352/// releaseSuccessors - Call releaseSucc on each of SU's successors.
353void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
354  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
355       I != E; ++I) {
356    releaseSucc(SU, &*I);
357  }
358}
359
360/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
361/// NumSuccsLeft reaches zero, release the predecessor node.
362///
363/// FIXME: Adjust PredSU height based on MinLatency.
364void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
365  SUnit *PredSU = PredEdge->getSUnit();
366
367  if (PredEdge->isWeak()) {
368    --PredSU->WeakSuccsLeft;
369    if (PredEdge->isCluster())
370      NextClusterPred = PredSU;
371    return;
372  }
373#ifndef NDEBUG
374  if (PredSU->NumSuccsLeft == 0) {
375    dbgs() << "*** Scheduling failed! ***\n";
376    PredSU->dump(this);
377    dbgs() << " has been released too many times!\n";
378    llvm_unreachable(0);
379  }
380#endif
381  --PredSU->NumSuccsLeft;
382  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
383    SchedImpl->releaseBottomNode(PredSU);
384}
385
386/// releasePredecessors - Call releasePred on each of SU's predecessors.
387void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
388  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
389       I != E; ++I) {
390    releasePred(SU, &*I);
391  }
392}
393
394void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
395                                    MachineBasicBlock::iterator InsertPos) {
396  // Advance RegionBegin if the first instruction moves down.
397  if (&*RegionBegin == MI)
398    ++RegionBegin;
399
400  // Update the instruction stream.
401  BB->splice(InsertPos, BB, MI);
402
403  // Update LiveIntervals
404  LIS->handleMove(MI, /*UpdateFlags=*/true);
405
406  // Recede RegionBegin if an instruction moves above the first.
407  if (RegionBegin == InsertPos)
408    RegionBegin = MI;
409}
410
411bool ScheduleDAGMI::checkSchedLimit() {
412#ifndef NDEBUG
413  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
414    CurrentTop = CurrentBottom;
415    return false;
416  }
417  ++NumInstrsScheduled;
418#endif
419  return true;
420}
421
422/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
423/// crossing a scheduling boundary. [begin, end) includes all instructions in
424/// the region, including the boundary itself and single-instruction regions
425/// that don't get scheduled.
426void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
427                                MachineBasicBlock::iterator begin,
428                                MachineBasicBlock::iterator end,
429                                unsigned endcount)
430{
431  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
432
433  // For convenience remember the end of the liveness region.
434  LiveRegionEnd =
435    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
436}
437
438// Set up the register pressure trackers for the top and bottom scheduled
439// regions.
440void ScheduleDAGMI::initRegPressure() {
441  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
442  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
443
444  // Close the RPTracker to finalize live ins.
445  RPTracker.closeRegion();
446
447  DEBUG(RPTracker.getPressure().dump(TRI));
448
449  // Initialize the live ins and live outs.
450  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
451  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
452
453  // Close one end of the tracker so we can call
454  // getMaxUpward/DownwardPressureDelta before advancing across any
455  // instructions. This converts currently live regs into live ins/outs.
456  TopRPTracker.closeTop();
457  BotRPTracker.closeBottom();
458
459  // Account for liveness generated by the region boundary.
460  if (LiveRegionEnd != RegionEnd)
461    BotRPTracker.recede();
462
463  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
464
465  // Cache the list of excess pressure sets in this region. This will also track
466  // the max pressure in the scheduled code for these sets.
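  // For example (hypothetical numbers): a pressure set with a limit of 24
  // whose max pressure within this region reaches 30 is recorded below, and
  // updateScheduledPressure() later tracks the highest pressure it reaches in
  // the code as it is scheduled.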
467  RegionCriticalPSets.clear();
468  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
469  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
470    unsigned Limit = TRI->getRegPressureSetLimit(i);
471    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
472          << " Limit " << Limit
473          << " Actual " << RegionPressure[i] << "\n");
474    if (RegionPressure[i] > Limit)
475      RegionCriticalPSets.push_back(PressureElement(i, 0));
476  }
477  DEBUG(dbgs() << "Excess PSets: ";
478        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
479          dbgs() << TRI->getRegPressureSetName(
480            RegionCriticalPSets[i].PSetID) << " ";
481        dbgs() << "\n");
482}
483
484// FIXME: When the pressure tracker deals in pressure differences then we won't
485// iterate over all RegionCriticalPSets[i].
486void ScheduleDAGMI::
487updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
488  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
489    unsigned ID = RegionCriticalPSets[i].PSetID;
490    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
491    if ((int)NewMaxPressure[ID] > MaxUnits)
492      MaxUnits = NewMaxPressure[ID];
493  }
494}
495
496/// schedule - Called back from MachineScheduler::runOnMachineFunction
497/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
498/// only includes instructions that have DAG nodes, not scheduling boundaries.
499///
500/// This is a skeletal driver, with all the functionality pushed into helpers,
501/// so that it can be easily extended by experimental schedulers. Generally,
502/// implementing MachineSchedStrategy should be sufficient to implement a new
503/// scheduling algorithm. However, if a scheduler further subclasses
504/// ScheduleDAGMI then it will want to override this virtual method in order to
505/// update any specialized state.
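///
/// As a hypothetical sketch of the extension point: a new strategy typically
/// only needs to implement the MachineSchedStrategy hooks exercised by this
/// driver and its helpers -- initialize(), pickNode(), schedNode(),
/// releaseTopNode(), releaseBottomNode() and registerRoots() -- exactly the
/// set that ConvergingScheduler overrides later in this file.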
506void ScheduleDAGMI::schedule() {
507  buildDAGWithRegPressure();
508
509  Topo.InitDAGTopologicalSorting();
510
511  postprocessDAG();
512
513  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
514          SUnits[su].dumpAll(this));
515
516  if (ViewMISchedDAGs) viewGraph();
517
518  initQueues();
519
520  bool IsTopNode = false;
521  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
522    assert(!SU->isScheduled && "Node already scheduled");
523    if (!checkSchedLimit())
524      break;
525
526    scheduleMI(SU, IsTopNode);
527
528    updateQueues(SU, IsTopNode);
529  }
530  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
531
532  placeDebugValues();
533
534  DEBUG({
535      unsigned BBNum = begin()->getParent()->getNumber();
536      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
537      dumpSchedule();
538      dbgs() << '\n';
539    });
540}
541
542/// Build the DAG and set up three register pressure trackers.
543void ScheduleDAGMI::buildDAGWithRegPressure() {
544  // Initialize the register pressure tracker used by buildSchedGraph.
545  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
546
547  // Account for liveness generated by the region boundary.
548  if (LiveRegionEnd != RegionEnd)
549    RPTracker.recede();
550
551  // Build the DAG, and compute current register pressure.
552  buildSchedGraph(AA, &RPTracker);
553  if (ViewMISchedDAGs) viewGraph();
554
555  // Initialize top/bottom trackers after computing region pressure.
556  initRegPressure();
557}
558
559/// Apply each ScheduleDAGMutation step in order.
560void ScheduleDAGMI::postprocessDAG() {
561  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
562    Mutations[i]->apply(this);
563  }
564}
565
566// Release all DAG roots for scheduling.
567//
568// Nodes with unreleased weak edges can still be roots.
569void ScheduleDAGMI::releaseRoots() {
570  SmallVector<SUnit*, 16> BotRoots;
571
572  for (std::vector<SUnit>::iterator
573         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
574    SUnit *SU = &(*I);
575    // A SUnit is ready to top schedule if it has no predecessors.
576    if (!I->NumPredsLeft && SU != &EntrySU)
577      SchedImpl->releaseTopNode(SU);
578    // A SUnit is ready to bottom schedule if it has no successors.
579    if (!I->NumSuccsLeft && SU != &ExitSU)
580      BotRoots.push_back(SU);
581  }
582  // Release bottom roots in reverse order so the higher priority nodes appear
583  // first. This is more natural and slightly more efficient.
584  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
585         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
586    SchedImpl->releaseBottomNode(*I);
587}
588
589/// Identify DAG roots and setup scheduler queues.
590void ScheduleDAGMI::initQueues() {
591  NextClusterSucc = NULL;
592  NextClusterPred = NULL;
593
594  // Initialize the strategy before modifying the DAG.
595  SchedImpl->initialize(this);
596
597  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
598  releaseRoots();
599
600  releaseSuccessors(&EntrySU);
601  releasePredecessors(&ExitSU);
602
603  SchedImpl->registerRoots();
604
605  // Advance past initial DebugValues.
606  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
607  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
608  TopRPTracker.setPos(CurrentTop);
609
610  CurrentBottom = RegionEnd;
611}
612
613/// Move an instruction and update register pressure.
614void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
615  // Move the instruction to its new location in the instruction stream.
616  MachineInstr *MI = SU->getInstr();
617
618  if (IsTopNode) {
619    assert(SU->isTopReady() && "node still has unscheduled dependencies");
620    if (&*CurrentTop == MI)
621      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
622    else {
623      moveInstruction(MI, CurrentTop);
624      TopRPTracker.setPos(MI);
625    }
626
627    // Update top scheduled pressure.
628    TopRPTracker.advance();
629    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
630    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
631  }
632  else {
633    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
634    MachineBasicBlock::iterator priorII =
635      priorNonDebug(CurrentBottom, CurrentTop);
636    if (&*priorII == MI)
637      CurrentBottom = priorII;
638    else {
639      if (&*CurrentTop == MI) {
640        CurrentTop = nextIfDebug(++CurrentTop, priorII);
641        TopRPTracker.setPos(CurrentTop);
642      }
643      moveInstruction(MI, CurrentBottom);
644      CurrentBottom = MI;
645    }
646    // Update bottom scheduled pressure.
647    BotRPTracker.recede();
648    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
649    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
650  }
651}
652
653/// Update scheduler queues after scheduling an instruction.
654void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
655  // Release dependent instructions for scheduling.
656  if (IsTopNode)
657    releaseSuccessors(SU);
658  else
659    releasePredecessors(SU);
660
661  SU->isScheduled = true;
662
663  // Notify the scheduling strategy after updating the DAG.
664  SchedImpl->schedNode(SU, IsTopNode);
665}
666
667/// Reinsert any remaining debug_values, just like the PostRA scheduler.
668void ScheduleDAGMI::placeDebugValues() {
669  // If first instruction was a DBG_VALUE then put it back.
670  if (FirstDbgValue) {
671    BB->splice(RegionBegin, BB, FirstDbgValue);
672    RegionBegin = FirstDbgValue;
673  }
674
675  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
676         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
677    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
678    MachineInstr *DbgValue = P.first;
679    MachineBasicBlock::iterator OrigPrevMI = P.second;
680    if (&*RegionBegin == DbgValue)
681      ++RegionBegin;
682    BB->splice(++OrigPrevMI, BB, DbgValue);
683    if (OrigPrevMI == llvm::prior(RegionEnd))
684      RegionEnd = DbgValue;
685  }
686  DbgValues.clear();
687  FirstDbgValue = NULL;
688}
689
690#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
691void ScheduleDAGMI::dumpSchedule() const {
692  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
693    if (SUnit *SU = getSUnit(&(*MI)))
694      SU->dump(this);
695    else
696      dbgs() << "Missing SUnit\n";
697  }
698}
699#endif
700
701//===----------------------------------------------------------------------===//
702// LoadClusterMutation - DAG post-processing to cluster loads.
703//===----------------------------------------------------------------------===//
704
705namespace {
706/// \brief Post-process the DAG to create cluster edges between neighboring
707/// loads.
708class LoadClusterMutation : public ScheduleDAGMutation {
709  struct LoadInfo {
710    SUnit *SU;
711    unsigned BaseReg;
712    unsigned Offset;
713    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
714      : SU(su), BaseReg(reg), Offset(ofs) {}
715  };
716  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
717                           const LoadClusterMutation::LoadInfo &RHS);
718
719  const TargetInstrInfo *TII;
720  const TargetRegisterInfo *TRI;
721public:
722  LoadClusterMutation(const TargetInstrInfo *tii,
723                      const TargetRegisterInfo *tri)
724    : TII(tii), TRI(tri) {}
725
726  virtual void apply(ScheduleDAGMI *DAG);
727protected:
728  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
729};
730} // anonymous
731
732bool LoadClusterMutation::LoadInfoLess(
733  const LoadClusterMutation::LoadInfo &LHS,
734  const LoadClusterMutation::LoadInfo &RHS) {
735  if (LHS.BaseReg != RHS.BaseReg)
736    return LHS.BaseReg < RHS.BaseReg;
737  return LHS.Offset < RHS.Offset;
738}
739
740void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
741                                                  ScheduleDAGMI *DAG) {
742  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
743  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
744    SUnit *SU = Loads[Idx];
745    unsigned BaseReg;
746    unsigned Offset;
747    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
748      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
749  }
750  if (LoadRecords.size() < 2)
751    return;
752  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
753  unsigned ClusterLength = 1;
754  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
755    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
756      ClusterLength = 1;
757      continue;
758    }
759
760    SUnit *SUa = LoadRecords[Idx].SU;
761    SUnit *SUb = LoadRecords[Idx+1].SU;
762    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
763        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
764
765      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
766            << SUb->NodeNum << ")\n");
767      // Copy successor edges from SUa to SUb. Interleaving computation
768      // dependent on SUa can prevent load combining due to register reuse.
769      // Predecessor edges do not need to be copied from SUb to SUa since nearby
770      // loads should have effectively the same inputs.
771      for (SUnit::const_succ_iterator
772             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
773        if (SI->getSUnit() == SUb)
774          continue;
775        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
776        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
777      }
778      ++ClusterLength;
779    }
780    else
781      ClusterLength = 1;
782  }
783}
784
785/// \brief Callback from DAG postProcessing to create cluster edges for loads.
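///
/// For illustration only (hypothetical loads): two loads reading [x1, #0] and
/// [x1, #8] share a base register, so after sorting by (BaseReg, Offset) they
/// become adjacent in LoadRecords and, if the target's shouldClusterLoads()
/// agrees, a weak Cluster edge is added from the first load's SUnit to the
/// second's.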
786void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
787  // Map DAG NodeNum to store chain ID.
788  DenseMap<unsigned, unsigned> StoreChainIDs;
789  // Map each store chain to a set of dependent loads.
790  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
791  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
792    SUnit *SU = &DAG->SUnits[Idx];
793    if (!SU->getInstr()->mayLoad())
794      continue;
795    unsigned ChainPredID = DAG->SUnits.size();
796    for (SUnit::const_pred_iterator
797           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
798      if (PI->isCtrl()) {
799        ChainPredID = PI->getSUnit()->NodeNum;
800        break;
801      }
802    }
803    // Check if this chain-like pred has been seen
804    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
805    unsigned NumChains = StoreChainDependents.size();
806    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
807      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
808    if (Result.second)
809      StoreChainDependents.resize(NumChains + 1);
810    StoreChainDependents[Result.first->second].push_back(SU);
811  }
812  // Iterate over the store chains.
813  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
814    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
815}
816
817//===----------------------------------------------------------------------===//
818// MacroFusion - DAG post-processing to encourage fusion of macro ops.
819//===----------------------------------------------------------------------===//
820
821namespace {
822/// \brief Post-process the DAG to create cluster edges between instructions
823/// that may be fused by the processor into a single operation.
824class MacroFusion : public ScheduleDAGMutation {
825  const TargetInstrInfo *TII;
826public:
827  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
828
829  virtual void apply(ScheduleDAGMI *DAG);
830};
831} // anonymous
832
833/// \brief Callback from DAG postProcessing to create cluster edges to encourage
834/// fused operations.
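///
/// For illustration only (hypothetical target behavior): if
/// shouldScheduleAdjacent() reports that a compare feeding the terminating
/// branch can be fused with it, a weak Cluster edge from that compare's SUnit
/// to ExitSU keeps the pair adjacent in the bottom-up schedule.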
835void MacroFusion::apply(ScheduleDAGMI *DAG) {
836  // For now, assume targets can only fuse with the branch.
837  MachineInstr *Branch = DAG->ExitSU.getInstr();
838  if (!Branch)
839    return;
840
841  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
842    SUnit *SU = &DAG->SUnits[--Idx];
843    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
844      continue;
845
846    // Create a single weak edge from SU to ExitSU. The only effect is to cause
847    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
848    // need to copy predecessor edges from ExitSU to SU, since top-down
849    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
850    // of SU, we could create an artificial edge from the deepest root, but it
851    // hasn't been needed yet.
852    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
853    (void)Success;
854    assert(Success && "No DAG nodes should be reachable from ExitSU");
855
856    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
857    break;
858  }
859}
860
861//===----------------------------------------------------------------------===//
862// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
863//===----------------------------------------------------------------------===//
864
865namespace {
866/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
867/// the schedule.
868class ConvergingScheduler : public MachineSchedStrategy {
869public:
870  /// Represent the type of SchedCandidate found within a single queue.
871  /// pickNodeBidirectional depends on these listed by decreasing priority.
872  enum CandReason {
873    NoCand, SingleExcess, SingleCritical, Cluster,
874    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
875    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
876    NodeOrder};
877
878#ifndef NDEBUG
879  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
880#endif
881
882  /// Policy for scheduling the next instruction in the candidate's zone.
883  struct CandPolicy {
884    bool ReduceLatency;
885    unsigned ReduceResIdx;
886    unsigned DemandResIdx;
887
888    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
889  };
890
891  /// Status of an instruction's critical resource consumption.
892  struct SchedResourceDelta {
893    // Count critical resources in the scheduled region required by SU.
894    unsigned CritResources;
895
896    // Count critical resources from another region consumed by SU.
897    unsigned DemandedResources;
898
899    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
900
901    bool operator==(const SchedResourceDelta &RHS) const {
902      return CritResources == RHS.CritResources
903        && DemandedResources == RHS.DemandedResources;
904    }
905    bool operator!=(const SchedResourceDelta &RHS) const {
906      return !operator==(RHS);
907    }
908  };
909
910  /// Store the state used by ConvergingScheduler heuristics, required for the
911  /// lifetime of one invocation of pickNode().
912  struct SchedCandidate {
913    CandPolicy Policy;
914
915    // The best SUnit candidate.
916    SUnit *SU;
917
918    // The reason for this candidate.
919    CandReason Reason;
920
921    // Register pressure values for the best candidate.
922    RegPressureDelta RPDelta;
923
924    // Critical resource consumption of the best candidate.
925    SchedResourceDelta ResDelta;
926
927    SchedCandidate(const CandPolicy &policy)
928    : Policy(policy), SU(NULL), Reason(NoCand) {}
929
930    bool isValid() const { return SU; }
931
932    // Copy the status of another candidate without changing policy.
933    void setBest(SchedCandidate &Best) {
934      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
935      SU = Best.SU;
936      Reason = Best.Reason;
937      RPDelta = Best.RPDelta;
938      ResDelta = Best.ResDelta;
939    }
940
941    void initResourceDelta(const ScheduleDAGMI *DAG,
942                           const TargetSchedModel *SchedModel);
943  };
944
945  /// Summarize the unscheduled region.
946  struct SchedRemainder {
947    // Critical path through the DAG in expected latency.
948    unsigned CriticalPath;
949
950    // Unscheduled resources
951    SmallVector<unsigned, 16> RemainingCounts;
952    // Critical resource for the unscheduled zone.
953    unsigned CritResIdx;
954    // Number of micro-ops left to schedule.
955    unsigned RemainingMicroOps;
956
957    void reset() {
958      CriticalPath = 0;
959      RemainingCounts.clear();
960      CritResIdx = 0;
961      RemainingMicroOps = 0;
962    }
963
964    SchedRemainder() { reset(); }
965
966    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
967
968    unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
969      if (!SchedModel->hasInstrSchedModel())
970        return 0;
971
972      return std::max(
973        RemainingMicroOps * SchedModel->getMicroOpFactor(),
974        RemainingCounts[CritResIdx]);
975    }
976  };
977
978  /// Each Scheduling boundary is associated with ready queues. It tracks the
979  /// current cycle in the direction of movement, and maintains the state
980  /// of "hazards" and other interlocks at the current cycle.
981  struct SchedBoundary {
982    ScheduleDAGMI *DAG;
983    const TargetSchedModel *SchedModel;
984    SchedRemainder *Rem;
985
986    ReadyQueue Available;
987    ReadyQueue Pending;
988    bool CheckPending;
989
990    // For heuristics, keep a list of the nodes that immediately depend on the
991    // most recently scheduled node.
992    SmallPtrSet<const SUnit*, 8> NextSUs;
993
994    ScheduleHazardRecognizer *HazardRec;
995
996    unsigned CurrCycle;
997    unsigned IssueCount;
998
999    /// MinReadyCycle - Cycle of the soonest available instruction.
1000    unsigned MinReadyCycle;
1001
1002    // The expected latency of the critical path in this scheduled zone.
1003    unsigned ExpectedLatency;
1004
1005    // Resources used in the scheduled zone beyond this boundary.
1006    SmallVector<unsigned, 16> ResourceCounts;
1007
1008    // Cache the critical resources ID in this scheduled zone.
1009    unsigned CritResIdx;
1010
1011    // Is the scheduled region resource limited vs. latency limited.
1012    bool IsResourceLimited;
1013
1014    unsigned ExpectedCount;
1015
1016#ifndef NDEBUG
1017    // Remember the greatest min operand latency.
1018    unsigned MaxMinLatency;
1019#endif
1020
1021    void reset() {
1022      Available.clear();
1023      Pending.clear();
1024      CheckPending = false;
1025      NextSUs.clear();
1026      HazardRec = 0;
1027      CurrCycle = 0;
1028      IssueCount = 0;
1029      MinReadyCycle = UINT_MAX;
1030      ExpectedLatency = 0;
1031      ResourceCounts.resize(1);
1032      assert(!ResourceCounts[0] && "nonzero count for bad resource");
1033      CritResIdx = 0;
1034      IsResourceLimited = false;
1035      ExpectedCount = 0;
1036#ifndef NDEBUG
1037      MaxMinLatency = 0;
1038#endif
1039      // Reserve a zero-count for invalid CritResIdx.
1040      ResourceCounts.resize(1);
1041    }
1042
1043    /// Pending queues extend the ready queues with the same ID and the
1044    /// PendingFlag set.
1045    SchedBoundary(unsigned ID, const Twine &Name):
1046      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1047      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
1048      reset();
1049    }
1050
1051    ~SchedBoundary() { delete HazardRec; }
1052
1053    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1054              SchedRemainder *rem);
1055
1056    bool isTop() const {
1057      return Available.getID() == ConvergingScheduler::TopQID;
1058    }
1059
1060    unsigned getUnscheduledLatency(SUnit *SU) const {
1061      if (isTop())
1062        return SU->getHeight();
1063      return SU->getDepth() + SU->Latency;
1064    }
1065
1066    unsigned getCriticalCount() const {
1067      return ResourceCounts[CritResIdx];
1068    }
1069
1070    bool checkHazard(SUnit *SU);
1071
1072    void setLatencyPolicy(CandPolicy &Policy);
1073
1074    void releaseNode(SUnit *SU, unsigned ReadyCycle);
1075
1076    void bumpCycle();
1077
1078    void countResource(unsigned PIdx, unsigned Cycles);
1079
1080    void bumpNode(SUnit *SU);
1081
1082    void releasePending();
1083
1084    void removeReady(SUnit *SU);
1085
1086    SUnit *pickOnlyChoice();
1087  };
1088
1089private:
1090  ScheduleDAGMI *DAG;
1091  const TargetSchedModel *SchedModel;
1092  const TargetRegisterInfo *TRI;
1093
1094  // State of the top and bottom scheduled instruction boundaries.
1095  SchedRemainder Rem;
1096  SchedBoundary Top;
1097  SchedBoundary Bot;
1098
1099public:
1100  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1101  enum {
1102    TopQID = 1,
1103    BotQID = 2,
1104    LogMaxQID = 2
1105  };
1106
1107  ConvergingScheduler():
1108    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1109
1110  virtual void initialize(ScheduleDAGMI *dag);
1111
1112  virtual SUnit *pickNode(bool &IsTopNode);
1113
1114  virtual void schedNode(SUnit *SU, bool IsTopNode);
1115
1116  virtual void releaseTopNode(SUnit *SU);
1117
1118  virtual void releaseBottomNode(SUnit *SU);
1119
1120  virtual void registerRoots();
1121
1122protected:
1123  void balanceZones(
1124    ConvergingScheduler::SchedBoundary &CriticalZone,
1125    ConvergingScheduler::SchedCandidate &CriticalCand,
1126    ConvergingScheduler::SchedBoundary &OppositeZone,
1127    ConvergingScheduler::SchedCandidate &OppositeCand);
1128
1129  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1130                           ConvergingScheduler::SchedCandidate &BotCand);
1131
1132  void tryCandidate(SchedCandidate &Cand,
1133                    SchedCandidate &TryCand,
1134                    SchedBoundary &Zone,
1135                    const RegPressureTracker &RPTracker,
1136                    RegPressureTracker &TempTracker);
1137
1138  SUnit *pickNodeBidirectional(bool &IsTopNode);
1139
1140  void pickNodeFromQueue(SchedBoundary &Zone,
1141                         const RegPressureTracker &RPTracker,
1142                         SchedCandidate &Candidate);
1143
1144#ifndef NDEBUG
1145  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
1146#endif
1147};
1148} // namespace
1149
1150void ConvergingScheduler::SchedRemainder::
1151init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1152  reset();
1153  if (!SchedModel->hasInstrSchedModel())
1154    return;
1155  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1156  for (std::vector<SUnit>::iterator
1157         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1158    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1159    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1160    for (TargetSchedModel::ProcResIter
1161           PI = SchedModel->getWriteProcResBegin(SC),
1162           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1163      unsigned PIdx = PI->ProcResourceIdx;
1164      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1165      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1166    }
1167  }
1168  for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1169       PIdx != PEnd; ++PIdx) {
1170    if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1171        >= (int)SchedModel->getLatencyFactor()) {
1172      CritResIdx = PIdx;
1173    }
1174  }
1175}
1176
1177void ConvergingScheduler::SchedBoundary::
1178init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1179  reset();
1180  DAG = dag;
1181  SchedModel = smodel;
1182  Rem = rem;
1183  if (SchedModel->hasInstrSchedModel())
1184    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1185}
1186
1187void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1188  DAG = dag;
1189  SchedModel = DAG->getSchedModel();
1190  TRI = DAG->TRI;
1191  Rem.init(DAG, SchedModel);
1192  Top.init(DAG, SchedModel, &Rem);
1193  Bot.init(DAG, SchedModel, &Rem);
1194
1195  // Initialize resource counts.
1196
1197  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1198  // are disabled, then these HazardRecs will be disabled.
1199  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1200  const TargetMachine &TM = DAG->MF.getTarget();
1201  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1202  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1203
1204  assert((!ForceTopDown || !ForceBottomUp) &&
1205         "-misched-topdown incompatible with -misched-bottomup");
1206}
1207
1208void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1209  if (SU->isScheduled)
1210    return;
1211
1212  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1213       I != E; ++I) {
1214    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1215    unsigned MinLatency = I->getMinLatency();
1216#ifndef NDEBUG
1217    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1218#endif
1219    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1220      SU->TopReadyCycle = PredReadyCycle + MinLatency;
1221  }
1222  Top.releaseNode(SU, SU->TopReadyCycle);
1223}
1224
1225void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1226  if (SU->isScheduled)
1227    return;
1228
1229  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1230
1231  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1232       I != E; ++I) {
1233    if (I->isWeak())
1234      continue;
1235    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1236    unsigned MinLatency = I->getMinLatency();
1237#ifndef NDEBUG
1238    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1239#endif
1240    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1241      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1242  }
1243  Bot.releaseNode(SU, SU->BotReadyCycle);
1244}
1245
1246void ConvergingScheduler::registerRoots() {
1247  Rem.CriticalPath = DAG->ExitSU.getDepth();
1248  // Some roots may not feed into ExitSU. Check all of them in case.
1249  for (std::vector<SUnit*>::const_iterator
1250         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1251    if ((*I)->getDepth() > Rem.CriticalPath)
1252      Rem.CriticalPath = (*I)->getDepth();
1253  }
1254  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1255}
1256
1257/// Does this SU have a hazard within the current instruction group.
1258///
1259/// The scheduler supports two modes of hazard recognition. The first is the
1260/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1261/// supports highly complicated in-order reservation tables
1262/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1263///
1264/// The second is a streamlined mechanism that checks for hazards based on
1265/// simple counters that the scheduler itself maintains. It explicitly checks
1266/// for instruction dispatch limitations, including the number of micro-ops that
1267/// can dispatch per cycle.
1268///
1269/// TODO: Also check whether the SU must start a new group.
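///
/// For illustration (hypothetical widths): with an issue width of 4 and 3
/// micro-ops already issued in the current cycle, an SU that decodes to 2
/// micro-ops is reported as a hazard because 3 + 2 > 4.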
1270bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1271  if (HazardRec->isEnabled())
1272    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1273
1274  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1275  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1276    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1277          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1278    return true;
1279  }
1280  return false;
1281}
1282
1283/// Compute the remaining latency to determine whether ILP should be increased.
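///
/// For example (hypothetical numbers, with ILPWindow at its default of 10):
/// given CriticalPath=20 and ExpectedLatency=15, a ready node whose remaining
/// unscheduled latency is 16 yields 16 + 15 = 31 >= 20 + 10, so ReduceLatency
/// is set, provided that 16 also exceeds getMaxRemainingCount().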
1284void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1285  // FIXME: compile time. In all, we visit four queues here; we should only
1286  // need to visit the one that was last popped if we cache the result.
1287  unsigned RemLatency = 0;
1288  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1289       I != E; ++I) {
1290    unsigned L = getUnscheduledLatency(*I);
1291    if (L > RemLatency)
1292      RemLatency = L;
1293  }
1294  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1295       I != E; ++I) {
1296    unsigned L = getUnscheduledLatency(*I);
1297    if (L > RemLatency)
1298      RemLatency = L;
1299  }
1300  if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow
1301      && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1302    Policy.ReduceLatency = true;
1303    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
1304  }
1305}
1306
1307void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1308                                                     unsigned ReadyCycle) {
1309
1310  if (ReadyCycle < MinReadyCycle)
1311    MinReadyCycle = ReadyCycle;
1312
1313  // Check for interlocks first. For the purpose of other heuristics, an
1314  // instruction that cannot issue appears as if it's not in the ReadyQueue.
1315  if (ReadyCycle > CurrCycle || checkHazard(SU))
1316    Pending.push(SU);
1317  else
1318    Available.push(SU);
1319
1320  // Record this node as an immediate dependent of the scheduled node.
1321  NextSUs.insert(SU);
1322}
1323
1324/// Move the boundary of scheduled code by one cycle.
1325void ConvergingScheduler::SchedBoundary::bumpCycle() {
1326  unsigned Width = SchedModel->getIssueWidth();
1327  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1328
1329  unsigned NextCycle = CurrCycle + 1;
1330  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1331  if (MinReadyCycle > NextCycle) {
1332    IssueCount = 0;
1333    NextCycle = MinReadyCycle;
1334  }
1335
1336  if (!HazardRec->isEnabled()) {
1337    // Bypass HazardRec virtual calls.
1338    CurrCycle = NextCycle;
1339  }
1340  else {
1341    // Bypass getHazardType calls in case of long latency.
1342    for (; CurrCycle != NextCycle; ++CurrCycle) {
1343      if (isTop())
1344        HazardRec->AdvanceCycle();
1345      else
1346        HazardRec->RecedeCycle();
1347    }
1348  }
1349  CheckPending = true;
1350  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1351
1352  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1353        << CurrCycle << '\n');
1354}
1355
1356/// Add the given processor resource to this scheduled zone.
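///
/// For illustration (hypothetical factors): if PIdx has a resource factor of 2
/// and the instruction occupies it for 3 cycles, this zone's count for PIdx
/// grows by 2 * 3 = 6 units, and PIdx is promoted to the critical resource
/// once its count exceeds the current critical resource's count by at least
/// one latency factor.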
1357void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1358                                                       unsigned Cycles) {
1359  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1360  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1361        << " +(" << Cycles << "x" << Factor
1362        << ") / " << SchedModel->getLatencyFactor() << '\n');
1363
1364  unsigned Count = Factor * Cycles;
1365  ResourceCounts[PIdx] += Count;
1366  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1367  Rem->RemainingCounts[PIdx] -= Count;
1368
1369  // Check if this resource exceeds the current critical resource by a full
1370  // cycle. If so, it becomes the critical resource.
1371  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1372      >= (int)SchedModel->getLatencyFactor()) {
1373    CritResIdx = PIdx;
1374    DEBUG(dbgs() << "  *** Critical resource "
1375          << SchedModel->getProcResource(PIdx)->Name << " x"
1376          << ResourceCounts[PIdx] << '\n');
1377  }
1378}
1379
1380/// Move the boundary of scheduled code by one SUnit.
1381void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1382  // Update the reservation table.
1383  if (HazardRec->isEnabled()) {
1384    if (!isTop() && SU->isCall) {
1385      // Calls are scheduled with their preceding instructions. For bottom-up
1386      // scheduling, clear the pipeline state before emitting.
1387      HazardRec->Reset();
1388    }
1389    HazardRec->EmitInstruction(SU);
1390  }
1391  // Update resource counts and critical resource.
1392  if (SchedModel->hasInstrSchedModel()) {
1393    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1394    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1395    for (TargetSchedModel::ProcResIter
1396           PI = SchedModel->getWriteProcResBegin(SC),
1397           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1398      countResource(PI->ProcResourceIdx, PI->Cycles);
1399    }
1400  }
1401  if (isTop()) {
1402    if (SU->getDepth() > ExpectedLatency)
1403      ExpectedLatency = SU->getDepth();
1404  }
1405  else {
1406    if (SU->getHeight() > ExpectedLatency)
1407      ExpectedLatency = SU->getHeight();
1408  }
1409
1410  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1411
1412  // Check the instruction group dispatch limit.
1413  // TODO: Check if this SU must end a dispatch group.
1414  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1415
1416  // checkHazard prevents scheduling multiple instructions per cycle that exceed
1417  // issue width. However, we commonly reach the maximum. In this case
1418  // opportunistically bump the cycle to avoid uselessly checking everything in
1419  // the readyQ. Furthermore, a single instruction may produce more than one
1420  // cycle's worth of micro-ops.
1421  if (IssueCount >= SchedModel->getIssueWidth()) {
1422    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1423    bumpCycle();
1424  }
1425}
1426
1427/// Release pending ready nodes into the available queue. This makes them
1428/// visible to heuristics.
1429void ConvergingScheduler::SchedBoundary::releasePending() {
1430  // If the available queue is empty, it is safe to reset MinReadyCycle.
1431  if (Available.empty())
1432    MinReadyCycle = UINT_MAX;
1433
1434  // Check to see if any of the pending instructions are ready to issue.  If
1435  // so, add them to the available queue.
1436  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1437    SUnit *SU = *(Pending.begin()+i);
1438    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1439
1440    if (ReadyCycle < MinReadyCycle)
1441      MinReadyCycle = ReadyCycle;
1442
1443    if (ReadyCycle > CurrCycle)
1444      continue;
1445
1446    if (checkHazard(SU))
1447      continue;
1448
1449    Available.push(SU);
1450    Pending.remove(Pending.begin()+i);
1451    --i; --e;
1452  }
1453  DEBUG(if (!Pending.empty()) Pending.dump());
1454  CheckPending = false;
1455}
1456
1457/// Remove SU from the ready set for this boundary.
1458void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1459  if (Available.isInQueue(SU))
1460    Available.remove(Available.find(SU));
1461  else {
1462    assert(Pending.isInQueue(SU) && "bad ready count");
1463    Pending.remove(Pending.find(SU));
1464  }
1465}
1466
1467/// If this queue only has one ready candidate, return it. As a side effect,
1468/// defer any nodes that now hit a hazard, and advance the cycle until at least
1469/// one node is ready. If multiple instructions are ready, return NULL.
1470SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1471  if (CheckPending)
1472    releasePending();
1473
1474  if (IssueCount > 0) {
1475    // Defer any ready instrs that now have a hazard.
1476    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1477      if (checkHazard(*I)) {
1478        Pending.push(*I);
1479        I = Available.remove(I);
1480        continue;
1481      }
1482      ++I;
1483    }
1484  }
1485  for (unsigned i = 0; Available.empty(); ++i) {
1486    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1487           "permanent hazard"); (void)i;
1488    bumpCycle();
1489    releasePending();
1490  }
1491  if (Available.size() == 1)
1492    return *Available.begin();
1493  return NULL;
1494}
1495
1496/// Record the candidate policy for opposite zones with different critical
1497/// resources.
1498///
1499/// If the CriticalZone is latency limited, don't force a policy for the
1500/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
1501void ConvergingScheduler::balanceZones(
1502  ConvergingScheduler::SchedBoundary &CriticalZone,
1503  ConvergingScheduler::SchedCandidate &CriticalCand,
1504  ConvergingScheduler::SchedBoundary &OppositeZone,
1505  ConvergingScheduler::SchedCandidate &OppositeCand) {
1506
1507  if (!CriticalZone.IsResourceLimited)
1508    return;
1509  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
1510
1511  SchedRemainder *Rem = CriticalZone.Rem;
1512
1513  // If the critical zone is overconsuming a resource relative to the
1514  // remainder, try to reduce it.
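  // The remaining counts are kept in the machine model's scaled resource units,
  // so only react once the gap exceeds the latency factor, i.e. roughly one
  // cycle's worth of work.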
1515  unsigned RemainingCritCount =
1516    Rem->RemainingCounts[CriticalZone.CritResIdx];
1517  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
1518      > (int)SchedModel->getLatencyFactor()) {
1519    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1520    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1521          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1522          << '\n');
1523  }
1524  // If the other zone is underconsuming a resource relative to the full zone,
1525  // try to increase it.
1526  unsigned OppositeCount =
1527    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1528  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1529      > (int)SchedModel->getLatencyFactor()) {
1530    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1531    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1532          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1533          << '\n');
1534  }
1535}
1536
1537  /// Determine if the scheduled zones exceed resource limits or the critical path
1538  /// and set each candidate's latency and resource reduction policy accordingly.
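/// Three cases are handled: both zones limited by the same critical resource,
/// neither zone resource limited, and the remaining mixed case, which is
/// resolved by balanceZones().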
1539void ConvergingScheduler::checkResourceLimits(
1540  ConvergingScheduler::SchedCandidate &TopCand,
1541  ConvergingScheduler::SchedCandidate &BotCand) {
1542
1543  // Set ReduceLatency to true if needed.
1544  Bot.setLatencyPolicy(TopCand.Policy);
1545  Top.setLatencyPolicy(BotCand.Policy);
1546
1547  // Handle resource-limited regions.
1548  if (Top.IsResourceLimited && Bot.IsResourceLimited
1549      && Top.CritResIdx == Bot.CritResIdx) {
1550    // If the scheduled critical resource in both zones is no longer the
1551    // critical remaining resource, attempt to reduce resource height both ways.
1552    if (Top.CritResIdx != Rem.CritResIdx) {
1553      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1554      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1555      DEBUG(dbgs() << "Reduce scheduled "
1556            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1557    }
1558    return;
1559  }
1560  // Handle latency-limited regions.
1561  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1562    // If the total scheduled expected latency exceeds the region's critical
1563    // path then reduce latency both ways.
1564    //
1565    // Just because a zone is not resource limited does not mean it is latency
1566    // limited. Unbuffered resources, such as the max micro-op count, may cause
1567    // CurrCycle to exceed the expected latency.
1568    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1569        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1570      TopCand.Policy.ReduceLatency = true;
1571      BotCand.Policy.ReduceLatency = true;
1572      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1573            << " + " << Bot.ExpectedLatency << '\n');
1574    }
1575    return;
1576  }
1577  // The critical resource is different in each zone, so request balancing.
1578
1579  // Compute the cost of each zone.
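  // The cycle count is converted into the same scaled units as the resource
  // counts by multiplying with the latency factor, so ExpectedCount can be
  // compared directly against getCriticalCount() here and in balanceZones().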
1580  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1581  Top.ExpectedCount = std::max(
1582    Top.getCriticalCount(),
1583    Top.ExpectedCount * SchedModel->getLatencyFactor());
1584  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1585  Bot.ExpectedCount = std::max(
1586    Bot.getCriticalCount(),
1587    Bot.ExpectedCount * SchedModel->getLatencyFactor());
1588
1589  balanceZones(Top, TopCand, Bot, BotCand);
1590  balanceZones(Bot, BotCand, Top, TopCand);
1591}
1592
1593void ConvergingScheduler::SchedCandidate::
1594initResourceDelta(const ScheduleDAGMI *DAG,
1595                  const TargetSchedModel *SchedModel) {
1596  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1597    return;
1598
1599  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1600  for (TargetSchedModel::ProcResIter
1601         PI = SchedModel->getWriteProcResBegin(SC),
1602         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1603    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1604      ResDelta.CritResources += PI->Cycles;
1605    if (PI->ProcResourceIdx == Policy.DemandResIdx)
1606      ResDelta.DemandedResources += PI->Cycles;
1607  }
1608}
1609
1610/// Return true if this heuristic determines order.
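/// If TryVal wins, Reason is recorded on TryCand; if CandVal wins, Cand's
/// reason is strengthened to Reason when Reason ranks higher (a lower enum
/// value). A tie returns false so the caller falls through to the next
/// heuristic. tryGreater below mirrors this with the comparison reversed.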
1611static bool tryLess(unsigned TryVal, unsigned CandVal,
1612                    ConvergingScheduler::SchedCandidate &TryCand,
1613                    ConvergingScheduler::SchedCandidate &Cand,
1614                    ConvergingScheduler::CandReason Reason) {
1615  if (TryVal < CandVal) {
1616    TryCand.Reason = Reason;
1617    return true;
1618  }
1619  if (TryVal > CandVal) {
1620    if (Cand.Reason > Reason)
1621      Cand.Reason = Reason;
1622    return true;
1623  }
1624  return false;
1625}
1626
1627static bool tryGreater(unsigned TryVal, unsigned CandVal,
1628                       ConvergingScheduler::SchedCandidate &TryCand,
1629                       ConvergingScheduler::SchedCandidate &Cand,
1630                       ConvergingScheduler::CandReason Reason) {
1631  if (TryVal > CandVal) {
1632    TryCand.Reason = Reason;
1633    return true;
1634  }
1635  if (TryVal < CandVal) {
1636    if (Cand.Reason > Reason)
1637      Cand.Reason = Reason;
1638    return true;
1639  }
1640  return false;
1641}
1642
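/// Weak edges of SU that are not yet satisfied in the scheduling direction:
/// weak predecessors for top-down, weak successors for bottom-up. Weak edges
/// currently encode clustering preferences (see tryCandidate).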
1643static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1644  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1645}
1646
1647  /// Apply a set of heuristics to a new candidate. Heuristics are currently
1648/// hierarchical. This may be more efficient than a graduated cost model because
1649/// we don't need to evaluate all aspects of the model for each node in the
1650/// queue. But it's really done to make the heuristics easier to debug and
1651/// statistically analyze.
1652///
1653/// \param Cand provides the policy and current best candidate.
1654/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1655/// \param Zone describes the scheduled zone that we are extending.
1656/// \param RPTracker describes reg pressure within the scheduled zone.
1657/// \param TempTracker is a scratch pressure tracker to reuse in queries.
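///
/// The heuristics are applied hierarchically: register excess, critical set
/// pressure, clustering (including weak edges), critical resource reduction
/// and demand, latency (only when the zone's policy requests it), overall max
/// pressure, immediate def-use, and finally the original node order.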
1658void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1659                                       SchedCandidate &TryCand,
1660                                       SchedBoundary &Zone,
1661                                       const RegPressureTracker &RPTracker,
1662                                       RegPressureTracker &TempTracker) {
1663
1664  // Always initialize TryCand's RPDelta.
1665  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1666                                  DAG->getRegionCriticalPSets(),
1667                                  DAG->getRegPressure().MaxSetPressure);
1668
1669  // Initialize the candidate if needed.
1670  if (!Cand.isValid()) {
1671    TryCand.Reason = NodeOrder;
1672    return;
1673  }
1674  // Avoid exceeding the target's limit.
1675  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1676              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1677    return;
1678  if (Cand.Reason == SingleExcess)
1679    Cand.Reason = MultiPressure;
1680
1681  // Avoid increasing the max critical pressure in the scheduled region.
1682  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1683              Cand.RPDelta.CriticalMax.UnitIncrease,
1684              TryCand, Cand, SingleCritical))
1685    return;
1686  if (Cand.Reason == SingleCritical)
1687    Cand.Reason = MultiPressure;
1688
1689  // Keep clustered nodes together to encourage downstream peephole
1690  // optimizations which may reduce resource requirements.
1691  //
1692  // This is a best effort to set things up for a post-RA pass. Optimizations
1693  // like generating loads of multiple registers should ideally be done within
1694  // the scheduler pass by combining the loads during DAG postprocessing.
1695  const SUnit *NextClusterSU =
1696    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
1697  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
1698                 TryCand, Cand, Cluster))
1699    return;
1700  // Currently, weak edges are for clustering, so we hard-code that reason.
1701  // However, deferring the current TryCand will not change Cand's reason.
1702  CandReason OrigReason = Cand.Reason;
1703  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
1704              getWeakLeft(Cand.SU, Zone.isTop()),
1705              TryCand, Cand, Cluster)) {
1706    Cand.Reason = OrigReason;
1707    return;
1708  }
1709  // Avoid critical resource consumption and balance the schedule.
1710  TryCand.initResourceDelta(DAG, SchedModel);
1711  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1712              TryCand, Cand, ResourceReduce))
1713    return;
1714  if (tryGreater(TryCand.ResDelta.DemandedResources,
1715                 Cand.ResDelta.DemandedResources,
1716                 TryCand, Cand, ResourceDemand))
1717    return;
1718
1719  // Avoid serializing long latency dependence chains.
1720  if (Cand.Policy.ReduceLatency) {
1721    if (Zone.isTop()) {
1722      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1723          > Zone.ExpectedCount) {
1724        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1725                    TryCand, Cand, TopDepthReduce))
1726          return;
1727      }
1728      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1729                     TryCand, Cand, TopPathReduce))
1730        return;
1731    }
1732    else {
1733      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1734          > Zone.ExpectedCount) {
1735        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1736                    TryCand, Cand, BotHeightReduce))
1737          return;
1738      }
1739      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1740                     TryCand, Cand, BotPathReduce))
1741        return;
1742    }
1743  }
1744
1745  // Avoid increasing the max pressure of the entire region.
1746  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1747              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1748    return;
1749  if (Cand.Reason == SingleMax)
1750    Cand.Reason = MultiPressure;
1751
1752  // Prefer immediate defs/users of the last scheduled instruction. This is a
1753  // nice pressure avoidance strategy that also conserves the processor's
1754  // register renaming resources and keeps the machine code readable.
1755  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
1756                 TryCand, Cand, NextDefUse))
1757    return;
1758
1759  // Fall through to original instruction order.
1760  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1761      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1762    TryCand.Reason = NodeOrder;
1763  }
1764}
1765
1766  /// pickNodeBidirectional helper that returns true if the LHS reg pressure
1767  /// effect is more desirable than RHS from a scheduling standpoint.
1768static bool compareRPDelta(const RegPressureDelta &LHS,
1769                           const RegPressureDelta &RHS) {
1770  // Compare each component of pressure in decreasing order of importance
1771  // without checking if any are valid. Invalid PressureElements are assumed to
1772  // have UnitIncrease==0, so are neutral.
1773
1774  // Avoid exceeding the target's register pressure limit (excess pressure).
1775  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1776    DEBUG(dbgs() << "RP excess top - bot: "
1777          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1778    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1779  }
1780  // Avoid increasing the max critical pressure in the scheduled region.
1781  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1782    DEBUG(dbgs() << "RP critical top - bot: "
1783          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1784          << '\n');
1785    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1786  }
1787  // Avoid increasing the max pressure of the entire region.
1788  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1789    DEBUG(dbgs() << "RP current top - bot: "
1790          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1791          << '\n');
1792    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1793  }
1794  return false;
1795}
1796
1797#ifndef NDEBUG
1798const char *ConvergingScheduler::getReasonStr(
1799  ConvergingScheduler::CandReason Reason) {
1800  switch (Reason) {
1801  case NoCand:         return "NOCAND    ";
1802  case SingleExcess:   return "REG-EXCESS";
1803  case SingleCritical: return "REG-CRIT  ";
1804  case Cluster:        return "CLUSTER   ";
1805  case SingleMax:      return "REG-MAX   ";
1806  case MultiPressure:  return "REG-MULTI ";
1807  case ResourceReduce: return "RES-REDUCE";
1808  case ResourceDemand: return "RES-DEMAND";
1809  case TopDepthReduce: return "TOP-DEPTH ";
1810  case TopPathReduce:  return "TOP-PATH  ";
1811  case BotHeightReduce:return "BOT-HEIGHT";
1812  case BotPathReduce:  return "BOT-PATH  ";
1813  case NextDefUse:     return "DEF-USE   ";
1814  case NodeOrder:      return "ORDER     ";
1815  }
1816  llvm_unreachable("Unknown reason!");
1817}
1818
1819void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1820                                         const SchedBoundary &Zone) {
1821  const char *Label = getReasonStr(Cand.Reason);
1822  PressureElement P;
1823  unsigned ResIdx = 0;
1824  unsigned Latency = 0;
1825  switch (Cand.Reason) {
1826  default:
1827    break;
1828  case SingleExcess:
1829    P = Cand.RPDelta.Excess;
1830    break;
1831  case SingleCritical:
1832    P = Cand.RPDelta.CriticalMax;
1833    break;
1834  case SingleMax:
1835    P = Cand.RPDelta.CurrentMax;
1836    break;
1837  case ResourceReduce:
1838    ResIdx = Cand.Policy.ReduceResIdx;
1839    break;
1840  case ResourceDemand:
1841    ResIdx = Cand.Policy.DemandResIdx;
1842    break;
1843  case TopDepthReduce:
1844    Latency = Cand.SU->getDepth();
1845    break;
1846  case TopPathReduce:
1847    Latency = Cand.SU->getHeight();
1848    break;
1849  case BotHeightReduce:
1850    Latency = Cand.SU->getHeight();
1851    break;
1852  case BotPathReduce:
1853    Latency = Cand.SU->getDepth();
1854    break;
1855  }
1856  dbgs() << Label << " " << Zone.Available.getName() << " ";
1857  if (P.isValid())
1858    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1859           << " ";
1860  else
1861    dbgs() << "     ";
1862  if (ResIdx)
1863    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1864  else
1865    dbgs() << "        ";
1866  if (Latency)
1867    dbgs() << Latency << " cycles ";
1868  else
1869    dbgs() << "         ";
1870  Cand.SU->dump(DAG);
1871}
1872#endif
1873
1874/// Pick the best candidate from the top queue.
1875///
1876/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1877/// DAG building. To adjust for the current scheduling location we need to
1878/// maintain the number of vreg uses remaining to be top-scheduled.
1879void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1880                                            const RegPressureTracker &RPTracker,
1881                                            SchedCandidate &Cand) {
1882  ReadyQueue &Q = Zone.Available;
1883
1884  DEBUG(Q.dump());
1885
1886  // getMaxPressureDelta temporarily modifies the tracker.
1887  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1888
1889  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1890
1891    SchedCandidate TryCand(Cand.Policy);
1892    TryCand.SU = *I;
1893    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
1894    if (TryCand.Reason != NoCand) {
1895      // Initialize resource delta if needed in case future heuristics query it.
1896      if (TryCand.ResDelta == SchedResourceDelta())
1897        TryCand.initResourceDelta(DAG, SchedModel);
1898      Cand.setBest(TryCand);
1899      DEBUG(traceCandidate(Cand, Zone));
1900    }
1901  }
1902}
1903
1904static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1905                      bool IsTop) {
1906  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1907        << " SU(" << Cand.SU->NodeNum << ") "
1908        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1909}
1910
1911/// Pick the best candidate node from either the top or bottom queue.
1912SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1913  // Schedule as far as possible in the direction of no choice. This is most
1914  // efficient, but also provides the best heuristics for CriticalPSets.
1915  if (SUnit *SU = Bot.pickOnlyChoice()) {
1916    IsTopNode = false;
1917    return SU;
1918  }
1919  if (SUnit *SU = Top.pickOnlyChoice()) {
1920    IsTopNode = true;
1921    return SU;
1922  }
1923  CandPolicy NoPolicy;
1924  SchedCandidate BotCand(NoPolicy);
1925  SchedCandidate TopCand(NoPolicy);
1926  checkResourceLimits(TopCand, BotCand);
1927
1928  // Prefer bottom scheduling when heuristics are silent.
1929  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1930  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1931
1932  // If either Q has a single candidate that provides the least increase in
1933  // Excess pressure, we can immediately schedule from that Q.
1934  //
1935  // RegionCriticalPSets summarizes the pressure within the scheduled region and
1936  // affects picking from either Q. If scheduling in one direction must
1937  // increase pressure for one of the excess PSets, then schedule in that
1938  // direction first to provide more freedom in the other direction.
1939  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1940    IsTopNode = false;
1941    tracePick(BotCand, IsTopNode);
1942    return BotCand.SU;
1943  }
1944  // Check if the top Q has a better candidate.
1945  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1946  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1947
1948  // If either Q has a single candidate that minimizes pressure above the
1949  // original region's pressure, pick it.
1950  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1951    if (TopCand.Reason < BotCand.Reason) {
1952      IsTopNode = true;
1953      tracePick(TopCand, IsTopNode);
1954      return TopCand.SU;
1955    }
1956    IsTopNode = false;
1957    tracePick(BotCand, IsTopNode);
1958    return BotCand.SU;
1959  }
1960  // Check for a salient pressure difference and pick the best from either side.
1961  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
1962    IsTopNode = true;
1963    tracePick(TopCand, IsTopNode);
1964    return TopCand.SU;
1965  }
1966  // Otherwise prefer the bottom candidate, in node order if all else failed.
1967  if (TopCand.Reason < BotCand.Reason) {
1968    IsTopNode = true;
1969    tracePick(TopCand, IsTopNode);
1970    return TopCand.SU;
1971  }
1972  IsTopNode = false;
1973  tracePick(BotCand, IsTopNode);
1974  return BotCand.SU;
1975}
1976
1977/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
1978SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
1979  if (DAG->top() == DAG->bottom()) {
1980    assert(Top.Available.empty() && Top.Pending.empty() &&
1981           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
1982    return NULL;
1983  }
1984  SUnit *SU;
1985  do {
1986    if (ForceTopDown) {
1987      SU = Top.pickOnlyChoice();
1988      if (!SU) {
1989        CandPolicy NoPolicy;
1990        SchedCandidate TopCand(NoPolicy);
1991        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1992        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1993        SU = TopCand.SU;
1994      }
1995      IsTopNode = true;
1996    }
1997    else if (ForceBottomUp) {
1998      SU = Bot.pickOnlyChoice();
1999      if (!SU) {
2000        CandPolicy NoPolicy;
2001        SchedCandidate BotCand(NoPolicy);
2002        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2003        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2004        SU = BotCand.SU;
2005      }
2006      IsTopNode = false;
2007    }
2008    else {
2009      SU = pickNodeBidirectional(IsTopNode);
2010    }
2011  } while (SU->isScheduled);
2012
2013  if (SU->isTopReady())
2014    Top.removeReady(SU);
2015  if (SU->isBottomReady())
2016    Bot.removeReady(SU);
2017
2018  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
2019        << " Scheduling Instruction in cycle "
2020        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
2021        SU->dump(DAG));
2022  return SU;
2023}
2024
2025/// Update the scheduler's state after scheduling a node. This is the same node
2026/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2027  /// its state based on the current cycle before MachineSchedStrategy does.
2028void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
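  // Record the boundary's current cycle as the node's ready cycle before
  // updating the zone's latency and resource bookkeeping in bumpNode().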
2029  if (IsTopNode) {
2030    SU->TopReadyCycle = Top.CurrCycle;
2031    Top.bumpNode(SU);
2032  }
2033  else {
2034    SU->BotReadyCycle = Bot.CurrCycle;
2035    Bot.bumpNode(SU);
2036  }
2037}
2038
2039/// Create the standard converging machine scheduler. This will be used as the
2040/// default scheduler if the target does not set a default.
2041static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2042  assert((!ForceTopDown || !ForceBottomUp) &&
2043         "-misched-topdown incompatible with -misched-bottomup");
2044  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2045  // Register DAG post-processors.
2046  if (EnableLoadCluster)
2047    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2048  if (EnableMacroFusion)
2049    DAG->addMutation(new MacroFusion(DAG->TII));
2050  return DAG;
2051}
2052static MachineSchedRegistry
2053ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2054                        createConvergingSched);
2055
2056//===----------------------------------------------------------------------===//
2057// ILP Scheduler. Currently for experimental analysis of heuristics.
2058//===----------------------------------------------------------------------===//
2059
2060namespace {
2061/// \brief Order nodes by the ILP metric.
2062struct ILPOrder {
2063  SchedDFSResult *DFSResult;
2064  BitVector *ScheduledTrees;
2065  bool MaximizeILP;
2066
2067  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
2068    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}
2069
2070  /// \brief Apply a less-than relation on node priority.
2071  ///
2072  /// (Return true if A comes after B in the Q.)
2073  bool operator()(const SUnit *A, const SUnit *B) const {
2074    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2075    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2076    if (SchedTreeA != SchedTreeB) {
2077      // Unscheduled trees have lower priority.
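      // Prefer to finish a subtree that has already started scheduling before
      // opening a new one.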
2078      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2079        return ScheduledTrees->test(SchedTreeB);
2080
2081      // Trees with shallower connections have lower priority.
2082      if (DFSResult->getSubtreeLevel(SchedTreeA)
2083          != DFSResult->getSubtreeLevel(SchedTreeB)) {
2084        return DFSResult->getSubtreeLevel(SchedTreeA)
2085          < DFSResult->getSubtreeLevel(SchedTreeB);
2086      }
2087    }
2088    if (MaximizeILP)
2089      return DFSResult->getILP(A) < DFSResult->getILP(B);
2090    else
2091      return DFSResult->getILP(A) > DFSResult->getILP(B);
2092  }
2093};
2094
2095/// \brief Schedule based on the ILP metric.
2096class ILPScheduler : public MachineSchedStrategy {
2097  /// In case all subtrees are eventually connected to a common root through
2098  /// data dependence (e.g. reduction), place an upper limit on their size.
2099  ///
2100  /// FIXME: A subtree limit is generally good, but in the situation commented
2101  /// above, where multiple similar subtrees feed a common root, we should
2102  /// only split at a point where the resulting subtrees will be balanced.
2103  /// (a motivating test case must be found).
2104  static const unsigned SubtreeLimit = 16;
2105
2106  SchedDFSResult DFSResult;
2107  BitVector ScheduledTrees;
2108  ILPOrder Cmp;
2109
2110  std::vector<SUnit*> ReadyQ;
2111public:
2112  ILPScheduler(bool MaximizeILP)
2113  : DFSResult(/*BottomUp=*/true, SubtreeLimit),
2114    Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
2115
2116  virtual void initialize(ScheduleDAGMI *DAG) {
2117    ReadyQ.clear();
2118    DFSResult.clear();
2119    DFSResult.resize(DAG->SUnits.size());
2120    ScheduledTrees.clear();
2121  }
2122
2123  virtual void registerRoots() {
2124    DFSResult.compute(ReadyQ);
2125    ScheduledTrees.resize(DFSResult.getNumSubtrees());
2126    // Restore the heap in ReadyQ with the updated DFS results.
2127    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2128  }
2129
2130  /// Implement MachineSchedStrategy interface.
2131  /// -----------------------------------------
2132
2133  /// Callback to select the highest priority node from the ready Q.
2134  virtual SUnit *pickNode(bool &IsTopNode) {
2135    if (ReadyQ.empty()) return NULL;
2136    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2137    SUnit *SU = ReadyQ.back();
2138    ReadyQ.pop_back();
2139    IsTopNode = false;
2140    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
2141          << *SU->getInstr()
2142          << " ILP: " << DFSResult.getILP(SU)
2143          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
2144          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU)) << '\n');
2145    return SU;
2146  }
2147
2148  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2149  /// DFSResult, and resort the priority Q.
2150  virtual void schedNode(SUnit *SU, bool IsTopNode) {
2151    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2152    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
2153      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
2154      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
2155      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2156    }
2157  }
2158
2159  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2160
2161  virtual void releaseBottomNode(SUnit *SU) {
2162    ReadyQ.push_back(SU);
2163    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2164  }
2165};
2166} // namespace
2167
2168static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2169  return new ScheduleDAGMI(C, new ILPScheduler(true));
2170}
2171static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2172  return new ScheduleDAGMI(C, new ILPScheduler(false));
2173}
2174static MachineSchedRegistry ILPMaxRegistry(
2175  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2176static MachineSchedRegistry ILPMinRegistry(
2177  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2178
2179//===----------------------------------------------------------------------===//
2180// Machine Instruction Shuffler for Correctness Testing
2181//===----------------------------------------------------------------------===//
2182
2183#ifndef NDEBUG
2184namespace {
2185/// Apply a less-than relation on the node order, which corresponds to the
2186/// instruction order prior to scheduling. IsReverse implements greater-than.
2187template<bool IsReverse>
2188struct SUnitOrder {
2189  bool operator()(SUnit *A, SUnit *B) const {
2190    if (IsReverse)
2191      return A->NodeNum > B->NodeNum;
2192    else
2193      return A->NodeNum < B->NodeNum;
2194  }
2195};
2196
2197/// Reorder instructions as much as possible.
2198class InstructionShuffler : public MachineSchedStrategy {
2199  bool IsAlternating;
2200  bool IsTopDown;
2201
2202  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2203  // gives nodes with a higher number higher priority, causing the latest
2204  // instructions to be scheduled first.
2205  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2206    TopQ;
2207  // When scheduling bottom-up, use greater-than as the queue priority.
2208  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2209    BottomQ;
2210public:
2211  InstructionShuffler(bool alternate, bool topdown)
2212    : IsAlternating(alternate), IsTopDown(topdown) {}
2213
2214  virtual void initialize(ScheduleDAGMI *) {
2215    TopQ.clear();
2216    BottomQ.clear();
2217  }
2218
2219  /// Implement MachineSchedStrategy interface.
2220  /// -----------------------------------------
2221
2222  virtual SUnit *pickNode(bool &IsTopNode) {
2223    SUnit *SU;
2224    if (IsTopDown) {
2225      do {
2226        if (TopQ.empty()) return NULL;
2227        SU = TopQ.top();
2228        TopQ.pop();
2229      } while (SU->isScheduled);
2230      IsTopNode = true;
2231    }
2232    else {
2233      do {
2234        if (BottomQ.empty()) return NULL;
2235        SU = BottomQ.top();
2236        BottomQ.pop();
2237      } while (SU->isScheduled);
2238      IsTopNode = false;
2239    }
2240    if (IsAlternating)
2241      IsTopDown = !IsTopDown;
2242    return SU;
2243  }
2244
2245  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2246
2247  virtual void releaseTopNode(SUnit *SU) {
2248    TopQ.push(SU);
2249  }
2250  virtual void releaseBottomNode(SUnit *SU) {
2251    BottomQ.push(SU);
2252  }
2253};
2254} // namespace
2255
2256static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2257  bool Alternate = !ForceTopDown && !ForceBottomUp;
2258  bool TopDown = !ForceBottomUp;
2259  assert((TopDown || !ForceTopDown) &&
2260         "-misched-topdown incompatible with -misched-bottomup");
2261  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2262}
2263static MachineSchedRegistry ShufflerRegistry(
2264  "shuffle", "Shuffle machine instructions alternating directions",
2265  createInstructionShuffler);
2266#endif // !NDEBUG
2267