MachineScheduler.cpp revision 071966f6bf2a535b318995cfa5d0f5b641fb4e14
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// MachineScheduler schedules machine instructions after phi elimination. It
11// preserves LiveIntervals so it can be invoked before register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "misched"
16
17#include "llvm/CodeGen/MachineScheduler.h"
18#include "llvm/ADT/OwningPtr.h"
19#include "llvm/ADT/PriorityQueue.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/CodeGen/LiveIntervalAnalysis.h"
22#include "llvm/CodeGen/Passes.h"
23#include "llvm/CodeGen/RegisterClassInfo.h"
24#include "llvm/CodeGen/ScheduleDFS.h"
25#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Debug.h"
28#include "llvm/Support/ErrorHandling.h"
29#include "llvm/Support/raw_ostream.h"
30#include <queue>
31
32using namespace llvm;
33
34namespace llvm {
35cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
36                           cl::desc("Force top-down list scheduling"));
37cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
38                            cl::desc("Force bottom-up list scheduling"));
39}
40
41#ifndef NDEBUG
42static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
43  cl::desc("Pop up a window to show MISched dags after they are processed"));
44
45static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
46  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
47#else
48static bool ViewMISchedDAGs = false;
49#endif // NDEBUG
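// In builds where the flags above exist, they can be passed to llc for
// debugging; e.g. "-misched-cutoff=N" stops reordering after N instructions
// and "-view-misched-dags" pops up the DAG for each region as it is processed.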
50
51// Threshold to very roughly model an out-of-order processor's instruction
52// buffers. If the actual value of this threshold matters much in practice, then
53// it can be specified by the machine model. For now, it's an experimental
54// tuning knob to determine when and if it matters.
55static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
56  cl::desc("Allow expected latency to exceed the critical path by N cycles "
57           "before attempting to balance ILP"),
58  cl::init(10U));
59
60// Experimental heuristics
61static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
62  cl::desc("Enable load clustering."), cl::init(true));
63
64// Experimental heuristics
65static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
66  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
67
68//===----------------------------------------------------------------------===//
69// Machine Instruction Scheduling Pass and Registry
70//===----------------------------------------------------------------------===//
71
72MachineSchedContext::MachineSchedContext():
73    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
74  RegClassInfo = new RegisterClassInfo();
75}
76
77MachineSchedContext::~MachineSchedContext() {
78  delete RegClassInfo;
79}
80
81namespace {
82/// MachineScheduler runs after coalescing and before register allocation.
83class MachineScheduler : public MachineSchedContext,
84                         public MachineFunctionPass {
85public:
86  MachineScheduler();
87
88  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
89
90  virtual void releaseMemory() {}
91
92  virtual bool runOnMachineFunction(MachineFunction&);
93
94  virtual void print(raw_ostream &O, const Module* = 0) const;
95
96  static char ID; // Class identification, replacement for typeinfo
97};
98} // namespace
99
100char MachineScheduler::ID = 0;
101
102char &llvm::MachineSchedulerID = MachineScheduler::ID;
103
104INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
105                      "Machine Instruction Scheduler", false, false)
106INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
107INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
108INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
109INITIALIZE_PASS_END(MachineScheduler, "misched",
110                    "Machine Instruction Scheduler", false, false)
111
112MachineScheduler::MachineScheduler()
113: MachineFunctionPass(ID) {
114  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
115}
116
117void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
118  AU.setPreservesCFG();
119  AU.addRequiredID(MachineDominatorsID);
120  AU.addRequired<MachineLoopInfo>();
121  AU.addRequired<AliasAnalysis>();
122  AU.addRequired<TargetPassConfig>();
123  AU.addRequired<SlotIndexes>();
124  AU.addPreserved<SlotIndexes>();
125  AU.addRequired<LiveIntervals>();
126  AU.addPreserved<LiveIntervals>();
127  MachineFunctionPass::getAnalysisUsage(AU);
128}
129
130MachinePassRegistry MachineSchedRegistry::Registry;
131
132/// A dummy default scheduler factory indicates whether the scheduler
133/// is overridden on the command line.
134static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
135  return 0;
136}
137
138/// MachineSchedOpt allows command line selection of the scheduler.
139static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
140               RegisterPassParser<MachineSchedRegistry> >
141MachineSchedOpt("misched",
142                cl::init(&useDefaultMachineSched), cl::Hidden,
143                cl::desc("Machine instruction scheduler to use"));
144
145static MachineSchedRegistry
146DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
147                     useDefaultMachineSched);
148
149/// Forward declare the standard machine scheduler. This will be used as the
150/// default scheduler if the target does not set a default.
151static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
152
153
154/// Decrement this iterator until reaching the top or a non-debug instr.
155static MachineBasicBlock::iterator
156priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
157  assert(I != Beg && "reached the top of the region, cannot decrement");
158  while (--I != Beg) {
159    if (!I->isDebugValue())
160      break;
161  }
162  return I;
163}
164
165/// If this iterator is a debug value, increment until reaching the End or a
166/// non-debug instruction.
167static MachineBasicBlock::iterator
168nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
169  for (; I != End; ++I) {
170    if (!I->isDebugValue())
171      break;
172  }
173  return I;
174}
175
176/// Top-level MachineScheduler pass driver.
177///
178/// Visit blocks in function order. Divide each block into scheduling regions
179/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
180/// consistent with the DAG builder, which traverses the interior of the
181/// scheduling regions bottom-up.
182///
183/// This design avoids exposing scheduling boundaries to the DAG builder,
184/// simplifying the DAG builder's support for "special" target instructions.
185/// At the same time the design allows target schedulers to operate across
186/// scheduling boundaries, for example to bundle the boundary instructions
187/// without reordering them. This creates complexity, because the target
188/// scheduler must update the RegionBegin and RegionEnd positions cached by
189/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
190/// design would be to split blocks at scheduling boundaries, but LLVM has a
191/// general bias against block splitting purely for implementation simplicity.
192bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
193  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
194
195  // Initialize the context of the pass.
196  MF = &mf;
197  MLI = &getAnalysis<MachineLoopInfo>();
198  MDT = &getAnalysis<MachineDominatorTree>();
199  PassConfig = &getAnalysis<TargetPassConfig>();
200  AA = &getAnalysis<AliasAnalysis>();
201
202  LIS = &getAnalysis<LiveIntervals>();
203  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
204
205  RegClassInfo->runOnMachineFunction(*MF);
206
207  // Select the scheduler, or set the default.
208  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
209  if (Ctor == useDefaultMachineSched) {
210    // Get the default scheduler set by the target.
211    Ctor = MachineSchedRegistry::getDefault();
212    if (!Ctor) {
213      Ctor = createConvergingSched;
214      MachineSchedRegistry::setDefault(Ctor);
215    }
216  }
217  // Instantiate the selected scheduler.
218  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
219
220  // Visit all machine basic blocks.
221  //
222  // TODO: Visit blocks in global postorder or postorder within the bottom-up
223  // loop tree. Then we can optionally compute global RegPressure.
224  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
225       MBB != MBBEnd; ++MBB) {
226
227    Scheduler->startBlock(MBB);
228
229    // Break the block into scheduling regions [I, RegionEnd), and schedule each
230    // region as soon as it is discovered. RegionEnd points to the scheduling
231    // boundary at the bottom of the region. The DAG does not include RegionEnd,
232    // but the region does (i.e. the next RegionEnd is above the previous
233    // RegionBegin). If the current block has no terminator then RegionEnd ==
234    // MBB->end() for the bottom region.
235    //
236    // The Scheduler may insert instructions during either schedule() or
237    // exitRegion(), even for empty regions. So the local iterators 'I' and
238    // 'RegionEnd' are invalid across these calls.
239    unsigned RemainingInstrs = MBB->size();
240    for (MachineBasicBlock::iterator RegionEnd = MBB->end();
241        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
242
243      // Avoid decrementing RegionEnd for blocks with no terminator.
244      if (RegionEnd != MBB->end()
245          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
246        --RegionEnd;
247        // Count the boundary instruction.
248        --RemainingInstrs;
249      }
250
251      // The next region starts above the previous region. Look backward in the
252      // instruction stream until we find the nearest boundary.
253      MachineBasicBlock::iterator I = RegionEnd;
254      for (; I != MBB->begin(); --I, --RemainingInstrs) {
255        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
256          break;
257      }
258      // Notify the scheduler of the region, even if we may skip scheduling
259      // it. Perhaps it still needs to be bundled.
260      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
261
262      // Skip empty scheduling regions (0 or 1 schedulable instructions).
263      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
264        // Close the current region. Bundle the terminator if needed.
265        // This invalidates 'RegionEnd' and 'I'.
266        Scheduler->exitRegion();
267        continue;
268      }
269      DEBUG(dbgs() << "********** MI Scheduling **********\n");
270      DEBUG(dbgs() << MF->getName()
271            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
272            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
273            else dbgs() << "End";
274            dbgs() << " Remaining: " << RemainingInstrs << "\n");
275
276      // Schedule a region: possibly reorder instructions.
277      // This invalidates 'RegionEnd' and 'I'.
278      Scheduler->schedule();
279
280      // Close the current region.
281      Scheduler->exitRegion();
282
283      // Scheduling has invalidated the current iterator 'I'. Ask the
284      // scheduler for the top of its scheduled region.
285      RegionEnd = Scheduler->begin();
286    }
287    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
288    Scheduler->finishBlock();
289  }
290  Scheduler->finalizeSchedule();
291  DEBUG(LIS->print(dbgs()));
292  return true;
293}
294
295void MachineScheduler::print(raw_ostream &O, const Module* m) const {
296  // unimplemented
297}
298
299#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
300void ReadyQueue::dump() {
301  dbgs() << Name << ": ";
302  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
303    dbgs() << Queue[i]->NodeNum << " ";
304  dbgs() << "\n";
305}
306#endif
307
308//===----------------------------------------------------------------------===//
309// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
310// preservation.
311//===----------------------------------------------------------------------===//
312
313bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
314  if (SuccSU != &ExitSU) {
315    // Do not use WillCreateCycle, it assumes SD scheduling.
316    // If Pred is reachable from Succ, then the edge creates a cycle.
317    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
318      return false;
319    Topo.AddPred(SuccSU, PredDep.getSUnit());
320  }
321  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
322  // Return true regardless of whether a new edge needed to be inserted.
323  return true;
324}
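// addEdge is the hook used by the DAG mutations later in this file:
// LoadClusterMutation and MacroFusion insert weak SDep::Cluster edges (and
// artificial edges) through it, relying on the reachability check above to
// avoid introducing cycles.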
325
326/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
327/// NumPredsLeft reaches zero, release the successor node.
328///
329/// FIXME: Adjust SuccSU height based on MinLatency.
330void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
331  SUnit *SuccSU = SuccEdge->getSUnit();
332
333  if (SuccEdge->isWeak()) {
334    --SuccSU->WeakPredsLeft;
335    if (SuccEdge->isCluster())
336      NextClusterSucc = SuccSU;
337    return;
338  }
339#ifndef NDEBUG
340  if (SuccSU->NumPredsLeft == 0) {
341    dbgs() << "*** Scheduling failed! ***\n";
342    SuccSU->dump(this);
343    dbgs() << " has been released too many times!\n";
344    llvm_unreachable(0);
345  }
346#endif
347  --SuccSU->NumPredsLeft;
348  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
349    SchedImpl->releaseTopNode(SuccSU);
350}
351
352/// releaseSuccessors - Call releaseSucc on each of SU's successors.
353void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
354  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
355       I != E; ++I) {
356    releaseSucc(SU, &*I);
357  }
358}
359
360/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
361/// NumSuccsLeft reaches zero, release the predecessor node.
362///
363/// FIXME: Adjust PredSU height based on MinLatency.
364void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
365  SUnit *PredSU = PredEdge->getSUnit();
366
367  if (PredEdge->isWeak()) {
368    --PredSU->WeakSuccsLeft;
369    if (PredEdge->isCluster())
370      NextClusterPred = PredSU;
371    return;
372  }
373#ifndef NDEBUG
374  if (PredSU->NumSuccsLeft == 0) {
375    dbgs() << "*** Scheduling failed! ***\n";
376    PredSU->dump(this);
377    dbgs() << " has been released too many times!\n";
378    llvm_unreachable(0);
379  }
380#endif
381  --PredSU->NumSuccsLeft;
382  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
383    SchedImpl->releaseBottomNode(PredSU);
384}
385
386/// releasePredecessors - Call releasePred on each of SU's predecessors.
387void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
388  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
389       I != E; ++I) {
390    releasePred(SU, &*I);
391  }
392}
393
394void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
395                                    MachineBasicBlock::iterator InsertPos) {
396  // Advance RegionBegin if the first instruction moves down.
397  if (&*RegionBegin == MI)
398    ++RegionBegin;
399
400  // Update the instruction stream.
401  BB->splice(InsertPos, BB, MI);
402
403  // Update LiveIntervals
404  LIS->handleMove(MI, /*UpdateFlags=*/true);
405
406  // Recede RegionBegin if an instruction moves above the first.
407  if (RegionBegin == InsertPos)
408    RegionBegin = MI;
409}
410
411bool ScheduleDAGMI::checkSchedLimit() {
412#ifndef NDEBUG
413  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
414    CurrentTop = CurrentBottom;
415    return false;
416  }
417  ++NumInstrsScheduled;
418#endif
419  return true;
420}
421
422/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
423/// crossing a scheduling boundary. [begin, end) includes all instructions in
424/// the region, including the boundary itself and single-instruction regions
425/// that don't get scheduled.
426void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
427                                MachineBasicBlock::iterator begin,
428                                MachineBasicBlock::iterator end,
429                                unsigned endcount)
430{
431  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
432
433  // For convenience remember the end of the liveness region.
434  LiveRegionEnd =
435    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
436}
437
438// Set up the register pressure trackers for the top and bottom scheduled
439// regions.
440void ScheduleDAGMI::initRegPressure() {
441  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
442  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
443
444  // Close the RPTracker to finalize live ins.
445  RPTracker.closeRegion();
446
447  DEBUG(RPTracker.getPressure().dump(TRI));
448
449  // Initialize the live ins and live outs.
450  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
451  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
452
453  // Close one end of the tracker so we can call
454  // getMaxUpward/DownwardPressureDelta before advancing across any
455  // instructions. This converts currently live regs into live ins/outs.
456  TopRPTracker.closeTop();
457  BotRPTracker.closeBottom();
458
459  // Account for liveness generated by the region boundary.
460  if (LiveRegionEnd != RegionEnd)
461    BotRPTracker.recede();
462
463  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
464
465  // Cache the list of excess pressure sets in this region. This will also track
466  // the max pressure in the scheduled code for these sets.
467  RegionCriticalPSets.clear();
468  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
469  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
470    unsigned Limit = TRI->getRegPressureSetLimit(i);
471    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
472          << "Limit " << Limit
473          << " Actual " << RegionPressure[i] << "\n");
474    if (RegionPressure[i] > Limit)
475      RegionCriticalPSets.push_back(PressureElement(i, 0));
476  }
477  DEBUG(dbgs() << "Excess PSets: ";
478        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
479          dbgs() << TRI->getRegPressureSetName(
480            RegionCriticalPSets[i].PSetID) << " ";
481        dbgs() << "\n");
482}
483
484// FIXME: When the pressure tracker deals in pressure differences then we won't
485// iterate over all RegionCriticalPSets[i].
486void ScheduleDAGMI::
487updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
488  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
489    unsigned ID = RegionCriticalPSets[i].PSetID;
490    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
491    if ((int)NewMaxPressure[ID] > MaxUnits)
492      MaxUnits = NewMaxPressure[ID];
493  }
494}
495
496/// schedule - Called back from MachineScheduler::runOnMachineFunction
497/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
498/// only includes instructions that have DAG nodes, not scheduling boundaries.
499///
500/// This is a skeletal driver, with all the functionality pushed into helpers,
501/// so that it can be easily extended by experimental schedulers. Generally,
502/// implementing MachineSchedStrategy should be sufficient to implement a new
503/// scheduling algorithm. However, if a scheduler further subclasses
504/// ScheduleDAGMI then it will want to override this virtual method in order to
505/// update any specialized state.
506void ScheduleDAGMI::schedule() {
507  buildDAGWithRegPressure();
508
509  Topo.InitDAGTopologicalSorting();
510
511  postprocessDAG();
512
513  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
514          SUnits[su].dumpAll(this));
515
516  if (ViewMISchedDAGs) viewGraph();
517
518  initQueues();
519
520  bool IsTopNode = false;
521  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
522    assert(!SU->isScheduled && "Node already scheduled");
523    if (!checkSchedLimit())
524      break;
525
526    scheduleMI(SU, IsTopNode);
527
528    updateQueues(SU, IsTopNode);
529  }
530  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
531
532  placeDebugValues();
533
534  DEBUG({
535      unsigned BBNum = begin()->getParent()->getNumber();
536      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
537      dumpSchedule();
538      dbgs() << '\n';
539    });
540}
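// A minimal sketch of what a new scheduler needs to provide (MyStrategy is a
// hypothetical name; the signatures mirror the ConvergingScheduler overrides
// below):
//
//   struct MyStrategy : public MachineSchedStrategy {
//     virtual void initialize(ScheduleDAGMI *DAG);
//     virtual SUnit *pickNode(bool &IsTopNode);
//     virtual void schedNode(SUnit *SU, bool IsTopNode);
//     virtual void releaseTopNode(SUnit *SU);
//     virtual void releaseBottomNode(SUnit *SU);
//     virtual void registerRoots();
//   };
//
// The driver above only calls back through this interface, so no further
// subclassing of ScheduleDAGMI is required for a new scheduling algorithm.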
541
542/// Build the DAG and set up three register pressure trackers.
543void ScheduleDAGMI::buildDAGWithRegPressure() {
544  // Initialize the register pressure tracker used by buildSchedGraph.
545  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
546
547  // Account for liveness generated by the region boundary.
548  if (LiveRegionEnd != RegionEnd)
549    RPTracker.recede();
550
551  // Build the DAG, and compute current register pressure.
552  buildSchedGraph(AA, &RPTracker);
553  if (ViewMISchedDAGs) viewGraph();
554
555  // Initialize top/bottom trackers after computing region pressure.
556  initRegPressure();
557}
558
559/// Apply each ScheduleDAGMutation step in order.
560void ScheduleDAGMI::postprocessDAG() {
561  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
562    Mutations[i]->apply(this);
563  }
564}
565
566// Release all DAG roots for scheduling.
567//
568// Nodes with unreleased weak edges can still be roots.
569void ScheduleDAGMI::releaseRoots() {
570  SmallVector<SUnit*, 16> BotRoots;
571
572  for (std::vector<SUnit>::iterator
573         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
574    SUnit *SU = &(*I);
575    // A SUnit is ready to top schedule if it has no predecessors.
576    if (!I->NumPredsLeft && SU != &EntrySU)
577      SchedImpl->releaseTopNode(SU);
578    // A SUnit is ready to bottom schedule if it has no successors.
579    if (!I->NumSuccsLeft && SU != &ExitSU)
580      BotRoots.push_back(SU);
581  }
582  // Release bottom roots in reverse order so the higher priority nodes appear
583  // first. This is more natural and slightly more efficient.
584  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
585         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
586    SchedImpl->releaseBottomNode(*I);
587}
588
589/// Identify DAG roots and set up scheduler queues.
590void ScheduleDAGMI::initQueues() {
591  NextClusterSucc = NULL;
592  NextClusterPred = NULL;
593
594  // Initialize the strategy before modifying the DAG.
595  SchedImpl->initialize(this);
596
597  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
598  releaseRoots();
599
600  releaseSuccessors(&EntrySU);
601  releasePredecessors(&ExitSU);
602
603  SchedImpl->registerRoots();
604
605  // Advance past initial DebugValues.
606  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
607  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
608  TopRPTracker.setPos(CurrentTop);
609
610  CurrentBottom = RegionEnd;
611}
612
613/// Move an instruction and update register pressure.
614void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
615  // Move the instruction to its new location in the instruction stream.
616  MachineInstr *MI = SU->getInstr();
617
618  if (IsTopNode) {
619    assert(SU->isTopReady() && "node still has unscheduled dependencies");
620    if (&*CurrentTop == MI)
621      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
622    else {
623      moveInstruction(MI, CurrentTop);
624      TopRPTracker.setPos(MI);
625    }
626
627    // Update top scheduled pressure.
628    TopRPTracker.advance();
629    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
630    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
631  }
632  else {
633    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
634    MachineBasicBlock::iterator priorII =
635      priorNonDebug(CurrentBottom, CurrentTop);
636    if (&*priorII == MI)
637      CurrentBottom = priorII;
638    else {
639      if (&*CurrentTop == MI) {
640        CurrentTop = nextIfDebug(++CurrentTop, priorII);
641        TopRPTracker.setPos(CurrentTop);
642      }
643      moveInstruction(MI, CurrentBottom);
644      CurrentBottom = MI;
645    }
646    // Update bottom scheduled pressure.
647    BotRPTracker.recede();
648    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
649    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
650  }
651}
652
653/// Update scheduler queues after scheduling an instruction.
654void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
655  // Release dependent instructions for scheduling.
656  if (IsTopNode)
657    releaseSuccessors(SU);
658  else
659    releasePredecessors(SU);
660
661  SU->isScheduled = true;
662
663  // Notify the scheduling strategy after updating the DAG.
664  SchedImpl->schedNode(SU, IsTopNode);
665}
666
667/// Reinsert any remaining debug_values, just like the PostRA scheduler.
668void ScheduleDAGMI::placeDebugValues() {
669  // If first instruction was a DBG_VALUE then put it back.
670  if (FirstDbgValue) {
671    BB->splice(RegionBegin, BB, FirstDbgValue);
672    RegionBegin = FirstDbgValue;
673  }
674
675  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
676         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
677    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
678    MachineInstr *DbgValue = P.first;
679    MachineBasicBlock::iterator OrigPrevMI = P.second;
680    if (&*RegionBegin == DbgValue)
681      ++RegionBegin;
682    BB->splice(++OrigPrevMI, BB, DbgValue);
683    if (OrigPrevMI == llvm::prior(RegionEnd))
684      RegionEnd = DbgValue;
685  }
686  DbgValues.clear();
687  FirstDbgValue = NULL;
688}
689
690#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
691void ScheduleDAGMI::dumpSchedule() const {
692  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
693    if (SUnit *SU = getSUnit(&(*MI)))
694      SU->dump(this);
695    else
696      dbgs() << "Missing SUnit\n";
697  }
698}
699#endif
700
701//===----------------------------------------------------------------------===//
702// LoadClusterMutation - DAG post-processing to cluster loads.
703//===----------------------------------------------------------------------===//
704
705namespace {
706/// \brief Post-process the DAG to create cluster edges between neighboring
707/// loads.
708class LoadClusterMutation : public ScheduleDAGMutation {
709  struct LoadInfo {
710    SUnit *SU;
711    unsigned BaseReg;
712    unsigned Offset;
713    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
714      : SU(su), BaseReg(reg), Offset(ofs) {}
715  };
716  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
717                           const LoadClusterMutation::LoadInfo &RHS);
718
719  const TargetInstrInfo *TII;
720  const TargetRegisterInfo *TRI;
721public:
722  LoadClusterMutation(const TargetInstrInfo *tii,
723                      const TargetRegisterInfo *tri)
724    : TII(tii), TRI(tri) {}
725
726  virtual void apply(ScheduleDAGMI *DAG);
727protected:
728  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
729};
730} // anonymous
731
732bool LoadClusterMutation::LoadInfoLess(
733  const LoadClusterMutation::LoadInfo &LHS,
734  const LoadClusterMutation::LoadInfo &RHS) {
735  if (LHS.BaseReg != RHS.BaseReg)
736    return LHS.BaseReg < RHS.BaseReg;
737  return LHS.Offset < RHS.Offset;
738}
739
740void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
741                                                  ScheduleDAGMI *DAG) {
742  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
743  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
744    SUnit *SU = Loads[Idx];
745    unsigned BaseReg;
746    unsigned Offset;
747    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
748      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
749  }
750  if (LoadRecords.size() < 2)
751    return;
752  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
753  unsigned ClusterLength = 1;
754  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
755    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
756      ClusterLength = 1;
757      continue;
758    }
759
760    SUnit *SUa = LoadRecords[Idx].SU;
761    SUnit *SUb = LoadRecords[Idx+1].SU;
762    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
763        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
764
765      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
766            << SUb->NodeNum << ")\n");
767      // Copy successor edges from SUa to SUb. Interleaving computation
768      // dependent on SUa can prevent load combining due to register reuse.
769      // Predecessor edges do not need to be copied from SUb to SUa since nearby
770      // loads should have effectively the same inputs.
771      for (SUnit::const_succ_iterator
772             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
773        if (SI->getSUnit() == SUb)
774          continue;
775        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
776        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
777      }
778      ++ClusterLength;
779    }
780    else
781      ClusterLength = 1;
782  }
783}
784
785/// \brief Callback from DAG postProcessing to create cluster edges for loads.
786void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
787  // Map DAG NodeNum to store chain ID.
788  DenseMap<unsigned, unsigned> StoreChainIDs;
789  // Map each store chain to a set of dependent loads.
790  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
791  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
792    SUnit *SU = &DAG->SUnits[Idx];
793    if (!SU->getInstr()->mayLoad())
794      continue;
795    unsigned ChainPredID = DAG->SUnits.size();
796    for (SUnit::const_pred_iterator
797           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
798      if (PI->isCtrl()) {
799        ChainPredID = PI->getSUnit()->NodeNum;
800        break;
801      }
802    }
803    // Check if this chain-like pred has been seen
804    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
805    unsigned NumChains = StoreChainDependents.size();
806    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
807      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
808    if (Result.second)
809      StoreChainDependents.resize(NumChains + 1);
810    StoreChainDependents[Result.first->second].push_back(SU);
811  }
812  // Iterate over the store chains.
813  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
814    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
815}
816
817//===----------------------------------------------------------------------===//
818// MacroFusion - DAG post-processing to encourage fusion of macro ops.
819//===----------------------------------------------------------------------===//
820
821namespace {
822/// \brief Post-process the DAG to create cluster edges between instructions
823/// that may be fused by the processor into a single operation.
824class MacroFusion : public ScheduleDAGMutation {
825  const TargetInstrInfo *TII;
826public:
827  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
828
829  virtual void apply(ScheduleDAGMI *DAG);
830};
831} // anonymous
832
833/// \brief Callback from DAG postProcessing to create cluster edges to encourage
834/// fused operations.
835void MacroFusion::apply(ScheduleDAGMI *DAG) {
836  // For now, assume targets can only fuse with the branch.
837  MachineInstr *Branch = DAG->ExitSU.getInstr();
838  if (!Branch)
839    return;
840
841  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
842    SUnit *SU = &DAG->SUnits[--Idx];
843    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
844      continue;
845
846    // Create a single weak edge from SU to ExitSU. The only effect is to cause
847    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
848    // need to copy predecessor edges from ExitSU to SU, since top-down
849    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
850    // of SU, we could create an artificial edge from the deepest root, but it
851    // hasn't been needed yet.
852    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
853    (void)Success;
854    assert(Success && "No DAG nodes should be reachable from ExitSU");
855
856    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
857    break;
858  }
859}
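// Neither mutation has any effect unless it is attached to the DAG before
// postprocessDAG() runs. A sketch of the expected hookup, assuming a
// ScheduleDAGMI::addMutation entry point and guarded by the experimental
// flags defined at the top of this file:
//
//   if (EnableLoadCluster)
//     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
//   if (EnableMacroFusion)
//     DAG->addMutation(new MacroFusion(DAG->TII));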
860
861//===----------------------------------------------------------------------===//
862// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
863//===----------------------------------------------------------------------===//
864
865namespace {
866/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
867/// the schedule.
868class ConvergingScheduler : public MachineSchedStrategy {
869public:
870  /// Represent the type of SchedCandidate found within a single queue.
871  /// pickNodeBidirectional depends on these listed by decreasing priority.
872  enum CandReason {
873    NoCand, SingleExcess, SingleCritical, Cluster,
874    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
875    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
876    NodeOrder};
877
878#ifndef NDEBUG
879  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
880#endif
881
882  /// Policy for scheduling the next instruction in the candidate's zone.
883  struct CandPolicy {
884    bool ReduceLatency;
885    unsigned ReduceResIdx;
886    unsigned DemandResIdx;
887
888    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
889  };
890
891  /// Status of an instruction's critical resource consumption.
892  struct SchedResourceDelta {
893    // Count critical resources in the scheduled region required by SU.
894    unsigned CritResources;
895
896    // Count critical resources from another region consumed by SU.
897    unsigned DemandedResources;
898
899    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
900
901    bool operator==(const SchedResourceDelta &RHS) const {
902      return CritResources == RHS.CritResources
903        && DemandedResources == RHS.DemandedResources;
904    }
905    bool operator!=(const SchedResourceDelta &RHS) const {
906      return !operator==(RHS);
907    }
908  };
909
910  /// Store the state used by ConvergingScheduler heuristics, required for the
911  /// lifetime of one invocation of pickNode().
912  struct SchedCandidate {
913    CandPolicy Policy;
914
915    // The best SUnit candidate.
916    SUnit *SU;
917
918    // The reason for this candidate.
919    CandReason Reason;
920
921    // Register pressure values for the best candidate.
922    RegPressureDelta RPDelta;
923
924    // Critical resource consumption of the best candidate.
925    SchedResourceDelta ResDelta;
926
927    SchedCandidate(const CandPolicy &policy)
928    : Policy(policy), SU(NULL), Reason(NoCand) {}
929
930    bool isValid() const { return SU; }
931
932    // Copy the status of another candidate without changing policy.
933    void setBest(SchedCandidate &Best) {
934      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
935      SU = Best.SU;
936      Reason = Best.Reason;
937      RPDelta = Best.RPDelta;
938      ResDelta = Best.ResDelta;
939    }
940
941    void initResourceDelta(const ScheduleDAGMI *DAG,
942                           const TargetSchedModel *SchedModel);
943  };
944
945  /// Summarize the unscheduled region.
946  struct SchedRemainder {
947    // Critical path through the DAG in expected latency.
948    unsigned CriticalPath;
949
950    // Unscheduled resources
951    SmallVector<unsigned, 16> RemainingCounts;
952    // Critical resource for the unscheduled zone.
953    unsigned CritResIdx;
954    // Number of micro-ops left to schedule.
955    unsigned RemainingMicroOps;
956    // Is the unscheduled zone resource limited.
957    bool IsResourceLimited;
958
959    unsigned MaxRemainingCount;
960
961    void reset() {
962      CriticalPath = 0;
963      RemainingCounts.clear();
964      CritResIdx = 0;
965      RemainingMicroOps = 0;
966      IsResourceLimited = false;
967      MaxRemainingCount = 0;
968    }
969
970    SchedRemainder() { reset(); }
971
972    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
973  };
974
975  /// Each scheduling boundary is associated with ready queues. It tracks the
976  /// current cycle in the direction of movement, and maintains the state
977  /// of "hazards" and other interlocks at the current cycle.
978  struct SchedBoundary {
979    ScheduleDAGMI *DAG;
980    const TargetSchedModel *SchedModel;
981    SchedRemainder *Rem;
982
983    ReadyQueue Available;
984    ReadyQueue Pending;
985    bool CheckPending;
986
987    // For heuristics, keep a list of the nodes that immediately depend on the
988    // most recently scheduled node.
989    SmallPtrSet<const SUnit*, 8> NextSUs;
990
991    ScheduleHazardRecognizer *HazardRec;
992
993    unsigned CurrCycle;
994    unsigned IssueCount;
995
996    /// MinReadyCycle - Cycle of the soonest available instruction.
997    unsigned MinReadyCycle;
998
999    // The expected latency of the critical path in this scheduled zone.
1000    unsigned ExpectedLatency;
1001
1002    // Resources used in the scheduled zone beyond this boundary.
1003    SmallVector<unsigned, 16> ResourceCounts;
1004
1005    // Cache the critical resources ID in this scheduled zone.
1006    unsigned CritResIdx;
1007
1008    // Is the scheduled region resource limited vs. latency limited.
1009    bool IsResourceLimited;
1010
1011    unsigned ExpectedCount;
1012
1013    // Policy flag: attempt to find ILP until expected latency is covered.
1014    bool ShouldIncreaseILP;
1015
1016#ifndef NDEBUG
1017    // Remember the greatest min operand latency.
1018    unsigned MaxMinLatency;
1019#endif
1020
1021    void reset() {
1022      Available.clear();
1023      Pending.clear();
1024      CheckPending = false;
1025      NextSUs.clear();
1026      HazardRec = 0;
1027      CurrCycle = 0;
1028      IssueCount = 0;
1029      MinReadyCycle = UINT_MAX;
1030      ExpectedLatency = 0;
1031      ResourceCounts.resize(1);
1032      assert(!ResourceCounts[0] && "nonzero count for bad resource");
1033      CritResIdx = 0;
1034      IsResourceLimited = false;
1035      ExpectedCount = 0;
1036      ShouldIncreaseILP = false;
1037#ifndef NDEBUG
1038      MaxMinLatency = 0;
1039#endif
1040      // Reserve a zero-count for invalid CritResIdx.
1041      ResourceCounts.resize(1);
1042    }
1043
1044    /// Pending queues extend the ready queues with the same ID and the
1045    /// PendingFlag set.
1046    SchedBoundary(unsigned ID, const Twine &Name):
1047      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1048      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
1049      reset();
1050    }
1051
1052    ~SchedBoundary() { delete HazardRec; }
1053
1054    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1055              SchedRemainder *rem);
1056
1057    bool isTop() const {
1058      return Available.getID() == ConvergingScheduler::TopQID;
1059    }
1060
1061    unsigned getUnscheduledLatency(SUnit *SU) const {
1062      if (isTop())
1063        return SU->getHeight();
1064      return SU->getDepth();
1065    }
1066
1067    unsigned getCriticalCount() const {
1068      return ResourceCounts[CritResIdx];
1069    }
1070
1071    bool checkHazard(SUnit *SU);
1072
1073    void checkILPPolicy();
1074
1075    void releaseNode(SUnit *SU, unsigned ReadyCycle);
1076
1077    void bumpCycle();
1078
1079    void countResource(unsigned PIdx, unsigned Cycles);
1080
1081    void bumpNode(SUnit *SU);
1082
1083    void releasePending();
1084
1085    void removeReady(SUnit *SU);
1086
1087    SUnit *pickOnlyChoice();
1088  };
1089
1090private:
1091  ScheduleDAGMI *DAG;
1092  const TargetSchedModel *SchedModel;
1093  const TargetRegisterInfo *TRI;
1094
1095  // State of the top and bottom scheduled instruction boundaries.
1096  SchedRemainder Rem;
1097  SchedBoundary Top;
1098  SchedBoundary Bot;
1099
1100public:
1101  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1102  enum {
1103    TopQID = 1,
1104    BotQID = 2,
1105    LogMaxQID = 2
1106  };
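  // Given the SchedBoundary constructor above, the corresponding pending
  // queues get IDs (TopQID << LogMaxQID) and (BotQID << LogMaxQID), i.e. 4
  // and 8, so queue membership can be encoded as disjoint bits in
  // SUnit::NodeQueueId.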
1107
1108  ConvergingScheduler():
1109    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1110
1111  virtual void initialize(ScheduleDAGMI *dag);
1112
1113  virtual SUnit *pickNode(bool &IsTopNode);
1114
1115  virtual void schedNode(SUnit *SU, bool IsTopNode);
1116
1117  virtual void releaseTopNode(SUnit *SU);
1118
1119  virtual void releaseBottomNode(SUnit *SU);
1120
1121  virtual void registerRoots();
1122
1123protected:
1124  void balanceZones(
1125    ConvergingScheduler::SchedBoundary &CriticalZone,
1126    ConvergingScheduler::SchedCandidate &CriticalCand,
1127    ConvergingScheduler::SchedBoundary &OppositeZone,
1128    ConvergingScheduler::SchedCandidate &OppositeCand);
1129
1130  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1131                           ConvergingScheduler::SchedCandidate &BotCand);
1132
1133  void tryCandidate(SchedCandidate &Cand,
1134                    SchedCandidate &TryCand,
1135                    SchedBoundary &Zone,
1136                    const RegPressureTracker &RPTracker,
1137                    RegPressureTracker &TempTracker);
1138
1139  SUnit *pickNodeBidirectional(bool &IsTopNode);
1140
1141  void pickNodeFromQueue(SchedBoundary &Zone,
1142                         const RegPressureTracker &RPTracker,
1143                         SchedCandidate &Candidate);
1144
1145#ifndef NDEBUG
1146  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
1147#endif
1148};
1149} // namespace
1150
1151void ConvergingScheduler::SchedRemainder::
1152init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1153  reset();
1154  if (!SchedModel->hasInstrSchedModel())
1155    return;
1156  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1157  for (std::vector<SUnit>::iterator
1158         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1159    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1160    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1161    for (TargetSchedModel::ProcResIter
1162           PI = SchedModel->getWriteProcResBegin(SC),
1163           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1164      unsigned PIdx = PI->ProcResourceIdx;
1165      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1166      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1167    }
1168  }
1169  for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1170       PIdx != PEnd; ++PIdx) {
1171    if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1172        >= (int)SchedModel->getLatencyFactor()) {
1173      CritResIdx = PIdx;
1174    }
1175  }
1176  MaxRemainingCount = std::max(
1177    RemainingMicroOps * SchedModel->getMicroOpFactor(),
1178    RemainingCounts[CritResIdx]);
1179}
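// The loop above assumes the TargetSchedModel factors put all counts in
// comparable units (micro-ops scaled by getMicroOpFactor(), each resource's
// cycles by getResourceFactor(PIdx)); a resource displaces the current
// critical resource only when its remaining count exceeds it by at least one
// cycle's worth, i.e. getLatencyFactor().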
1180
1181void ConvergingScheduler::SchedBoundary::
1182init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1183  reset();
1184  DAG = dag;
1185  SchedModel = smodel;
1186  Rem = rem;
1187  if (SchedModel->hasInstrSchedModel())
1188    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1189}
1190
1191void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1192  DAG = dag;
1193  SchedModel = DAG->getSchedModel();
1194  TRI = DAG->TRI;
1195  Rem.init(DAG, SchedModel);
1196  Top.init(DAG, SchedModel, &Rem);
1197  Bot.init(DAG, SchedModel, &Rem);
1198
1199  // Initialize resource counts.
1200
1201  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1202  // are disabled, then these HazardRecs will be disabled.
1203  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1204  const TargetMachine &TM = DAG->MF.getTarget();
1205  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1206  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1207
1208  assert((!ForceTopDown || !ForceBottomUp) &&
1209         "-misched-topdown incompatible with -misched-bottomup");
1210}
1211
1212void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1213  if (SU->isScheduled)
1214    return;
1215
1216  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1217       I != E; ++I) {
1218    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1219    unsigned MinLatency = I->getMinLatency();
1220#ifndef NDEBUG
1221    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1222#endif
1223    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1224      SU->TopReadyCycle = PredReadyCycle + MinLatency;
1225  }
1226  Top.releaseNode(SU, SU->TopReadyCycle);
1227}
1228
1229void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1230  if (SU->isScheduled)
1231    return;
1232
1233  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1234
1235  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1236       I != E; ++I) {
1237    if (I->isWeak())
1238      continue;
1239    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1240    unsigned MinLatency = I->getMinLatency();
1241#ifndef NDEBUG
1242    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1243#endif
1244    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1245      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1246  }
1247  Bot.releaseNode(SU, SU->BotReadyCycle);
1248}
1249
1250void ConvergingScheduler::registerRoots() {
1251  Rem.CriticalPath = DAG->ExitSU.getDepth();
1252  // Some roots may not feed into ExitSU. Check all of them in case.
1253  for (std::vector<SUnit*>::const_iterator
1254         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1255    if ((*I)->getDepth() > Rem.CriticalPath)
1256      Rem.CriticalPath = (*I)->getDepth();
1257  }
1258  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1259}
1260
1261/// Does this SU have a hazard within the current instruction group.
1262///
1263/// The scheduler supports two modes of hazard recognition. The first is the
1264/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1265/// supports highly complicated in-order reservation tables
1266/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1267///
1268/// The second is a streamlined mechanism that checks for hazards based on
1269/// simple counters that the scheduler itself maintains. It explicitly checks
1270/// for instruction dispatch limitations, including the number of micro-ops that
1271/// can dispatch per cycle.
1272///
1273/// TODO: Also check whether the SU must start a new group.
1274bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1275  if (HazardRec->isEnabled())
1276    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1277
1278  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1279  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1280    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1281          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1282    return true;
1283  }
1284  return false;
1285}
1286
1287/// If expected latency is covered, disable ILP policy.
1288void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
1289  if (ShouldIncreaseILP
1290      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
1291    ShouldIncreaseILP = false;
1292    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
1293  }
1294}
1295
1296void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1297                                                     unsigned ReadyCycle) {
1298
1299  if (ReadyCycle < MinReadyCycle)
1300    MinReadyCycle = ReadyCycle;
1301
1302  // Check for interlocks first. For the purpose of other heuristics, an
1303  // instruction that cannot issue appears as if it's not in the ReadyQueue.
1304  if (ReadyCycle > CurrCycle || checkHazard(SU))
1305    Pending.push(SU);
1306  else
1307    Available.push(SU);
1308
1309  // Record this node as an immediate dependent of the scheduled node.
1310  NextSUs.insert(SU);
1311
1312  // If CriticalPath has been computed, then check if the unscheduled nodes
1313  // exceed the ILP window. Before registerRoots, CriticalPath==0.
1314  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
1315                            > Rem->CriticalPath + ILPWindow)) {
1316    ShouldIncreaseILP = true;
1317    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
1318          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
1319  }
1320}
1321
1322/// Move the boundary of scheduled code by one cycle.
1323void ConvergingScheduler::SchedBoundary::bumpCycle() {
1324  unsigned Width = SchedModel->getIssueWidth();
1325  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1326
1327  unsigned NextCycle = CurrCycle + 1;
1328  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1329  if (MinReadyCycle > NextCycle) {
1330    IssueCount = 0;
1331    NextCycle = MinReadyCycle;
1332  }
1333
1334  if (!HazardRec->isEnabled()) {
1335    // Bypass HazardRec virtual calls.
1336    CurrCycle = NextCycle;
1337  }
1338  else {
1339    // Bypass getHazardType calls in case of long latency.
1340    for (; CurrCycle != NextCycle; ++CurrCycle) {
1341      if (isTop())
1342        HazardRec->AdvanceCycle();
1343      else
1344        HazardRec->RecedeCycle();
1345    }
1346  }
1347  CheckPending = true;
1348  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1349
1350  DEBUG(dbgs() << "  *** " << Available.getName() << " cycle "
1351        << CurrCycle << '\n');
1352}
1353
1354/// Add the given processor resource to this scheduled zone.
1355void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1356                                                       unsigned Cycles) {
1357  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1358  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1359        << " +(" << Cycles << "x" << Factor
1360        << ") / " << SchedModel->getLatencyFactor() << '\n');
1361
1362  unsigned Count = Factor * Cycles;
1363  ResourceCounts[PIdx] += Count;
1364  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1365  Rem->RemainingCounts[PIdx] -= Count;
1366
1367  // Reset MaxRemainingCount for sanity.
1368  Rem->MaxRemainingCount = 0;
1369
1370  // Check if this resource exceeds the current critical resource by a full
1371  // cycle. If so, it becomes the critical resource.
1372  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1373      >= (int)SchedModel->getLatencyFactor()) {
1374    CritResIdx = PIdx;
1375    DEBUG(dbgs() << "  *** Critical resource "
1376          << SchedModel->getProcResource(PIdx)->Name << " x"
1377          << ResourceCounts[PIdx] << '\n');
1378  }
1379}
1380
1381/// Move the boundary of scheduled code by one SUnit.
1382void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1383  // Update the reservation table.
1384  if (HazardRec->isEnabled()) {
1385    if (!isTop() && SU->isCall) {
1386      // Calls are scheduled with their preceding instructions. For bottom-up
1387      // scheduling, clear the pipeline state before emitting.
1388      HazardRec->Reset();
1389    }
1390    HazardRec->EmitInstruction(SU);
1391  }
1392  // Update resource counts and critical resource.
1393  if (SchedModel->hasInstrSchedModel()) {
1394    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1395    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1396    for (TargetSchedModel::ProcResIter
1397           PI = SchedModel->getWriteProcResBegin(SC),
1398           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1399      countResource(PI->ProcResourceIdx, PI->Cycles);
1400    }
1401  }
1402  if (isTop()) {
1403    if (SU->getDepth() > ExpectedLatency)
1404      ExpectedLatency = SU->getDepth();
1405  }
1406  else {
1407    if (SU->getHeight() > ExpectedLatency)
1408      ExpectedLatency = SU->getHeight();
1409  }
1410
1411  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1412
1413  // Check the instruction group dispatch limit.
1414  // TODO: Check if this SU must end a dispatch group.
1415  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1416
1417  // checkHazard prevents scheduling multiple instructions per cycle that exceed
1418  // issue width. However, we commonly reach the maximum. In this case
1419  // opportunistically bump the cycle to avoid uselessly checking everything in
1420  // the readyQ. Furthermore, a single instruction may produce more than one
1421  // cycle's worth of micro-ops.
1422  if (IssueCount >= SchedModel->getIssueWidth()) {
1423    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1424    bumpCycle();
1425  }
1426}
1427
1428/// Release pending ready nodes into the available queue. This makes them
1429/// visible to heuristics.
1430void ConvergingScheduler::SchedBoundary::releasePending() {
1431  // If the available queue is empty, it is safe to reset MinReadyCycle.
1432  if (Available.empty())
1433    MinReadyCycle = UINT_MAX;
1434
1435  // Check to see if any of the pending instructions are ready to issue.  If
1436  // so, add them to the available queue.
1437  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1438    SUnit *SU = *(Pending.begin()+i);
1439    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1440
1441    if (ReadyCycle < MinReadyCycle)
1442      MinReadyCycle = ReadyCycle;
1443
1444    if (ReadyCycle > CurrCycle)
1445      continue;
1446
1447    if (checkHazard(SU))
1448      continue;
1449
1450    Available.push(SU);
1451    Pending.remove(Pending.begin()+i);
1452    --i; --e;
1453  }
1454  DEBUG(if (!Pending.empty()) Pending.dump());
1455  CheckPending = false;
1456}
1457
1458/// Remove SU from the ready set for this boundary.
1459void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1460  if (Available.isInQueue(SU))
1461    Available.remove(Available.find(SU));
1462  else {
1463    assert(Pending.isInQueue(SU) && "bad ready count");
1464    Pending.remove(Pending.find(SU));
1465  }
1466}
1467
1468/// If this queue only has one ready candidate, return it. As a side effect,
1469/// defer any nodes that now hit a hazard, and advance the cycle until at least
1470/// one node is ready. If multiple instructions are ready, return NULL.
1471SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1472  if (CheckPending)
1473    releasePending();
1474
1475  if (IssueCount > 0) {
1476    // Defer any ready instrs that now have a hazard.
1477    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1478      if (checkHazard(*I)) {
1479        Pending.push(*I);
1480        I = Available.remove(I);
1481        continue;
1482      }
1483      ++I;
1484    }
1485  }
1486  for (unsigned i = 0; Available.empty(); ++i) {
1487    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1488           "permanent hazard"); (void)i;
1489    bumpCycle();
1490    releasePending();
1491  }
1492  if (Available.size() == 1)
1493    return *Available.begin();
1494  return NULL;
1495}
1496
1497/// Record the candidate policy for opposite zones with different critical
1498/// resources.
1499///
1500/// If the CriticalZone is latency limited, don't force a policy for the
1501/// candidates here. Instead, when releasing each candidate, releaseNode
1502/// compares the region's critical path to the candidate's height or depth and
1503/// the scheduled zone's expected latency, then sets ShouldIncreaseILP.
1504void ConvergingScheduler::balanceZones(
1505  ConvergingScheduler::SchedBoundary &CriticalZone,
1506  ConvergingScheduler::SchedCandidate &CriticalCand,
1507  ConvergingScheduler::SchedBoundary &OppositeZone,
1508  ConvergingScheduler::SchedCandidate &OppositeCand) {
1509
1510  if (!CriticalZone.IsResourceLimited)
1511    return;
1512
1513  SchedRemainder *Rem = CriticalZone.Rem;
1514
1515  // If the critical zone is overconsuming a resource relative to the
1516  // remainder, try to reduce it.
1517  unsigned RemainingCritCount =
1518    Rem->RemainingCounts[CriticalZone.CritResIdx];
1519  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
1520      > (int)SchedModel->getLatencyFactor()) {
1521    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1522    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
1523          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1524          << '\n');
1525  }
1526  // If the other zone is underconsuming a resource relative to the full zone,
1527  // try to increase it.
1528  unsigned OppositeCount =
1529    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1530  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1531      > (int)SchedModel->getLatencyFactor()) {
1532    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1533    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
1534          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
1535          << '\n');
1536  }
1537}
1538
1539/// Determine if the scheduled zones exceed resource limits or the critical path
1540/// and set each candidate's latency- and resource-reduction policies accordingly.
1541void ConvergingScheduler::checkResourceLimits(
1542  ConvergingScheduler::SchedCandidate &TopCand,
1543  ConvergingScheduler::SchedCandidate &BotCand) {
1544
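  // Update each zone's ILP state; if a zone wants more ILP, ask its candidate
  // to reduce latency.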
1545  Bot.checkILPPolicy();
1546  Top.checkILPPolicy();
1547  if (Bot.ShouldIncreaseILP)
1548    BotCand.Policy.ReduceLatency = true;
1549  if (Top.ShouldIncreaseILP)
1550    TopCand.Policy.ReduceLatency = true;
1551
1552  // Handle resource-limited regions.
1553  if (Top.IsResourceLimited && Bot.IsResourceLimited
1554      && Top.CritResIdx == Bot.CritResIdx) {
1555    // If the scheduled critical resource in both zones is no longer the
1556    // critical remaining resource, attempt to reduce resource height both ways.
1557    if (Top.CritResIdx != Rem.CritResIdx) {
1558      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1559      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1560      DEBUG(dbgs() << "Reduce scheduled "
1561            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1562    }
1563    return;
1564  }
1565  // Handle latency-limited regions.
1566  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1567    // If the total scheduled expected latency exceeds the region's critical
1568    // path then reduce latency both ways.
1569    //
1570    // Just because a zone is not resource limited does not mean it is latency
1571    // limited. Unbuffered resources, such as the max micro-op count, may cause
1572    // CurrCycle to exceed the expected latency.
1573    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1574        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1575      TopCand.Policy.ReduceLatency = true;
1576      BotCand.Policy.ReduceLatency = true;
1577      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
1578            << " + " << Bot.ExpectedLatency << '\n');
1579    }
1580    return;
1581  }
1582  // The critical resource is different in each zone, so request balancing.
1583
1584  // Compute the cost of each zone.
1585  Rem.MaxRemainingCount = std::max(
1586    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
1587    Rem.RemainingCounts[Rem.CritResIdx]);
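  // Express each zone's cost in resource-count units: cycles are scaled by the
  // latency factor so they can be compared against the critical resource count.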
1588  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1589  Top.ExpectedCount = std::max(
1590    Top.getCriticalCount(),
1591    Top.ExpectedCount * SchedModel->getLatencyFactor());
1592  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1593  Bot.ExpectedCount = std::max(
1594    Bot.getCriticalCount(),
1595    Bot.ExpectedCount * SchedModel->getLatencyFactor());
1596
1597  balanceZones(Top, TopCand, Bot, BotCand);
1598  balanceZones(Bot, BotCand, Top, TopCand);
1599}
1600
1601void ConvergingScheduler::SchedCandidate::
1602initResourceDelta(const ScheduleDAGMI *DAG,
1603                  const TargetSchedModel *SchedModel) {
1604  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1605    return;
1606
1607  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1608  for (TargetSchedModel::ProcResIter
1609         PI = SchedModel->getWriteProcResBegin(SC),
1610         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1611    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1612      ResDelta.CritResources += PI->Cycles;
1613    if (PI->ProcResourceIdx == Policy.DemandResIdx)
1614      ResDelta.DemandedResources += PI->Cycles;
1615  }
1616}
1617
1618/// Return true if this heuristic determines order.
1619static bool tryLess(unsigned TryVal, unsigned CandVal,
1620                    ConvergingScheduler::SchedCandidate &TryCand,
1621                    ConvergingScheduler::SchedCandidate &Cand,
1622                    ConvergingScheduler::CandReason Reason) {
1623  if (TryVal < CandVal) {
1624    TryCand.Reason = Reason;
1625    return true;
1626  }
1627  if (TryVal > CandVal) {
1628    if (Cand.Reason > Reason)
1629      Cand.Reason = Reason;
1630    return true;
1631  }
1632  return false;
1633}
1634
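/// Return true if this heuristic determines order, preferring the candidate
/// with the greater value.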
1635static bool tryGreater(unsigned TryVal, unsigned CandVal,
1636                       ConvergingScheduler::SchedCandidate &TryCand,
1637                       ConvergingScheduler::SchedCandidate &Cand,
1638                       ConvergingScheduler::CandReason Reason) {
1639  if (TryVal > CandVal) {
1640    TryCand.Reason = Reason;
1641    return true;
1642  }
1643  if (TryVal < CandVal) {
1644    if (Cand.Reason > Reason)
1645      Cand.Reason = Reason;
1646    return true;
1647  }
1648  return false;
1649}
1650
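/// Count the unscheduled weak edges for SU at this boundary: weak predecessors
/// when scheduling top-down, weak successors when scheduling bottom-up.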
1651static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1652  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1653}
1654
1655/// Apply a set of heuristics to a new candidate. Heuristics are currently
1656/// hierarchical. This may be more efficient than a graduated cost model because
1657/// we don't need to evaluate all aspects of the model for each node in the
1658/// queue. But it's really done to make the heuristics easier to debug and
1659/// statistically analyze.
1660///
1661/// \param Cand provides the policy and current best candidate.
1662/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1663/// \param Zone describes the scheduled zone that we are extending.
1664/// \param RPTracker describes reg pressure within the scheduled zone.
1665/// \param TempTracker is a scratch pressure tracker to reuse in queries.
1666void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1667                                       SchedCandidate &TryCand,
1668                                       SchedBoundary &Zone,
1669                                       const RegPressureTracker &RPTracker,
1670                                       RegPressureTracker &TempTracker) {
1671
1672  // Always initialize TryCand's RPDelta.
1673  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1674                                  DAG->getRegionCriticalPSets(),
1675                                  DAG->getRegPressure().MaxSetPressure);
1676
1677  // Initialize the candidate if needed.
1678  if (!Cand.isValid()) {
1679    TryCand.Reason = NodeOrder;
1680    return;
1681  }
1682  // Avoid exceeding the target's limit.
1683  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1684              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1685    return;
1686  if (Cand.Reason == SingleExcess)
1687    Cand.Reason = MultiPressure;
1688
1689  // Avoid increasing the max critical pressure in the scheduled region.
1690  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1691              Cand.RPDelta.CriticalMax.UnitIncrease,
1692              TryCand, Cand, SingleCritical))
1693    return;
1694  if (Cand.Reason == SingleCritical)
1695    Cand.Reason = MultiPressure;
1696
1697  // Keep clustered nodes together to encourage downstream peephole
1698  // optimizations which may reduce resource requirements.
1699  //
1700  // This is a best effort to set things up for a post-RA pass. Optimizations
1701  // like generating loads of multiple registers should ideally be done within
1702  // the scheduler pass by combining the loads during DAG postprocessing.
1703  const SUnit *NextClusterSU =
1704    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
1705  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
1706                 TryCand, Cand, Cluster))
1707    return;
1708  // Currently, weak edges are for clustering, so we hard-code that reason.
1709  // However, deferring the current TryCand will not change Cand's reason.
1710  CandReason OrigReason = Cand.Reason;
1711  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
1712              getWeakLeft(Cand.SU, Zone.isTop()),
1713              TryCand, Cand, Cluster)) {
1714    Cand.Reason = OrigReason;
1715    return;
1716  }
1717  // Avoid critical resource consumption and balance the schedule.
1718  TryCand.initResourceDelta(DAG, SchedModel);
1719  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
1720              TryCand, Cand, ResourceReduce))
1721    return;
1722  if (tryGreater(TryCand.ResDelta.DemandedResources,
1723                 Cand.ResDelta.DemandedResources,
1724                 TryCand, Cand, ResourceDemand))
1725    return;
1726
1727  // Avoid serializing long latency dependence chains.
1728  if (Cand.Policy.ReduceLatency) {
1729    if (Zone.isTop()) {
1730      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
1731          > Zone.ExpectedCount) {
1732        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1733                    TryCand, Cand, TopDepthReduce))
1734          return;
1735      }
1736      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1737                     TryCand, Cand, TopPathReduce))
1738        return;
1739    }
1740    else {
1741      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
1742          > Zone.ExpectedCount) {
1743        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
1744                    TryCand, Cand, BotHeightReduce))
1745          return;
1746      }
1747      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
1748                     TryCand, Cand, BotPathReduce))
1749        return;
1750    }
1751  }
1752
1753  // Avoid increasing the max pressure of the entire region.
1754  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
1755              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
1756    return;
1757  if (Cand.Reason == SingleMax)
1758    Cand.Reason = MultiPressure;
1759
1760  // Prefer immediate defs/users of the last scheduled instruction. This is a
1761  // nice pressure avoidance strategy that also conserves the processor's
1762  // register renaming resources and keeps the machine code readable.
1763  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
1764                 TryCand, Cand, NextDefUse))
1765    return;
1766
1767  // Fall through to original instruction order.
1768  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
1769      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
1770    TryCand.Reason = NodeOrder;
1771  }
1772}
1773
1774/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
1775/// more desirable than RHS from a scheduling standpoint.
1776static bool compareRPDelta(const RegPressureDelta &LHS,
1777                           const RegPressureDelta &RHS) {
1778  // Compare each component of pressure in decreasing order of importance
1779  // without checking if any are valid. Invalid PressureElements are assumed to
1780  // have UnitIncrease==0, so are neutral.
1781
1782  // Avoid exceeding the target's limit.
1783  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
1784    DEBUG(dbgs() << "RP excess top - bot: "
1785          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
1786    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
1787  }
1788  // Avoid increasing the max critical pressure in the scheduled region.
1789  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
1790    DEBUG(dbgs() << "RP critical top - bot: "
1791          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
1792          << '\n');
1793    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
1794  }
1795  // Avoid increasing the max pressure of the entire region.
1796  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
1797    DEBUG(dbgs() << "RP current top - bot: "
1798          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
1799          << '\n');
1800    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
1801  }
1802  return false;
1803}
1804
1805#ifndef NDEBUG
1806const char *ConvergingScheduler::getReasonStr(
1807  ConvergingScheduler::CandReason Reason) {
1808  switch (Reason) {
1809  case NoCand:         return "NOCAND    ";
1810  case SingleExcess:   return "REG-EXCESS";
1811  case SingleCritical: return "REG-CRIT  ";
1812  case Cluster:        return "CLUSTER   ";
1813  case SingleMax:      return "REG-MAX   ";
1814  case MultiPressure:  return "REG-MULTI ";
1815  case ResourceReduce: return "RES-REDUCE";
1816  case ResourceDemand: return "RES-DEMAND";
1817  case TopDepthReduce: return "TOP-DEPTH ";
1818  case TopPathReduce:  return "TOP-PATH  ";
1819  case BotHeightReduce:return "BOT-HEIGHT";
1820  case BotPathReduce:  return "BOT-PATH  ";
1821  case NextDefUse:     return "DEF-USE   ";
1822  case NodeOrder:      return "ORDER     ";
1823  }
1824  llvm_unreachable("Unknown reason!");
1825}
1826
1827void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
1828                                         const SchedBoundary &Zone) {
1829  const char *Label = getReasonStr(Cand.Reason);
1830  PressureElement P;
1831  unsigned ResIdx = 0;
1832  unsigned Latency = 0;
1833  switch (Cand.Reason) {
1834  default:
1835    break;
1836  case SingleExcess:
1837    P = Cand.RPDelta.Excess;
1838    break;
1839  case SingleCritical:
1840    P = Cand.RPDelta.CriticalMax;
1841    break;
1842  case SingleMax:
1843    P = Cand.RPDelta.CurrentMax;
1844    break;
1845  case ResourceReduce:
1846    ResIdx = Cand.Policy.ReduceResIdx;
1847    break;
1848  case ResourceDemand:
1849    ResIdx = Cand.Policy.DemandResIdx;
1850    break;
1851  case TopDepthReduce:
1852    Latency = Cand.SU->getDepth();
1853    break;
1854  case TopPathReduce:
1855    Latency = Cand.SU->getHeight();
1856    break;
1857  case BotHeightReduce:
1858    Latency = Cand.SU->getHeight();
1859    break;
1860  case BotPathReduce:
1861    Latency = Cand.SU->getDepth();
1862    break;
1863  }
1864  dbgs() << Label << " " << Zone.Available.getName() << " ";
1865  if (P.isValid())
1866    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
1867           << " ";
1868  else
1869    dbgs() << "     ";
1870  if (ResIdx)
1871    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
1872  else
1873    dbgs() << "        ";
1874  if (Latency)
1875    dbgs() << Latency << " cycles ";
1876  else
1877    dbgs() << "         ";
1878  Cand.SU->dump(DAG);
1879}
1880#endif
1881
1882/// Pick the best candidate from this boundary's available queue.
1883///
1884/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
1885/// DAG building. To adjust for the current scheduling location we need to
1886/// maintain the number of vreg uses remaining to be top-scheduled.
1887void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
1888                                            const RegPressureTracker &RPTracker,
1889                                            SchedCandidate &Cand) {
1890  ReadyQueue &Q = Zone.Available;
1891
1892  DEBUG(Q.dump());
1893
1894  // getMaxPressureDelta temporarily modifies the tracker.
1895  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
1896
1897  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
1898
1899    SchedCandidate TryCand(Cand.Policy);
1900    TryCand.SU = *I;
1901    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
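    // A reason other than NoCand means TryCand should become the new best
    // candidate.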
1902    if (TryCand.Reason != NoCand) {
1903      // Initialize resource delta if needed in case future heuristics query it.
1904      if (TryCand.ResDelta == SchedResourceDelta())
1905        TryCand.initResourceDelta(DAG, SchedModel);
1906      Cand.setBest(TryCand);
1907      DEBUG(traceCandidate(Cand, Zone));
1908    }
1910  }
1911}
1912
1913static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
1914                      bool IsTop) {
1915  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
1916        << " SU(" << Cand.SU->NodeNum << ") "
1917        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
1918}
1919
1920/// Pick the best candidate node from either the top or bottom queue.
1921SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
1922  // Schedule as far as possible in the direction of no choice. This is most
1923  // efficient, but also provides the best heuristics for CriticalPSets.
1924  if (SUnit *SU = Bot.pickOnlyChoice()) {
1925    IsTopNode = false;
1926    return SU;
1927  }
1928  if (SUnit *SU = Top.pickOnlyChoice()) {
1929    IsTopNode = true;
1930    return SU;
1931  }
1932  CandPolicy NoPolicy;
1933  SchedCandidate BotCand(NoPolicy);
1934  SchedCandidate TopCand(NoPolicy);
1935  checkResourceLimits(TopCand, BotCand);
1936
1937  // Prefer bottom scheduling when heuristics are silent.
1938  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
1939  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
1940
1941  // If either Q has a single candidate that provides the least increase in
1942  // Excess pressure, we can immediately schedule from that Q.
1943  //
1944  // RegionCriticalPSets summarizes the pressure within the scheduled region and
1945  // affects picking from either Q. If scheduling in one direction must
1946  // increase pressure for one of the excess PSets, then schedule in that
1947  // direction first to provide more freedom in the other direction.
1948  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
1949    IsTopNode = false;
1950    tracePick(BotCand, IsTopNode);
1951    return BotCand.SU;
1952  }
1953  // Check if the top Q has a better candidate.
1954  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
1955  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
1956
1957  // If either Q has a single candidate that minimizes pressure above the
1958  // original region's pressure, pick it.
1959  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
1960    if (TopCand.Reason < BotCand.Reason) {
1961      IsTopNode = true;
1962      tracePick(TopCand, IsTopNode);
1963      return TopCand.SU;
1964    }
1965    IsTopNode = false;
1966    tracePick(BotCand, IsTopNode);
1967    return BotCand.SU;
1968  }
1969  // Check for a salient pressure difference and pick the best from either side.
1970  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
1971    IsTopNode = true;
1972    tracePick(TopCand, IsTopNode);
1973    return TopCand.SU;
1974  }
1975  // Otherwise prefer the bottom candidate, in node order if all else failed.
1976  if (TopCand.Reason < BotCand.Reason) {
1977    IsTopNode = true;
1978    tracePick(TopCand, IsTopNode);
1979    return TopCand.SU;
1980  }
1981  IsTopNode = false;
1982  tracePick(BotCand, IsTopNode);
1983  return BotCand.SU;
1984}
1985
1986/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
1987SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
1988  if (DAG->top() == DAG->bottom()) {
1989    assert(Top.Available.empty() && Top.Pending.empty() &&
1990           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
1991    return NULL;
1992  }
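  // Honor a forced direction if one was requested; otherwise pick from
  // whichever boundary looks best. Re-pick if the chosen node turns out to be
  // already scheduled.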
1993  SUnit *SU;
1994  do {
1995    if (ForceTopDown) {
1996      SU = Top.pickOnlyChoice();
1997      if (!SU) {
1998        CandPolicy NoPolicy;
1999        SchedCandidate TopCand(NoPolicy);
2000        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2001        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2002        SU = TopCand.SU;
2003      }
2004      IsTopNode = true;
2005    }
2006    else if (ForceBottomUp) {
2007      SU = Bot.pickOnlyChoice();
2008      if (!SU) {
2009        CandPolicy NoPolicy;
2010        SchedCandidate BotCand(NoPolicy);
2011        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2012        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2013        SU = BotCand.SU;
2014      }
2015      IsTopNode = false;
2016    }
2017    else {
2018      SU = pickNodeBidirectional(IsTopNode);
2019    }
2020  } while (SU->isScheduled);
2021
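  // The picked node may sit in either boundary's ready set; remove it from
  // each set that currently tracks it.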
2022  if (SU->isTopReady())
2023    Top.removeReady(SU);
2024  if (SU->isBottomReady())
2025    Bot.removeReady(SU);
2026
2027  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
2028        << " Scheduling Instruction in cycle "
2029        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
2030        SU->dump(DAG));
2031  return SU;
2032}
2033
2034/// Update the scheduler's state after scheduling a node. This is the same node
2035/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2036/// its state based on the current cycle before MachineSchedStrategy does.
2037void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2038  if (IsTopNode) {
2039    SU->TopReadyCycle = Top.CurrCycle;
2040    Top.bumpNode(SU);
2041  }
2042  else {
2043    SU->BotReadyCycle = Bot.CurrCycle;
2044    Bot.bumpNode(SU);
2045  }
2046}
2047
2048/// Create the standard converging machine scheduler. This will be used as the
2049/// default scheduler if the target does not set a default.
2050static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2051  assert((!ForceTopDown || !ForceBottomUp) &&
2052         "-misched-topdown incompatible with -misched-bottomup");
2053  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2054  // Register DAG post-processors.
2055  if (EnableLoadCluster)
2056    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2057  if (EnableMacroFusion)
2058    DAG->addMutation(new MacroFusion(DAG->TII));
2059  return DAG;
2060}
2061static MachineSchedRegistry
2062ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2063                        createConvergingSched);
2064
2065//===----------------------------------------------------------------------===//
2066// ILP Scheduler. Currently for experimental analysis of heuristics.
2067//===----------------------------------------------------------------------===//
2068
2069namespace {
2070/// \brief Order nodes by the ILP metric.
2071struct ILPOrder {
2072  SchedDFSResult *DFSResult;
2073  BitVector *ScheduledTrees;
2074  bool MaximizeILP;
2075
2076  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
2077    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}
2078
2079  /// \brief Apply a less-than relation on node priority.
2080  ///
2081  /// (Return true if A comes after B in the Q.)
2082  bool operator()(const SUnit *A, const SUnit *B) const {
2083    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2084    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2085    if (SchedTreeA != SchedTreeB) {
2086      // Unscheduled trees have lower priority.
2087      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2088        return ScheduledTrees->test(SchedTreeB);
2089
2090      // Trees with shallower connections have lower priority.
2091      if (DFSResult->getSubtreeLevel(SchedTreeA)
2092          != DFSResult->getSubtreeLevel(SchedTreeB)) {
2093        return DFSResult->getSubtreeLevel(SchedTreeA)
2094          < DFSResult->getSubtreeLevel(SchedTreeB);
2095      }
2096    }
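    // Otherwise fall back to the ILP metric itself.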
2097    if (MaximizeILP)
2098      return DFSResult->getILP(A) < DFSResult->getILP(B);
2099    else
2100      return DFSResult->getILP(A) > DFSResult->getILP(B);
2101  }
2102};
2103
2104/// \brief Schedule based on the ILP metric.
2105class ILPScheduler : public MachineSchedStrategy {
2106  /// In case all subtrees are eventually connected to a common root through
2107  /// data dependence (e.g. reduction), place an upper limit on their size.
2108  ///
2109  /// FIXME: A subtree limit is generally good, but in the situation commented
2110  /// above, where multiple similar subtrees feed a common root, we should
2111  /// only split at a point where the resulting subtrees will be balanced.
2112  /// (A motivating test case must be found.)
2113  static const unsigned SubtreeLimit = 16;
2114
2115  SchedDFSResult DFSResult;
2116  BitVector ScheduledTrees;
2117  ILPOrder Cmp;
2118
2119  std::vector<SUnit*> ReadyQ;
2120public:
2121  ILPScheduler(bool MaximizeILP)
2122  : DFSResult(/*BottomUp=*/true, SubtreeLimit),
2123    Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
2124
2125  virtual void initialize(ScheduleDAGMI *DAG) {
2126    ReadyQ.clear();
2127    DFSResult.clear();
2128    DFSResult.resize(DAG->SUnits.size());
2129    ScheduledTrees.clear();
2130  }
2131
2132  virtual void registerRoots() {
2133    DFSResult.compute(ReadyQ);
2134    ScheduledTrees.resize(DFSResult.getNumSubtrees());
2135    // Restore the heap in ReadyQ with the updated DFS results.
2136    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2137  }
2138
2139  /// Implement MachineSchedStrategy interface.
2140  /// -----------------------------------------
2141
2142  /// Callback to select the highest priority node from the ready Q.
2143  virtual SUnit *pickNode(bool &IsTopNode) {
2144    if (ReadyQ.empty()) return NULL;
2145    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2146    SUnit *SU = ReadyQ.back();
2147    ReadyQ.pop_back();
2148    IsTopNode = false;
2149    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
2150          << *SU->getInstr()
2151          << " ILP: " << DFSResult.getILP(SU)
2152          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
2153          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<< '\n');
2154    return SU;
2155  }
2156
2157  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2158  /// DFSResults, and resort the priority Q.
2159  virtual void schedNode(SUnit *SU, bool IsTopNode) {
2160    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2161    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
2162      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
2163      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
2164      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2165    }
2166  }
2167
2168  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2169
2170  virtual void releaseBottomNode(SUnit *SU) {
2171    ReadyQ.push_back(SU);
2172    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2173  }
2174};
2175} // namespace
2176
2177static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2178  return new ScheduleDAGMI(C, new ILPScheduler(true));
2179}
2180static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2181  return new ScheduleDAGMI(C, new ILPScheduler(false));
2182}
2183static MachineSchedRegistry ILPMaxRegistry(
2184  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2185static MachineSchedRegistry ILPMinRegistry(
2186  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2187
2188//===----------------------------------------------------------------------===//
2189// Machine Instruction Shuffler for Correctness Testing
2190//===----------------------------------------------------------------------===//
2191
2192#ifndef NDEBUG
2193namespace {
2194/// Apply a less-than relation on the node order, which corresponds to the
2195/// instruction order prior to scheduling. IsReverse implements greater-than.
2196template<bool IsReverse>
2197struct SUnitOrder {
2198  bool operator()(SUnit *A, SUnit *B) const {
2199    if (IsReverse)
2200      return A->NodeNum > B->NodeNum;
2201    else
2202      return A->NodeNum < B->NodeNum;
2203  }
2204};
2205
2206/// Reorder instructions as much as possible.
2207class InstructionShuffler : public MachineSchedStrategy {
2208  bool IsAlternating;
2209  bool IsTopDown;
2210
2211  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2212  // gives nodes with a higher number higher priority, causing the latest
2213  // instructions to be scheduled first.
2214  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2215    TopQ;
2216  // When scheduling bottom-up, use greater-than as the queue priority.
2217  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2218    BottomQ;
2219public:
2220  InstructionShuffler(bool alternate, bool topdown)
2221    : IsAlternating(alternate), IsTopDown(topdown) {}
2222
2223  virtual void initialize(ScheduleDAGMI *) {
2224    TopQ.clear();
2225    BottomQ.clear();
2226  }
2227
2228  /// Implement MachineSchedStrategy interface.
2229  /// -----------------------------------------
2230
2231  virtual SUnit *pickNode(bool &IsTopNode) {
2232    SUnit *SU;
2233    if (IsTopDown) {
2234      do {
2235        if (TopQ.empty()) return NULL;
2236        SU = TopQ.top();
2237        TopQ.pop();
2238      } while (SU->isScheduled);
2239      IsTopNode = true;
2240    }
2241    else {
2242      do {
2243        if (BottomQ.empty()) return NULL;
2244        SU = BottomQ.top();
2245        BottomQ.pop();
2246      } while (SU->isScheduled);
2247      IsTopNode = false;
2248    }
2249    if (IsAlternating)
2250      IsTopDown = !IsTopDown;
2251    return SU;
2252  }
2253
2254  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2255
2256  virtual void releaseTopNode(SUnit *SU) {
2257    TopQ.push(SU);
2258  }
2259  virtual void releaseBottomNode(SUnit *SU) {
2260    BottomQ.push(SU);
2261  }
2262};
2263} // namespace
2264
2265static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2266  bool Alternate = !ForceTopDown && !ForceBottomUp;
2267  bool TopDown = !ForceBottomUp;
2268  assert((TopDown || !ForceTopDown) &&
2269         "-misched-topdown incompatible with -misched-bottomup");
2270  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2271}
2272static MachineSchedRegistry ShufflerRegistry(
2273  "shuffle", "Shuffle machine instructions alternating directions",
2274  createInstructionShuffler);
2275#endif // !NDEBUG
2276