MachineScheduler.cpp revision 2c465a35516ee453e2a759fa11ac86cdb95ec901
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// MachineScheduler schedules machine instructions after phi elimination. It
11// preserves LiveIntervals so it can be invoked before register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "misched"
16
17#include "llvm/CodeGen/MachineScheduler.h"
18#include "llvm/ADT/OwningPtr.h"
19#include "llvm/ADT/PriorityQueue.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/CodeGen/LiveIntervalAnalysis.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/MachineLoopInfo.h"
24#include "llvm/CodeGen/Passes.h"
25#include "llvm/CodeGen/RegisterClassInfo.h"
26#include "llvm/CodeGen/ScheduleDFS.h"
27#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/GraphWriter.h"
32#include "llvm/Support/raw_ostream.h"
33#include "llvm/Target/TargetInstrInfo.h"
34#include <queue>
35
36using namespace llvm;
37
38namespace llvm {
39cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
40                           cl::desc("Force top-down list scheduling"));
41cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
42                            cl::desc("Force bottom-up list scheduling"));
43}
44
45#ifndef NDEBUG
46static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
47  cl::desc("Pop up a window to show MISched dags after they are processed"));
48
49static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
50  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
51#else
52static bool ViewMISchedDAGs = false;
53#endif // NDEBUG
54
55// FIXME: remove this flag after initial testing. It should always be a good
56// thing.
57static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden,
58    cl::desc("Constrain vreg copies."), cl::init(true));
59
60static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
61  cl::desc("Enable load clustering."), cl::init(true));
62
63// Experimental heuristics
64static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
65  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
66
67static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
68  cl::desc("Verify machine instrs before and after machine scheduling"));
69
70// DAG subtrees must have at least this many nodes.
71static const unsigned MinSubtreeSize = 8;
72
73//===----------------------------------------------------------------------===//
74// Machine Instruction Scheduling Pass and Registry
75//===----------------------------------------------------------------------===//
76
77MachineSchedContext::MachineSchedContext():
78    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
79  RegClassInfo = new RegisterClassInfo();
80}
81
82MachineSchedContext::~MachineSchedContext() {
83  delete RegClassInfo;
84}
85
86namespace {
87/// MachineScheduler runs after coalescing and before register allocation.
88class MachineScheduler : public MachineSchedContext,
89                         public MachineFunctionPass {
90public:
91  MachineScheduler();
92
93  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
94
95  virtual void releaseMemory() {}
96
97  virtual bool runOnMachineFunction(MachineFunction&);
98
99  virtual void print(raw_ostream &O, const Module* = 0) const;
100
101  static char ID; // Class identification, replacement for typeinfo
102};
103} // namespace
104
105char MachineScheduler::ID = 0;
106
107char &llvm::MachineSchedulerID = MachineScheduler::ID;
108
109INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
110                      "Machine Instruction Scheduler", false, false)
111INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
112INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
113INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
114INITIALIZE_PASS_END(MachineScheduler, "misched",
115                    "Machine Instruction Scheduler", false, false)
116
117MachineScheduler::MachineScheduler()
118: MachineFunctionPass(ID) {
119  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
120}
121
122void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
123  AU.setPreservesCFG();
124  AU.addRequiredID(MachineDominatorsID);
125  AU.addRequired<MachineLoopInfo>();
126  AU.addRequired<AliasAnalysis>();
127  AU.addRequired<TargetPassConfig>();
128  AU.addRequired<SlotIndexes>();
129  AU.addPreserved<SlotIndexes>();
130  AU.addRequired<LiveIntervals>();
131  AU.addPreserved<LiveIntervals>();
132  MachineFunctionPass::getAnalysisUsage(AU);
133}
134
135MachinePassRegistry MachineSchedRegistry::Registry;
136
137/// A dummy default scheduler factory indicates whether the scheduler
138/// is overridden on the command line.
139static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
140  return 0;
141}
142
143/// MachineSchedOpt allows command line selection of the scheduler.
144static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
145               RegisterPassParser<MachineSchedRegistry> >
146MachineSchedOpt("misched",
147                cl::init(&useDefaultMachineSched), cl::Hidden,
148                cl::desc("Machine instruction scheduler to use"));
149
150static MachineSchedRegistry
151DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
152                     useDefaultMachineSched);
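// For reference, an alternative scheduler is plugged in through the same
// registry. The names below are purely illustrative (not part of this file);
// any factory returning a ScheduleDAGInstrs can be registered and then
// selected with -misched=<name>:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMI(C, new MyStrategy()); // hypothetical strategy
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("my-sched", "Illustrative custom scheduler.",
//                   createMySched);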
153
154/// Forward declare the standard machine scheduler. This will be used as the
155/// default scheduler if the target does not set a default.
156static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
157
158
159/// Decrement this iterator until reaching the top or a non-debug instr.
160static MachineBasicBlock::iterator
161priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
162  assert(I != Beg && "reached the top of the region, cannot decrement");
163  while (--I != Beg) {
164    if (!I->isDebugValue())
165      break;
166  }
167  return I;
168}
169
170/// If this iterator is a debug value, increment until reaching the End or a
171/// non-debug instruction.
172static MachineBasicBlock::iterator
173nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
174  for (; I != End; ++I) {
175    if (!I->isDebugValue())
176      break;
177  }
178  return I;
179}
180
181/// Top-level MachineScheduler pass driver.
182///
183/// Visit blocks in function order. Divide each block into scheduling regions
184/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
185/// consistent with the DAG builder, which traverses the interior of the
186/// scheduling regions bottom-up.
187///
188/// This design avoids exposing scheduling boundaries to the DAG builder,
189/// simplifying the DAG builder's support for "special" target instructions.
190/// At the same time the design allows target schedulers to operate across
191/// scheduling boundaries, for example to bundle the boundary instructions
192/// without reordering them. This creates complexity, because the target
193/// scheduler must update the RegionBegin and RegionEnd positions cached by
194/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
195/// design would be to split blocks at scheduling boundaries, but LLVM has a
196/// general bias against block splitting purely for implementation simplicity.
197bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
198  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
199
200  // Initialize the context of the pass.
201  MF = &mf;
202  MLI = &getAnalysis<MachineLoopInfo>();
203  MDT = &getAnalysis<MachineDominatorTree>();
204  PassConfig = &getAnalysis<TargetPassConfig>();
205  AA = &getAnalysis<AliasAnalysis>();
206
207  LIS = &getAnalysis<LiveIntervals>();
208  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
209
210  if (VerifyScheduling) {
211    DEBUG(LIS->print(dbgs()));
212    MF->verify(this, "Before machine scheduling.");
213  }
214  RegClassInfo->runOnMachineFunction(*MF);
215
216  // Select the scheduler, or set the default.
217  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
218  if (Ctor == useDefaultMachineSched) {
219    // Get the default scheduler set by the target.
220    Ctor = MachineSchedRegistry::getDefault();
221    if (!Ctor) {
222      Ctor = createConvergingSched;
223      MachineSchedRegistry::setDefault(Ctor);
224    }
225  }
226  // Instantiate the selected scheduler.
227  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
228
229  // Visit all machine basic blocks.
230  //
231  // TODO: Visit blocks in global postorder or postorder within the bottom-up
232  // loop tree. Then we can optionally compute global RegPressure.
233  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
234       MBB != MBBEnd; ++MBB) {
235
236    Scheduler->startBlock(MBB);
237
238    // Break the block into scheduling regions [I, RegionEnd), and schedule each
239    // region as soon as it is discovered. RegionEnd points to the scheduling
240    // boundary at the bottom of the region. The DAG does not include RegionEnd,
241    // but the region does (i.e. the next RegionEnd is above the previous
242    // RegionBegin). If the current block has no terminator then RegionEnd ==
243    // MBB->end() for the bottom region.
244    //
245    // The Scheduler may insert instructions during either schedule() or
246    // exitRegion(), even for empty regions. So the local iterators 'I' and
247    // 'RegionEnd' are invalid across these calls.
248    unsigned RemainingInstrs = MBB->size();
249    for (MachineBasicBlock::iterator RegionEnd = MBB->end();
250         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
251
252      // Avoid decrementing RegionEnd for blocks with no terminator.
253      if (RegionEnd != MBB->end()
254          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
255        --RegionEnd;
256        // Count the boundary instruction.
257        --RemainingInstrs;
258      }
259
260      // The next region starts above the previous region. Look backward in the
261      // instruction stream until we find the nearest boundary.
262      MachineBasicBlock::iterator I = RegionEnd;
263      for (; I != MBB->begin(); --I, --RemainingInstrs) {
264        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
265          break;
266      }
267      // Notify the scheduler of the region, even if we may skip scheduling
268      // it. Perhaps it still needs to be bundled.
269      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
270
271      // Skip empty scheduling regions (0 or 1 schedulable instructions).
272      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
273        // Close the current region. Bundle the terminator if needed.
274        // This invalidates 'RegionEnd' and 'I'.
275        Scheduler->exitRegion();
276        continue;
277      }
278      DEBUG(dbgs() << "********** MI Scheduling **********\n");
279      DEBUG(dbgs() << MF->getName()
280            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
281            << "\n  From: " << *I << "    To: ";
282            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
283            else dbgs() << "End";
284            dbgs() << " Remaining: " << RemainingInstrs << "\n");
285
286      // Schedule a region: possibly reorder instructions.
287      // This invalidates 'RegionEnd' and 'I'.
288      Scheduler->schedule();
289
290      // Close the current region.
291      Scheduler->exitRegion();
292
293      // Scheduling has invalidated the current iterator 'I'. Ask the
294      // scheduler for the top of its scheduled region.
295      RegionEnd = Scheduler->begin();
296    }
297    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
298    Scheduler->finishBlock();
299  }
300  Scheduler->finalizeSchedule();
301  DEBUG(LIS->print(dbgs()));
302  if (VerifyScheduling)
303    MF->verify(this, "After machine scheduling.");
304  return true;
305}
306
307void MachineScheduler::print(raw_ostream &O, const Module* m) const {
308  // unimplemented
309}
310
311#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
312void ReadyQueue::dump() {
313  dbgs() << "  " << Name << ": ";
314  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
315    dbgs() << Queue[i]->NodeNum << " ";
316  dbgs() << "\n";
317}
318#endif
319
320//===----------------------------------------------------------------------===//
321// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
322// preservation.
323//===----------------------------------------------------------------------===//
324
325ScheduleDAGMI::~ScheduleDAGMI() {
326  delete DFSResult;
327  DeleteContainerPointers(Mutations);
328  delete SchedImpl;
329}
330
331bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
332  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
333}
334
335bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
336  if (SuccSU != &ExitSU) {
337    // Do not use WillCreateCycle, it assumes SD scheduling.
338    // If Pred is reachable from Succ, then the edge creates a cycle.
339    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
340      return false;
341    Topo.AddPred(SuccSU, PredDep.getSUnit());
342  }
343  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
344  // Return true regardless of whether a new edge needed to be inserted.
345  return true;
346}
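// For example, the DAG mutations below use these hooks: LoadClusterMutation
// adds weak cluster edges with
//   DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
// and CopyConstrain checks canAddEdge() before adding its weak edges so that a
// proposed edge is only added when it cannot create a cycle.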
347
348/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
349/// NumPredsLeft reaches zero, release the successor node.
350///
351/// FIXME: Adjust SuccSU height based on MinLatency.
352void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
353  SUnit *SuccSU = SuccEdge->getSUnit();
354
355  if (SuccEdge->isWeak()) {
356    --SuccSU->WeakPredsLeft;
357    if (SuccEdge->isCluster())
358      NextClusterSucc = SuccSU;
359    return;
360  }
361#ifndef NDEBUG
362  if (SuccSU->NumPredsLeft == 0) {
363    dbgs() << "*** Scheduling failed! ***\n";
364    SuccSU->dump(this);
365    dbgs() << " has been released too many times!\n";
366    llvm_unreachable(0);
367  }
368#endif
369  --SuccSU->NumPredsLeft;
370  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
371    SchedImpl->releaseTopNode(SuccSU);
372}
373
374/// releaseSuccessors - Call releaseSucc on each of SU's successors.
375void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
376  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
377       I != E; ++I) {
378    releaseSucc(SU, &*I);
379  }
380}
381
382/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
383/// NumSuccsLeft reaches zero, release the predecessor node.
384///
385/// FIXME: Adjust PredSU height based on MinLatency.
386void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
387  SUnit *PredSU = PredEdge->getSUnit();
388
389  if (PredEdge->isWeak()) {
390    --PredSU->WeakSuccsLeft;
391    if (PredEdge->isCluster())
392      NextClusterPred = PredSU;
393    return;
394  }
395#ifndef NDEBUG
396  if (PredSU->NumSuccsLeft == 0) {
397    dbgs() << "*** Scheduling failed! ***\n";
398    PredSU->dump(this);
399    dbgs() << " has been released too many times!\n";
400    llvm_unreachable(0);
401  }
402#endif
403  --PredSU->NumSuccsLeft;
404  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
405    SchedImpl->releaseBottomNode(PredSU);
406}
407
408/// releasePredecessors - Call releasePred on each of SU's predecessors.
409void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
410  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
411       I != E; ++I) {
412    releasePred(SU, &*I);
413  }
414}
415
416/// This is normally called from the main scheduler loop but may also be invoked
417/// by the scheduling strategy to perform additional code motion.
418void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
419                                    MachineBasicBlock::iterator InsertPos) {
420  // Advance RegionBegin if the first instruction moves down.
421  if (&*RegionBegin == MI)
422    ++RegionBegin;
423
424  // Update the instruction stream.
425  BB->splice(InsertPos, BB, MI);
426
427  // Update LiveIntervals
428  LIS->handleMove(MI, /*UpdateFlags=*/true);
429
430  // Recede RegionBegin if an instruction moves above the first.
431  if (RegionBegin == InsertPos)
432    RegionBegin = MI;
433}
434
435bool ScheduleDAGMI::checkSchedLimit() {
436#ifndef NDEBUG
437  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
438    CurrentTop = CurrentBottom;
439    return false;
440  }
441  ++NumInstrsScheduled;
442#endif
443  return true;
444}
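// Note that the cutoff is purely a debugging aid: passing -misched-cutoff=N
// (e.g. via llc or -mllvm) stops scheduling after N instructions, which makes
// it possible to bisect a miscompile or performance regression down to an
// individual scheduling decision.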
445
446/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
447/// crossing a scheduling boundary. [begin, end) includes all instructions in
448/// the region, including the boundary itself and single-instruction regions
449/// that don't get scheduled.
450void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
451                                MachineBasicBlock::iterator begin,
452                                MachineBasicBlock::iterator end,
453                                unsigned endcount)
454{
455  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
456
457  // For convenience remember the end of the liveness region.
458  LiveRegionEnd =
459    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
460}
461
462// Set up the register pressure trackers for the top and bottom scheduled
463// regions.
464void ScheduleDAGMI::initRegPressure() {
465  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
466  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
467
468  // Close the RPTracker to finalize live ins.
469  RPTracker.closeRegion();
470
471  DEBUG(RPTracker.getPressure().dump(TRI));
472
473  // Initialize the live ins and live outs.
474  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
475  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
476
477  // Close one end of the tracker so we can call
478  // getMaxUpward/DownwardPressureDelta before advancing across any
479  // instructions. This converts currently live regs into live ins/outs.
480  TopRPTracker.closeTop();
481  BotRPTracker.closeBottom();
482
483  // Account for liveness generated by the region boundary.
484  if (LiveRegionEnd != RegionEnd)
485    BotRPTracker.recede();
486
487  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
488
489  // Cache the list of excess pressure sets in this region. This will also track
490  // the max pressure in the scheduled code for these sets.
491  RegionCriticalPSets.clear();
492  const std::vector<unsigned> &RegionPressure =
493    RPTracker.getPressure().MaxSetPressure;
494  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
495    unsigned Limit = TRI->getRegPressureSetLimit(i);
496    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
497          << "Limit " << Limit
498          << " Actual " << RegionPressure[i] << "\n");
499    if (RegionPressure[i] > Limit)
500      RegionCriticalPSets.push_back(PressureElement(i, 0));
501  }
502  DEBUG(dbgs() << "Excess PSets: ";
503        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
504          dbgs() << TRI->getRegPressureSetName(
505            RegionCriticalPSets[i].PSetID) << " ";
506        dbgs() << "\n");
507}
508
509// FIXME: When the pressure tracker deals in pressure differences then we won't
510// iterate over all RegionCriticalPSets[i].
511void ScheduleDAGMI::
512updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
513  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
514    unsigned ID = RegionCriticalPSets[i].PSetID;
515    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
516    if ((int)NewMaxPressure[ID] > MaxUnits)
517      MaxUnits = NewMaxPressure[ID];
518  }
519  DEBUG(
520    for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
521      unsigned Limit = TRI->getRegPressureSetLimit(i);
522      if (NewMaxPressure[i] > Limit) {
523        dbgs() << "  " << TRI->getRegPressureSetName(i) << ": "
524               << NewMaxPressure[i] << " > " << Limit << "\n";
525      }
526    });
527}
528
529/// schedule - Called back from MachineScheduler::runOnMachineFunction
530/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
531/// only includes instructions that have DAG nodes, not scheduling boundaries.
532///
533/// This is a skeletal driver, with all the functionality pushed into helpers,
534/// so that it can be easily extended by experimental schedulers. Generally,
535/// implementing MachineSchedStrategy should be sufficient to implement a new
536/// scheduling algorithm. However, if a scheduler further subclasses
537/// ScheduleDAGMI then it will want to override this virtual method in order to
538/// update any specialized state.
539void ScheduleDAGMI::schedule() {
540  buildDAGWithRegPressure();
541
542  Topo.InitDAGTopologicalSorting();
543
544  postprocessDAG();
545
546  SmallVector<SUnit*, 8> TopRoots, BotRoots;
547  findRootsAndBiasEdges(TopRoots, BotRoots);
548
549  // Initialize the strategy before modifying the DAG.
550  // This may initialize a DFSResult to be used for queue priority.
551  SchedImpl->initialize(this);
552
553  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
554          SUnits[su].dumpAll(this));
555  if (ViewMISchedDAGs) viewGraph();
556
557  // Initialize ready queues now that the DAG and priority data are finalized.
558  initQueues(TopRoots, BotRoots);
559
560  bool IsTopNode = false;
561  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
562    assert(!SU->isScheduled && "Node already scheduled");
563    if (!checkSchedLimit())
564      break;
565
566    scheduleMI(SU, IsTopNode);
567
568    updateQueues(SU, IsTopNode);
569  }
570  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
571
572  placeDebugValues();
573
574  DEBUG({
575      unsigned BBNum = begin()->getParent()->getNumber();
576      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
577      dumpSchedule();
578      dbgs() << '\n';
579    });
580}
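// As noted above, implementing MachineSchedStrategy is normally all a new
// scheduling algorithm needs. A minimal, purely illustrative sketch (the class
// is hypothetical and not part of this file) that always schedules top-down
// from a single ready list:
//
//   struct MinimalStrategy : public MachineSchedStrategy {
//     std::vector<SUnit*> Ready;
//     virtual void initialize(ScheduleDAGMI *DAG) { Ready.clear(); }
//     virtual SUnit *pickNode(bool &IsTopNode) {
//       if (Ready.empty()) return NULL;
//       IsTopNode = true;
//       SUnit *SU = Ready.back();
//       Ready.pop_back();
//       return SU;
//     }
//     virtual void schedNode(SUnit *SU, bool IsTopNode) {}
//     virtual void releaseTopNode(SUnit *SU) { Ready.push_back(SU); }
//     virtual void releaseBottomNode(SUnit *SU) {}
//   };
//
// ConvergingScheduler below is the full-featured implementation of the same
// interface.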
581
582/// Build the DAG and set up three register pressure trackers.
583void ScheduleDAGMI::buildDAGWithRegPressure() {
584  // Initialize the register pressure tracker used by buildSchedGraph.
585  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
586
587  // Account for liveness generated by the region boundary.
588  if (LiveRegionEnd != RegionEnd)
589    RPTracker.recede();
590
591  // Build the DAG, and compute current register pressure.
592  buildSchedGraph(AA, &RPTracker);
593
594  // Initialize top/bottom trackers after computing region pressure.
595  initRegPressure();
596}
597
598/// Apply each ScheduleDAGMutation step in order.
599void ScheduleDAGMI::postprocessDAG() {
600  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
601    Mutations[i]->apply(this);
602  }
603}
604
605void ScheduleDAGMI::computeDFSResult() {
606  if (!DFSResult)
607    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
608  DFSResult->clear();
609  ScheduledTrees.clear();
610  DFSResult->resize(SUnits.size());
611  DFSResult->compute(SUnits);
612  ScheduledTrees.resize(DFSResult->getNumSubtrees());
613}
614
615void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
616                                          SmallVectorImpl<SUnit*> &BotRoots) {
617  for (std::vector<SUnit>::iterator
618         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
619    SUnit *SU = &(*I);
620    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
621
622    // Order predecessors so DFSResult follows the critical path.
623    SU->biasCriticalPath();
624
625    // A SUnit is ready for top-down scheduling if it has no predecessors.
626    if (!I->NumPredsLeft)
627      TopRoots.push_back(SU);
628    // A SUnit is ready for bottom-up scheduling if it has no successors.
629    if (!I->NumSuccsLeft)
630      BotRoots.push_back(SU);
631  }
632  ExitSU.biasCriticalPath();
633}
634
635/// Identify DAG roots and set up scheduler queues.
636void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
637                               ArrayRef<SUnit*> BotRoots) {
638  NextClusterSucc = NULL;
639  NextClusterPred = NULL;
640
641  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
642  //
643  // Nodes with unreleased weak edges can still be roots.
644  // Release top roots in forward order.
645  for (SmallVectorImpl<SUnit*>::const_iterator
646         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
647    SchedImpl->releaseTopNode(*I);
648  }
649  // Release bottom roots in reverse order so the higher priority nodes appear
650  // first. This is more natural and slightly more efficient.
651  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
652         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
653    SchedImpl->releaseBottomNode(*I);
654  }
655
656  releaseSuccessors(&EntrySU);
657  releasePredecessors(&ExitSU);
658
659  SchedImpl->registerRoots();
660
661  // Advance past initial DebugValues.
662  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
663  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
664  TopRPTracker.setPos(CurrentTop);
665
666  CurrentBottom = RegionEnd;
667}
668
669/// Move an instruction and update register pressure.
670void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
671  // Move the instruction to its new location in the instruction stream.
672  MachineInstr *MI = SU->getInstr();
673
674  if (IsTopNode) {
675    assert(SU->isTopReady() && "node still has unscheduled dependencies");
676    if (&*CurrentTop == MI)
677      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
678    else {
679      moveInstruction(MI, CurrentTop);
680      TopRPTracker.setPos(MI);
681    }
682
683    // Update top scheduled pressure.
684    TopRPTracker.advance();
685    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
686    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
687  }
688  else {
689    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
690    MachineBasicBlock::iterator priorII =
691      priorNonDebug(CurrentBottom, CurrentTop);
692    if (&*priorII == MI)
693      CurrentBottom = priorII;
694    else {
695      if (&*CurrentTop == MI) {
696        CurrentTop = nextIfDebug(++CurrentTop, priorII);
697        TopRPTracker.setPos(CurrentTop);
698      }
699      moveInstruction(MI, CurrentBottom);
700      CurrentBottom = MI;
701    }
702    // Update bottom scheduled pressure.
703    BotRPTracker.recede();
704    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
705    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
706  }
707}
708
709/// Update scheduler queues after scheduling an instruction.
710void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
711  // Release dependent instructions for scheduling.
712  if (IsTopNode)
713    releaseSuccessors(SU);
714  else
715    releasePredecessors(SU);
716
717  SU->isScheduled = true;
718
719  if (DFSResult) {
720    unsigned SubtreeID = DFSResult->getSubtreeID(SU);
721    if (!ScheduledTrees.test(SubtreeID)) {
722      ScheduledTrees.set(SubtreeID);
723      DFSResult->scheduleTree(SubtreeID);
724      SchedImpl->scheduleTree(SubtreeID);
725    }
726  }
727
728  // Notify the scheduling strategy after updating the DAG.
729  SchedImpl->schedNode(SU, IsTopNode);
730}
731
732/// Reinsert any remaining debug_values, just like the PostRA scheduler.
733void ScheduleDAGMI::placeDebugValues() {
734  // If first instruction was a DBG_VALUE then put it back.
735  if (FirstDbgValue) {
736    BB->splice(RegionBegin, BB, FirstDbgValue);
737    RegionBegin = FirstDbgValue;
738  }
739
740  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
741         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
742    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
743    MachineInstr *DbgValue = P.first;
744    MachineBasicBlock::iterator OrigPrevMI = P.second;
745    if (&*RegionBegin == DbgValue)
746      ++RegionBegin;
747    BB->splice(++OrigPrevMI, BB, DbgValue);
748    if (OrigPrevMI == llvm::prior(RegionEnd))
749      RegionEnd = DbgValue;
750  }
751  DbgValues.clear();
752  FirstDbgValue = NULL;
753}
754
755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
756void ScheduleDAGMI::dumpSchedule() const {
757  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
758    if (SUnit *SU = getSUnit(&(*MI)))
759      SU->dump(this);
760    else
761      dbgs() << "Missing SUnit\n";
762  }
763}
764#endif
765
766//===----------------------------------------------------------------------===//
767// LoadClusterMutation - DAG post-processing to cluster loads.
768//===----------------------------------------------------------------------===//
769
770namespace {
771/// \brief Post-process the DAG to create cluster edges between neighboring
772/// loads.
773class LoadClusterMutation : public ScheduleDAGMutation {
774  struct LoadInfo {
775    SUnit *SU;
776    unsigned BaseReg;
777    unsigned Offset;
778    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
779      : SU(su), BaseReg(reg), Offset(ofs) {}
780  };
781  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
782                           const LoadClusterMutation::LoadInfo &RHS);
783
784  const TargetInstrInfo *TII;
785  const TargetRegisterInfo *TRI;
786public:
787  LoadClusterMutation(const TargetInstrInfo *tii,
788                      const TargetRegisterInfo *tri)
789    : TII(tii), TRI(tri) {}
790
791  virtual void apply(ScheduleDAGMI *DAG);
792protected:
793  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
794};
795} // anonymous
796
797bool LoadClusterMutation::LoadInfoLess(
798  const LoadClusterMutation::LoadInfo &LHS,
799  const LoadClusterMutation::LoadInfo &RHS) {
800  if (LHS.BaseReg != RHS.BaseReg)
801    return LHS.BaseReg < RHS.BaseReg;
802  return LHS.Offset < RHS.Offset;
803}
804
805void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
806                                                  ScheduleDAGMI *DAG) {
807  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
808  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
809    SUnit *SU = Loads[Idx];
810    unsigned BaseReg;
811    unsigned Offset;
812    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
813      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
814  }
815  if (LoadRecords.size() < 2)
816    return;
817  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
818  unsigned ClusterLength = 1;
819  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
820    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
821      ClusterLength = 1;
822      continue;
823    }
824
825    SUnit *SUa = LoadRecords[Idx].SU;
826    SUnit *SUb = LoadRecords[Idx+1].SU;
827    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
828        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
829
830      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
831            << SUb->NodeNum << ")\n");
832      // Copy successor edges from SUa to SUb. Interleaving computation
833      // dependent on SUa can prevent load combining due to register reuse.
834      // Predecessor edges do not need to be copied from SUb to SUa since nearby
835      // loads should have effectively the same inputs.
836      for (SUnit::const_succ_iterator
837             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
838        if (SI->getSUnit() == SUb)
839          continue;
840        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
841        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
842      }
843      ++ClusterLength;
844    }
845    else
846      ClusterLength = 1;
847  }
848}
849
850/// \brief Callback from DAG postProcessing to create cluster edges for loads.
851void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
852  // Map DAG NodeNum to store chain ID.
853  DenseMap<unsigned, unsigned> StoreChainIDs;
854  // Map each store chain to a set of dependent loads.
855  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
856  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
857    SUnit *SU = &DAG->SUnits[Idx];
858    if (!SU->getInstr()->mayLoad())
859      continue;
860    unsigned ChainPredID = DAG->SUnits.size();
861    for (SUnit::const_pred_iterator
862           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
863      if (PI->isCtrl()) {
864        ChainPredID = PI->getSUnit()->NodeNum;
865        break;
866      }
867    }
868    // Check if this chain-like pred has been seen
869    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
870    unsigned NumChains = StoreChainDependents.size();
871    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
872      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
873    if (Result.second)
874      StoreChainDependents.resize(NumChains + 1);
875    StoreChainDependents[Result.first->second].push_back(SU);
876  }
877  // Iterate over the store chains.
878  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
879    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
880}
881
882//===----------------------------------------------------------------------===//
883// MacroFusion - DAG post-processing to encourage fusion of macro ops.
884//===----------------------------------------------------------------------===//
885
886namespace {
887/// \brief Post-process the DAG to create cluster edges between instructions
888/// that may be fused by the processor into a single operation.
889class MacroFusion : public ScheduleDAGMutation {
890  const TargetInstrInfo *TII;
891public:
892  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
893
894  virtual void apply(ScheduleDAGMI *DAG);
895};
896} // anonymous
897
898/// \brief Callback from DAG postProcessing to create cluster edges to encourage
899/// fused operations.
900void MacroFusion::apply(ScheduleDAGMI *DAG) {
901  // For now, assume targets can only fuse with the branch.
902  MachineInstr *Branch = DAG->ExitSU.getInstr();
903  if (!Branch)
904    return;
905
906  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
907    SUnit *SU = &DAG->SUnits[--Idx];
908    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
909      continue;
910
911    // Create a single weak edge from SU to ExitSU. The only effect is to cause
912    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
913    // need to copy predecessor edges from ExitSU to SU, since top-down
914    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
915    // of SU, we could create an artificial edge from the deepest root, but it
916    // hasn't been needed yet.
917    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
918    (void)Success;
919    assert(Success && "No DAG nodes should be reachable from ExitSU");
920
921    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
922    break;
923  }
924}
925
926//===----------------------------------------------------------------------===//
927// CopyConstrain - DAG post-processing to encourage copy elimination.
928//===----------------------------------------------------------------------===//
929
930namespace {
931/// \brief Post-process the DAG to create weak edges from all uses of a copy to
932/// the one use that defines the copy's source vreg, most likely an induction
933/// variable increment.
934class CopyConstrain : public ScheduleDAGMutation {
935  // Transient state.
936  SlotIndex RegionBeginIdx;
937  // RegionEndIdx is the slot index of the last non-debug instruction in the
938  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
939  SlotIndex RegionEndIdx;
940public:
941  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
942
943  virtual void apply(ScheduleDAGMI *DAG);
944
945protected:
946  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
947};
948} // anonymous
949
950/// constrainLocalCopy handles two possibilities:
951/// 1) Local src:
952/// I0:     = dst
953/// I1: src = ...
954/// I2:     = dst
955/// I3: dst = src (copy)
956/// (create pred->succ edges I0->I1, I2->I1)
957///
958/// 2) Local copy:
959/// I0: dst = src (copy)
960/// I1:     = dst
961/// I2: src = ...
962/// I3:     = dst
963/// (create pred->succ edges I1->I2, I3->I2)
964///
965/// Although the MachineScheduler is currently constrained to single blocks,
966/// this algorithm should handle extended blocks. An EBB is a set of
967/// contiguously numbered blocks such that the previous block in the EBB is
968/// always the single predecessor.
969void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
970  LiveIntervals *LIS = DAG->getLIS();
971  MachineInstr *Copy = CopySU->getInstr();
972
973  // Check for pure vreg copies.
974  unsigned SrcReg = Copy->getOperand(1).getReg();
975  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
976    return;
977
978  unsigned DstReg = Copy->getOperand(0).getReg();
979  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
980    return;
981
982  // Check if either the dest or source is local. If it's live across a back
983  // edge, it's not local. Note that if both vregs are live across the back
984  // edge, we cannot successfully constrain the copy without cyclic scheduling.
985  unsigned LocalReg = DstReg;
986  unsigned GlobalReg = SrcReg;
987  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
988  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
989    LocalReg = SrcReg;
990    GlobalReg = DstReg;
991    LocalLI = &LIS->getInterval(LocalReg);
992    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
993      return;
994  }
995  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
996
997  // Find the global segment after the start of the local LI.
998  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
999  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1000  // local live range. We could create edges from other global uses to the local
1001  // start, but the coalescer should have already eliminated these cases, so
1002  // don't bother dealing with them.
1003  if (GlobalSegment == GlobalLI->end())
1004    return;
1005
1006  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1007  // returned the next global segment. But if GlobalSegment overlaps with
1008  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1009  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1010  if (GlobalSegment->contains(LocalLI->beginIndex()))
1011    ++GlobalSegment;
1012
1013  if (GlobalSegment == GlobalLI->end())
1014    return;
1015
1016  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1017  if (GlobalSegment != GlobalLI->begin()) {
1018    // Two address defs have no hole.
1019    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
1020                               GlobalSegment->start)) {
1021      return;
1022    }
1023    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1024    // it would be a disconnected component in the live range.
1025    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
1026           "Disconnected LRG within the scheduling region.");
1027  }
1028  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1029  if (!GlobalDef)
1030    return;
1031
1032  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1033  if (!GlobalSU)
1034    return;
1035
1036  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1037  // constraining the uses of the last local def to precede GlobalDef.
1038  SmallVector<SUnit*,8> LocalUses;
1039  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1040  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1041  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1042  for (SUnit::const_succ_iterator
1043         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1044       I != E; ++I) {
1045    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1046      continue;
1047    if (I->getSUnit() == GlobalSU)
1048      continue;
1049    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1050      return;
1051    LocalUses.push_back(I->getSUnit());
1052  }
1053  // Open the top of the GlobalLI hole by constraining any earlier global uses
1054  // to precede the start of LocalLI.
1055  SmallVector<SUnit*,8> GlobalUses;
1056  MachineInstr *FirstLocalDef =
1057    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1058  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1059  for (SUnit::const_pred_iterator
1060         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1061    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1062      continue;
1063    if (I->getSUnit() == FirstLocalSU)
1064      continue;
1065    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1066      return;
1067    GlobalUses.push_back(I->getSUnit());
1068  }
1069  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1070  // Add the weak edges.
1071  for (SmallVectorImpl<SUnit*>::const_iterator
1072         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1073    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1074          << GlobalSU->NodeNum << ")\n");
1075    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1076  }
1077  for (SmallVectorImpl<SUnit*>::const_iterator
1078         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1079    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1080          << FirstLocalSU->NodeNum << ")\n");
1081    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1082  }
1083}
1084
1085/// \brief Callback from DAG postProcessing to create weak edges to encourage
1086/// copy elimination.
1087void CopyConstrain::apply(ScheduleDAGMI *DAG) {
1088  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1089  if (FirstPos == DAG->end())
1090    return;
1091  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1092  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1093    &*priorNonDebug(DAG->end(), DAG->begin()));
1094
1095  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1096    SUnit *SU = &DAG->SUnits[Idx];
1097    if (!SU->getInstr()->isCopy())
1098      continue;
1099
1100    constrainLocalCopy(SU, DAG);
1101  }
1102}
1103
1104//===----------------------------------------------------------------------===//
1105// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
1106//===----------------------------------------------------------------------===//
1107
1108namespace {
1109/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
1110/// the schedule.
1111class ConvergingScheduler : public MachineSchedStrategy {
1112public:
1113  /// Represent the type of SchedCandidate found within a single queue.
1114  /// pickNodeBidirectional depends on these listed by decreasing priority.
1115  enum CandReason {
1116    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
1117    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
1118    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
1119    NodeOrder};
1120
1121#ifndef NDEBUG
1122  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
1123#endif
1124
1125  /// Policy for scheduling the next instruction in the candidate's zone.
1126  struct CandPolicy {
1127    bool ReduceLatency;
1128    unsigned ReduceResIdx;
1129    unsigned DemandResIdx;
1130
1131    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
1132  };
1133
1134  /// Status of an instruction's critical resource consumption.
1135  struct SchedResourceDelta {
1136    // Count critical resources in the scheduled region required by SU.
1137    unsigned CritResources;
1138
1139    // Count critical resources from another region consumed by SU.
1140    unsigned DemandedResources;
1141
1142    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
1143
1144    bool operator==(const SchedResourceDelta &RHS) const {
1145      return CritResources == RHS.CritResources
1146        && DemandedResources == RHS.DemandedResources;
1147    }
1148    bool operator!=(const SchedResourceDelta &RHS) const {
1149      return !operator==(RHS);
1150    }
1151  };
1152
1153  /// Store the state used by ConvergingScheduler heuristics, required for the
1154  /// lifetime of one invocation of pickNode().
1155  struct SchedCandidate {
1156    CandPolicy Policy;
1157
1158    // The best SUnit candidate.
1159    SUnit *SU;
1160
1161    // The reason for this candidate.
1162    CandReason Reason;
1163
1164    // Register pressure values for the best candidate.
1165    RegPressureDelta RPDelta;
1166
1167    // Critical resource consumption of the best candidate.
1168    SchedResourceDelta ResDelta;
1169
1170    SchedCandidate(const CandPolicy &policy)
1171    : Policy(policy), SU(NULL), Reason(NoCand) {}
1172
1173    bool isValid() const { return SU; }
1174
1175    // Copy the status of another candidate without changing policy.
1176    void setBest(SchedCandidate &Best) {
1177      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1178      SU = Best.SU;
1179      Reason = Best.Reason;
1180      RPDelta = Best.RPDelta;
1181      ResDelta = Best.ResDelta;
1182    }
1183
1184    void initResourceDelta(const ScheduleDAGMI *DAG,
1185                           const TargetSchedModel *SchedModel);
1186  };
1187
1188  /// Summarize the unscheduled region.
1189  struct SchedRemainder {
1190    // Critical path through the DAG in expected latency.
1191    unsigned CriticalPath;
1192
1193    // Unscheduled resources
1194    SmallVector<unsigned, 16> RemainingCounts;
1195    // Critical resource for the unscheduled zone.
1196    unsigned CritResIdx;
1197    // Number of micro-ops left to schedule.
1198    unsigned RemainingMicroOps;
1199
1200    void reset() {
1201      CriticalPath = 0;
1202      RemainingCounts.clear();
1203      CritResIdx = 0;
1204      RemainingMicroOps = 0;
1205    }
1206
1207    SchedRemainder() { reset(); }
1208
1209    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1210
1211    unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
1212      if (!SchedModel->hasInstrSchedModel())
1213        return 0;
1214
1215      return std::max(
1216        RemainingMicroOps * SchedModel->getMicroOpFactor(),
1217        RemainingCounts[CritResIdx]);
1218    }
1219  };
1220
1221  /// Each scheduling boundary is associated with ready queues. It tracks the
1222  /// current cycle in the direction of movement, and maintains the state
1223  /// of "hazards" and other interlocks at the current cycle.
1224  struct SchedBoundary {
1225    ScheduleDAGMI *DAG;
1226    const TargetSchedModel *SchedModel;
1227    SchedRemainder *Rem;
1228
1229    ReadyQueue Available;
1230    ReadyQueue Pending;
1231    bool CheckPending;
1232
1233    // For heuristics, keep a list of the nodes that immediately depend on the
1234    // most recently scheduled node.
1235    SmallPtrSet<const SUnit*, 8> NextSUs;
1236
1237    ScheduleHazardRecognizer *HazardRec;
1238
1239    unsigned CurrCycle;
1240    unsigned IssueCount;
1241
1242    /// MinReadyCycle - Cycle of the soonest available instruction.
1243    unsigned MinReadyCycle;
1244
1245    // The expected latency of the critical path in this scheduled zone.
1246    unsigned ExpectedLatency;
1247
1248    // The latency of dependence chains leading into this zone.
1249    // For each node scheduled: DLat = max DLat, N.Depth.
1250    // For each cycle scheduled: DLat -= 1.
1251    unsigned DependentLatency;
1252
1253    // Resources used in the scheduled zone beyond this boundary.
1254    SmallVector<unsigned, 16> ResourceCounts;
1255
1256    // Cache the critical resources ID in this scheduled zone.
1257    unsigned CritResIdx;
1258
1259    // Is the scheduled region resource limited vs. latency limited.
1260    bool IsResourceLimited;
1261
1262    unsigned ExpectedCount;
1263
1264#ifndef NDEBUG
1265    // Remember the greatest min operand latency.
1266    unsigned MaxMinLatency;
1267#endif
1268
1269    void reset() {
1270      // A new HazardRec is created for each DAG and owned by SchedBoundary.
1271      delete HazardRec;
1272
1273      Available.clear();
1274      Pending.clear();
1275      CheckPending = false;
1276      NextSUs.clear();
1277      HazardRec = 0;
1278      CurrCycle = 0;
1279      IssueCount = 0;
1280      MinReadyCycle = UINT_MAX;
1281      ExpectedLatency = 0;
1282      DependentLatency = 0;
1283      ResourceCounts.resize(1);
1284      assert(!ResourceCounts[0] && "nonzero count for bad resource");
1285      CritResIdx = 0;
1286      IsResourceLimited = false;
1287      ExpectedCount = 0;
1288#ifndef NDEBUG
1289      MaxMinLatency = 0;
1290#endif
1291      // Reserve a zero-count for invalid CritResIdx.
1292      ResourceCounts.resize(1);
1293    }
1294
1295    /// Pending queues extend the ready queues with the same ID and the
1296    /// PendingFlag set.
1297    SchedBoundary(unsigned ID, const Twine &Name):
1298      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1299      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
1300      HazardRec(0) {
1301      reset();
1302    }
1303
1304    ~SchedBoundary() { delete HazardRec; }
1305
1306    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1307              SchedRemainder *rem);
1308
1309    bool isTop() const {
1310      return Available.getID() == ConvergingScheduler::TopQID;
1311    }
1312
1313    unsigned getUnscheduledLatency(SUnit *SU) const {
1314      if (isTop())
1315        return SU->getHeight();
1316      return SU->getDepth() + SU->Latency;
1317    }
1318
1319    unsigned getCriticalCount() const {
1320      return ResourceCounts[CritResIdx];
1321    }
1322
1323    bool checkHazard(SUnit *SU);
1324
1325    void setLatencyPolicy(CandPolicy &Policy);
1326
1327    void releaseNode(SUnit *SU, unsigned ReadyCycle);
1328
1329    void bumpCycle();
1330
1331    void countResource(unsigned PIdx, unsigned Cycles);
1332
1333    void bumpNode(SUnit *SU);
1334
1335    void releasePending();
1336
1337    void removeReady(SUnit *SU);
1338
1339    SUnit *pickOnlyChoice();
1340  };
1341
1342private:
1343  ScheduleDAGMI *DAG;
1344  const TargetSchedModel *SchedModel;
1345  const TargetRegisterInfo *TRI;
1346
1347  // State of the top and bottom scheduled instruction boundaries.
1348  SchedRemainder Rem;
1349  SchedBoundary Top;
1350  SchedBoundary Bot;
1351
1352public:
1353  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1354  enum {
1355    TopQID = 1,
1356    BotQID = 2,
1357    LogMaxQID = 2
1358  };
1359
1360  ConvergingScheduler():
1361    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1362
1363  virtual void initialize(ScheduleDAGMI *dag);
1364
1365  virtual SUnit *pickNode(bool &IsTopNode);
1366
1367  virtual void schedNode(SUnit *SU, bool IsTopNode);
1368
1369  virtual void releaseTopNode(SUnit *SU);
1370
1371  virtual void releaseBottomNode(SUnit *SU);
1372
1373  virtual void registerRoots();
1374
1375protected:
1376  void balanceZones(
1377    ConvergingScheduler::SchedBoundary &CriticalZone,
1378    ConvergingScheduler::SchedCandidate &CriticalCand,
1379    ConvergingScheduler::SchedBoundary &OppositeZone,
1380    ConvergingScheduler::SchedCandidate &OppositeCand);
1381
1382  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1383                           ConvergingScheduler::SchedCandidate &BotCand);
1384
1385  void tryCandidate(SchedCandidate &Cand,
1386                    SchedCandidate &TryCand,
1387                    SchedBoundary &Zone,
1388                    const RegPressureTracker &RPTracker,
1389                    RegPressureTracker &TempTracker);
1390
1391  SUnit *pickNodeBidirectional(bool &IsTopNode);
1392
1393  void pickNodeFromQueue(SchedBoundary &Zone,
1394                         const RegPressureTracker &RPTracker,
1395                         SchedCandidate &Candidate);
1396
1397  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
1398
1399#ifndef NDEBUG
1400  void traceCandidate(const SchedCandidate &Cand);
1401#endif
1402};
1403} // namespace
1404
1405void ConvergingScheduler::SchedRemainder::
1406init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1407  reset();
1408  if (!SchedModel->hasInstrSchedModel())
1409    return;
1410  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1411  for (std::vector<SUnit>::iterator
1412         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1413    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1414    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1415    for (TargetSchedModel::ProcResIter
1416           PI = SchedModel->getWriteProcResBegin(SC),
1417           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1418      unsigned PIdx = PI->ProcResourceIdx;
1419      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1420      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1421    }
1422  }
1423  for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1424       PIdx != PEnd; ++PIdx) {
1425    if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1426        >= (int)SchedModel->getLatencyFactor()) {
1427      CritResIdx = PIdx;
1428    }
1429  }
1430  DEBUG(dbgs() << "Critical Resource: "
1431        << SchedModel->getProcResource(CritResIdx)->Name
1432        << ": " << RemainingCounts[CritResIdx]
1433        << " / " << SchedModel->getLatencyFactor() << '\n');
1434}
1435
1436void ConvergingScheduler::SchedBoundary::
1437init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1438  reset();
1439  DAG = dag;
1440  SchedModel = smodel;
1441  Rem = rem;
1442  if (SchedModel->hasInstrSchedModel())
1443    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1444}
1445
1446void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1447  DAG = dag;
1448  SchedModel = DAG->getSchedModel();
1449  TRI = DAG->TRI;
1450
1451  Rem.init(DAG, SchedModel);
1452  Top.init(DAG, SchedModel, &Rem);
1453  Bot.init(DAG, SchedModel, &Rem);
1454
1455  // Initialize resource counts.
1456
1457  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1458  // are disabled, then these HazardRecs will be disabled.
1459  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1460  const TargetMachine &TM = DAG->MF.getTarget();
1461  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1462  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1463
1464  assert((!ForceTopDown || !ForceBottomUp) &&
1465         "-misched-topdown incompatible with -misched-bottomup");
1466}
1467
1468void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1469  if (SU->isScheduled)
1470    return;
1471
1472  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1473       I != E; ++I) {
1474    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1475    unsigned MinLatency = I->getMinLatency();
1476#ifndef NDEBUG
1477    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1478#endif
1479    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1480      SU->TopReadyCycle = PredReadyCycle + MinLatency;
1481  }
1482  Top.releaseNode(SU, SU->TopReadyCycle);
1483}
1484
1485void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1486  if (SU->isScheduled)
1487    return;
1488
1489  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1490
1491  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1492       I != E; ++I) {
1493    if (I->isWeak())
1494      continue;
1495    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1496    unsigned MinLatency = I->getMinLatency();
1497#ifndef NDEBUG
1498    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1499#endif
1500    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1501      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1502  }
1503  Bot.releaseNode(SU, SU->BotReadyCycle);
1504}
1505
1506void ConvergingScheduler::registerRoots() {
1507  Rem.CriticalPath = DAG->ExitSU.getDepth();
1508  // Some roots may not feed into ExitSU. Check all of them in case.
1509  for (std::vector<SUnit*>::const_iterator
1510         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1511    if ((*I)->getDepth() > Rem.CriticalPath)
1512      Rem.CriticalPath = (*I)->getDepth();
1513  }
1514  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1515}
1516
1517/// Does this SU have a hazard within the current instruction group?
1518///
1519/// The scheduler supports two modes of hazard recognition. The first is the
1520/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1521/// supports highly complicated in-order reservation tables
1522/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1523///
1524/// The second is a streamlined mechanism that checks for hazards based on
1525/// simple counters that the scheduler itself maintains. It explicitly checks
1526/// for instruction dispatch limitations, including the number of micro-ops that
1527/// can dispatch per cycle.
1528///
1529/// TODO: Also check whether the SU must start a new group.
1530bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1531  if (HazardRec->isEnabled())
1532    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1533
1534  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1535  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1536    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1537          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1538    return true;
1539  }
1540  return false;
1541}
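// Worked example for the counter-based check above (hypothetical numbers):
// with IssueWidth == 4 and IssueCount == 3, an SU that expands to 2 micro-ops
// gives 3 + 2 > 4, so checkHazard() reports a hazard and callers keep the node
// in Pending until bumpCycle() opens a new instruction group.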
1542
1543/// Compute the remaining latency to determine whether ILP should be increased.
1544void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1545  DEBUG(dbgs() << "  " << Available.getName()
1546        << " DependentLatency " << DependentLatency << '\n');
1547
1548  // FIXME: compile time. In all, we visit four queues here; we should only
1549  // need to visit the one that was last popped if we cache the result.
1550  unsigned RemLatency = DependentLatency;
1551  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1552       I != E; ++I) {
1553    unsigned L = getUnscheduledLatency(*I);
1554    if (L > RemLatency) {
1555      DEBUG(dbgs() << "  " << Available.getName()
1556            << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
1557      RemLatency = L;
1558    }
1559  }
1560  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1561       I != E; ++I) {
1562    unsigned L = getUnscheduledLatency(*I);
1563    if (L > RemLatency)
1564      RemLatency = L;
1565  }
1566  unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
1567  DEBUG(dbgs() << "  " << Available.getName()
1568        << " ExpectedLatency " << ExpectedLatency
1569        << " CP Limit " << CriticalPathLimit << '\n');
1570
1571  if (RemLatency + std::max(ExpectedLatency, CurrCycle) >= CriticalPathLimit
1572      && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1573    Policy.ReduceLatency = true;
1574    DEBUG(dbgs() << "  Increase ILP: " << Available.getName() << '\n');
1575  }
1576}
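// Example of the trigger above (hypothetical numbers): with a critical path of
// 20 and an ILP window of 10, CriticalPathLimit is 30. If RemLatency is 18 and
// max(ExpectedLatency, CurrCycle) is 14, the sum 32 reaches the limit, so
// ReduceLatency is set, provided RemLatency also exceeds the maximum remaining
// resource count.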
1577
1578void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1579                                                     unsigned ReadyCycle) {
1580
1581  if (ReadyCycle < MinReadyCycle)
1582    MinReadyCycle = ReadyCycle;
1583
1584  // Check for interlocks first. For the purpose of other heuristics, an
1585  // instruction that cannot issue appears as if it's not in the ReadyQueue.
1586  if (ReadyCycle > CurrCycle || checkHazard(SU))
1587    Pending.push(SU);
1588  else
1589    Available.push(SU);
1590
1591  // Record this node as an immediate dependent of the scheduled node.
1592  NextSUs.insert(SU);
1593}
1594
1595/// Move the boundary of scheduled code by one cycle.
1596void ConvergingScheduler::SchedBoundary::bumpCycle() {
1597  unsigned Width = SchedModel->getIssueWidth();
1598  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1599
1600  unsigned NextCycle = CurrCycle + 1;
1601  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1602  if (MinReadyCycle > NextCycle) {
1603    IssueCount = 0;
1604    NextCycle = MinReadyCycle;
1605  }
1606  if ((NextCycle - CurrCycle) > DependentLatency)
1607    DependentLatency = 0;
1608  else
1609    DependentLatency -= (NextCycle - CurrCycle);
1610
1611  if (!HazardRec->isEnabled()) {
1612    // Bypass HazardRec virtual calls.
1613    CurrCycle = NextCycle;
1614  }
1615  else {
1616    // Bypass getHazardType calls in case of long latency.
1617    for (; CurrCycle != NextCycle; ++CurrCycle) {
1618      if (isTop())
1619        HazardRec->AdvanceCycle();
1620      else
1621        HazardRec->RecedeCycle();
1622    }
1623  }
1624  CheckPending = true;
1625  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1626
1627  DEBUG(dbgs() << "  " << Available.getName()
1628        << " Cycle: " << CurrCycle << '\n');
1629}
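// For illustration: with IssueWidth == 2 and IssueCount == 5 (a single
// instruction may contribute several cycles' worth of micro-ops), and assuming
// MinReadyCycle does not skip the cycle ahead, the update above leaves
// IssueCount == 3, so the surplus keeps throttling issue in the next cycle
// instead of being dropped.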
1630
1631/// Add the given processor resource to this scheduled zone.
1632void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1633                                                       unsigned Cycles) {
1634  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1635  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1636        << " +(" << Cycles << "x" << Factor
1637        << ") / " << SchedModel->getLatencyFactor() << '\n');
1638
1639  unsigned Count = Factor * Cycles;
1640  ResourceCounts[PIdx] += Count;
1641  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1642  Rem->RemainingCounts[PIdx] -= Count;
1643
1644  // Check if this resource exceeds the current critical resource by a full
1645  // cycle. If so, it becomes the critical resource.
1646  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1647      >= (int)SchedModel->getLatencyFactor()) {
1648    CritResIdx = PIdx;
1649    DEBUG(dbgs() << "  *** Critical resource "
1650          << SchedModel->getProcResource(PIdx)->Name << " x"
1651          << ResourceCounts[PIdx] << '\n');
1652  }
1653}
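// Illustrative example: if resource PIdx has factor 3 and the instruction
// occupies it for 2 cycles, Count == 6 normalized units are added to the
// scheduled zone and subtracted from the remainder; if that puts the resource
// a full latency factor ahead of the current critical resource, it becomes the
// new critical resource.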
1654
1655/// Move the boundary of scheduled code by one SUnit.
1656void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1657  // Update the reservation table.
1658  if (HazardRec->isEnabled()) {
1659    if (!isTop() && SU->isCall) {
1660      // Calls are scheduled with their preceding instructions. For bottom-up
1661      // scheduling, clear the pipeline state before emitting.
1662      HazardRec->Reset();
1663    }
1664    HazardRec->EmitInstruction(SU);
1665  }
1666  // Update resource counts and critical resource.
1667  if (SchedModel->hasInstrSchedModel()) {
1668    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1669    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1670    for (TargetSchedModel::ProcResIter
1671           PI = SchedModel->getWriteProcResBegin(SC),
1672           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1673      countResource(PI->ProcResourceIdx, PI->Cycles);
1674    }
1675  }
1676  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1677  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1678  if (SU->getDepth() > TopLatency)
1679    TopLatency = SU->getDepth();
1680  if (SU->getHeight() > BotLatency)
1681    BotLatency = SU->getHeight();
1682
1683  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1684
1685  // Check the instruction group dispatch limit.
1686  // TODO: Check if this SU must end a dispatch group.
1687  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
1688
1689  // checkHazard prevents scheduling multiple instructions per cycle that exceed
1690  // the issue width. However, we commonly reach the maximum. In this case,
1691  // opportunistically bump the cycle to avoid uselessly checking everything in
1692  // the readyQ. Furthermore, a single instruction may produce more than one
1693  // cycle's worth of micro-ops.
1694  if (IssueCount >= SchedModel->getIssueWidth()) {
1695    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1696    bumpCycle();
1697  }
1698}
1699
1700/// Release pending ready nodes into the available queue. This makes them
1701/// visible to heuristics.
1702void ConvergingScheduler::SchedBoundary::releasePending() {
1703  // If the available queue is empty, it is safe to reset MinReadyCycle.
1704  if (Available.empty())
1705    MinReadyCycle = UINT_MAX;
1706
1707  // Check to see if any of the pending instructions are ready to issue.  If
1708  // so, add them to the available queue.
1709  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1710    SUnit *SU = *(Pending.begin()+i);
1711    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1712
1713    if (ReadyCycle < MinReadyCycle)
1714      MinReadyCycle = ReadyCycle;
1715
1716    if (ReadyCycle > CurrCycle)
1717      continue;
1718
1719    if (checkHazard(SU))
1720      continue;
1721
1722    Available.push(SU);
1723    Pending.remove(Pending.begin()+i);
1724    --i; --e;
1725  }
1726  DEBUG(if (!Pending.empty()) Pending.dump());
1727  CheckPending = false;
1728}
1729
1730/// Remove SU from the ready set for this boundary.
1731void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1732  if (Available.isInQueue(SU))
1733    Available.remove(Available.find(SU));
1734  else {
1735    assert(Pending.isInQueue(SU) && "bad ready count");
1736    Pending.remove(Pending.find(SU));
1737  }
1738}
1739
1740/// If this queue only has one ready candidate, return it. As a side effect,
1741/// defer any nodes that now hit a hazard, and advance the cycle until at least
1742/// one node is ready. If multiple instructions are ready, return NULL.
1743SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1744  if (CheckPending)
1745    releasePending();
1746
1747  if (IssueCount > 0) {
1748    // Defer any ready instrs that now have a hazard.
1749    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1750      if (checkHazard(*I)) {
1751        Pending.push(*I);
1752        I = Available.remove(I);
1753        continue;
1754      }
1755      ++I;
1756    }
1757  }
1758  for (unsigned i = 0; Available.empty(); ++i) {
1759    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1760           "permanent hazard"); (void)i;
1761    bumpCycle();
1762    releasePending();
1763  }
1764  if (Available.size() == 1)
1765    return *Available.begin();
1766  return NULL;
1767}
1768
1769/// Record the candidate policy for opposite zones with different critical
1770/// resources.
1771///
1772/// If the CriticalZone is latency limited, don't force a policy for the
1773/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
1774void ConvergingScheduler::balanceZones(
1775  ConvergingScheduler::SchedBoundary &CriticalZone,
1776  ConvergingScheduler::SchedCandidate &CriticalCand,
1777  ConvergingScheduler::SchedBoundary &OppositeZone,
1778  ConvergingScheduler::SchedCandidate &OppositeCand) {
1779
1780  if (!CriticalZone.IsResourceLimited)
1781    return;
1782  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
1783
1784  SchedRemainder *Rem = CriticalZone.Rem;
1785
1786  // If the critical zone is overconsuming a resource relative to the
1787  // remainder, try to reduce it.
1788  unsigned RemainingCritCount =
1789    Rem->RemainingCounts[CriticalZone.CritResIdx];
1790  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
1791      > (int)SchedModel->getLatencyFactor()) {
1792    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1793    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
1794          << " reduce "
1795          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1796          << '\n');
1797  }
1798  // If the other zone is underconsuming a resource relative to the full zone,
1799  // try to increase it.
1800  unsigned OppositeCount =
1801    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1802  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1803      > (int)SchedModel->getLatencyFactor()) {
1804    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1805    DEBUG(dbgs() << "  Balance " << OppositeZone.Available.getName()
1806          << " demand "
1807          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
1808          << '\n');
1809  }
1810}
1811
1812/// Determine if the scheduled zones exceed resource limits or the critical path
1813/// and set each candidate's ReduceLatency and ReduceResIdx policy accordingly.
1814void ConvergingScheduler::checkResourceLimits(
1815  ConvergingScheduler::SchedCandidate &TopCand,
1816  ConvergingScheduler::SchedCandidate &BotCand) {
1817
1818  // Set ReduceLatency to true if needed.
1819  Bot.setLatencyPolicy(BotCand.Policy);
1820  Top.setLatencyPolicy(TopCand.Policy);
1821
1822  // Handle resource-limited regions.
1823  if (Top.IsResourceLimited && Bot.IsResourceLimited
1824      && Top.CritResIdx == Bot.CritResIdx) {
1825    // If the scheduled critical resource in both zones is no longer the
1826    // critical remaining resource, attempt to reduce resource height both ways.
1827    if (Top.CritResIdx != Rem.CritResIdx) {
1828      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1829      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1830      DEBUG(dbgs() << "  Reduce scheduled "
1831            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1832    }
1833    return;
1834  }
1835  // Handle latency-limited regions.
1836  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1837    // If the total scheduled expected latency exceeds the region's critical
1838    // path then reduce latency both ways.
1839    //
1840    // Just because a zone is not resource limited does not mean it is latency
1841  // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
1842    // to exceed expected latency.
1843    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1844        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1845      TopCand.Policy.ReduceLatency = true;
1846      BotCand.Policy.ReduceLatency = true;
1847      DEBUG(dbgs() << "  Reduce scheduled latency " << Top.ExpectedLatency
1848            << " + " << Bot.ExpectedLatency << '\n');
1849    }
1850    return;
1851  }
1852  // The critical resource is different in each zone, so request balancing.
1853
1854  // Compute the cost of each zone.
1855  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1856  Top.ExpectedCount = std::max(
1857    Top.getCriticalCount(),
1858    Top.ExpectedCount * SchedModel->getLatencyFactor());
1859  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1860  Bot.ExpectedCount = std::max(
1861    Bot.getCriticalCount(),
1862    Bot.ExpectedCount * SchedModel->getLatencyFactor());
1863
1864  balanceZones(Top, TopCand, Bot, BotCand);
1865  balanceZones(Bot, BotCand, Top, TopCand);
1866}
1867
1868void ConvergingScheduler::SchedCandidate::
1869initResourceDelta(const ScheduleDAGMI *DAG,
1870                  const TargetSchedModel *SchedModel) {
1871  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1872    return;
1873
1874  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1875  for (TargetSchedModel::ProcResIter
1876         PI = SchedModel->getWriteProcResBegin(SC),
1877         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1878    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1879      ResDelta.CritResources += PI->Cycles;
1880    if (PI->ProcResourceIdx == Policy.DemandResIdx)
1881      ResDelta.DemandedResources += PI->Cycles;
1882  }
1883}
1884
1885/// Return true if this heuristic determines order.
1886static bool tryLess(int TryVal, int CandVal,
1887                    ConvergingScheduler::SchedCandidate &TryCand,
1888                    ConvergingScheduler::SchedCandidate &Cand,
1889                    ConvergingScheduler::CandReason Reason) {
1890  if (TryVal < CandVal) {
1891    TryCand.Reason = Reason;
1892    return true;
1893  }
1894  if (TryVal > CandVal) {
1895    if (Cand.Reason > Reason)
1896      Cand.Reason = Reason;
1897    return true;
1898  }
1899  return false;
1900}
1901
1902static bool tryGreater(int TryVal, int CandVal,
1903                       ConvergingScheduler::SchedCandidate &TryCand,
1904                       ConvergingScheduler::SchedCandidate &Cand,
1905                       ConvergingScheduler::CandReason Reason) {
1906  if (TryVal > CandVal) {
1907    TryCand.Reason = Reason;
1908    return true;
1909  }
1910  if (TryVal < CandVal) {
1911    if (Cand.Reason > Reason)
1912      Cand.Reason = Reason;
1913    return true;
1914  }
1915  return false;
1916}
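// Usage sketch (hypothetical values): tryLess(1, 2, TryCand, Cand, Weak) sets
// TryCand.Reason = Weak and returns true, so TryCand becomes the new best in
// pickNodeFromQueue(). tryLess(2, 1, ...) also returns true but leaves
// TryCand.Reason == NoCand, so the existing Cand survives (its Reason is
// strengthened to Weak if it was weaker). Equal values return false and fall
// through to the next heuristic in tryCandidate().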
1917
1918static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1919  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1920}
1921
1922/// Minimize physical register live ranges. Regalloc wants them adjacent to
1923/// their physreg def/use.
1924///
1925/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
1926/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
1927/// with the operation that produces or consumes the physreg. We'll do this when
1928/// regalloc has support for parallel copies.
1929static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
1930  const MachineInstr *MI = SU->getInstr();
1931  if (!MI->isCopy())
1932    return 0;
1933
1934  unsigned ScheduledOper = isTop ? 1 : 0;
1935  unsigned UnscheduledOper = isTop ? 0 : 1;
1936  // If we have already scheduled the physreg producer/consumer, immediately
1937  // schedule the copy.
1938  if (TargetRegisterInfo::isPhysicalRegister(
1939        MI->getOperand(ScheduledOper).getReg()))
1940    return 1;
1941  // If the physreg is at the boundary, defer it. Otherwise schedule it
1942  // immediately to free the dependent. We can hoist the copy later.
1943  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
1944  if (TargetRegisterInfo::isPhysicalRegister(
1945        MI->getOperand(UnscheduledOper).getReg()))
1946    return AtBoundary ? -1 : 1;
1947  return 0;
1948}
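// Example (hypothetical operands): scheduling top-down, "%vreg0 = COPY %EAX"
// returns +1 because the physreg source comes from an already scheduled
// instruction, so the copy is pulled up next to it. A copy whose physreg
// operand is on the unscheduled side returns -1 only while the copy sits at
// the region boundary (deferring it); otherwise it returns +1 so the dependent
// is freed quickly and the copy can be hoisted later.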
1949
1950/// Apply a set of heuristics to a new candidate. Heuristics are currently
1951/// hierarchical. This may be more efficient than a graduated cost model because
1952/// we don't need to evaluate all aspects of the model for each node in the
1953/// queue. But it's really done to make the heuristics easier to debug and
1954/// statistically analyze.
1955///
1956/// \param Cand provides the policy and current best candidate.
1957/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1958/// \param Zone describes the scheduled zone that we are extending.
1959/// \param RPTracker describes reg pressure within the scheduled zone.
1960/// \param TempTracker is a scratch pressure tracker to reuse in queries.
1961void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1962                                       SchedCandidate &TryCand,
1963                                       SchedBoundary &Zone,
1964                                       const RegPressureTracker &RPTracker,
1965                                       RegPressureTracker &TempTracker) {
1966
1967  // Always initialize TryCand's RPDelta.
1968  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1969                                  DAG->getRegionCriticalPSets(),
1970                                  DAG->getRegPressure().MaxSetPressure);
1971
1972  // Initialize the candidate if needed.
1973  if (!Cand.isValid()) {
1974    TryCand.Reason = NodeOrder;
1975    return;
1976  }
1977
1978  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
1979                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
1980                 TryCand, Cand, PhysRegCopy))
1981    return;
1982
1983  // Avoid exceeding the target's limit.
1984  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1985              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1986    return;
1987  if (Cand.Reason == SingleExcess)
1988    Cand.Reason = MultiPressure;
1989
1990  // Avoid increasing the max critical pressure in the scheduled region.
1991  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1992              Cand.RPDelta.CriticalMax.UnitIncrease,
1993              TryCand, Cand, SingleCritical))
1994    return;
1995  if (Cand.Reason == SingleCritical)
1996    Cand.Reason = MultiPressure;
1997
1998  // Keep clustered nodes together to encourage downstream peephole
1999  // optimizations which may reduce resource requirements.
2000  //
2001  // This is a best effort to set things up for a post-RA pass. Optimizations
2002  // like generating loads of multiple registers should ideally be done within
2003  // the scheduler pass by combining the loads during DAG postprocessing.
2004  const SUnit *NextClusterSU =
2005    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2006  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2007                 TryCand, Cand, Cluster))
2008    return;
2009
2010  // Weak edges are for clustering and other constraints.
2011  //
2012  // Deferring TryCand here does not change Cand's reason. This is good in the
2013  // sense that a bad candidate shouldn't affect a previous candidate's
2014  // goodness, but bad in that it is asymmetric and depends on queue order.
2015  CandReason OrigReason = Cand.Reason;
2016  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2017              getWeakLeft(Cand.SU, Zone.isTop()),
2018              TryCand, Cand, Weak)) {
2019    Cand.Reason = OrigReason;
2020    return;
2021  }
2022  // Avoid critical resource consumption and balance the schedule.
2023  TryCand.initResourceDelta(DAG, SchedModel);
2024  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2025              TryCand, Cand, ResourceReduce))
2026    return;
2027  if (tryGreater(TryCand.ResDelta.DemandedResources,
2028                 Cand.ResDelta.DemandedResources,
2029                 TryCand, Cand, ResourceDemand))
2030    return;
2031
2032  // Avoid serializing long latency dependence chains.
2033  if (Cand.Policy.ReduceLatency) {
2034    if (Zone.isTop()) {
2035      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
2036          > Zone.ExpectedCount) {
2037        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2038                    TryCand, Cand, TopDepthReduce))
2039          return;
2040      }
2041      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2042                     TryCand, Cand, TopPathReduce))
2043        return;
2044    }
2045    else {
2046      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
2047          > Zone.ExpectedCount) {
2048        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2049                    TryCand, Cand, BotHeightReduce))
2050          return;
2051      }
2052      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2053                     TryCand, Cand, BotPathReduce))
2054        return;
2055    }
2056  }
2057
2058  // Avoid increasing the max pressure of the entire region.
2059  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
2060              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
2061    return;
2062  if (Cand.Reason == SingleMax)
2063    Cand.Reason = MultiPressure;
2064
2065  // Prefer immediate defs/users of the last scheduled instruction. This is a
2066  // nice pressure avoidance strategy that also conserves the processor's
2067  // register renaming resources and keeps the machine code readable.
2068  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
2069                 TryCand, Cand, NextDefUse))
2070    return;
2071
2072  // Fall through to original instruction order.
2073  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2074      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2075    TryCand.Reason = NodeOrder;
2076  }
2077}
2078
2079/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
2080/// more desirable than RHS from a scheduling standpoint.
2081static bool compareRPDelta(const RegPressureDelta &LHS,
2082                           const RegPressureDelta &RHS) {
2083  // Compare each component of pressure in decreasing order of importance
2084  // without checking if any are valid. Invalid PressureElements are assumed to
2085  // have UnitIncrease==0, so are neutral.
2086
2087  // Avoid exceeding the target's register pressure limit (excess pressure).
2088  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
2089    DEBUG(dbgs() << "  RP excess top - bot: "
2090          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
2091    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
2092  }
2093  // Avoid increasing the max critical pressure in the scheduled region.
2094  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
2095    DEBUG(dbgs() << "  RP critical top - bot: "
2096          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
2097          << '\n');
2098    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
2099  }
2100  // Avoid increasing the max pressure of the entire region.
2101  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
2102    DEBUG(dbgs() << "  RP current top - bot: "
2103          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
2104          << '\n');
2105    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
2106  }
2107  return false;
2108}
2109
2110#ifndef NDEBUG
2111const char *ConvergingScheduler::getReasonStr(
2112  ConvergingScheduler::CandReason Reason) {
2113  switch (Reason) {
2114  case NoCand:         return "NOCAND    ";
2115  case PhysRegCopy:    return "PREG-COPY ";
2116  case SingleExcess:   return "REG-EXCESS";
2117  case SingleCritical: return "REG-CRIT  ";
2118  case Cluster:        return "CLUSTER   ";
2119  case Weak:           return "WEAK      ";
2120  case SingleMax:      return "REG-MAX   ";
2121  case MultiPressure:  return "REG-MULTI ";
2122  case ResourceReduce: return "RES-REDUCE";
2123  case ResourceDemand: return "RES-DEMAND";
2124  case TopDepthReduce: return "TOP-DEPTH ";
2125  case TopPathReduce:  return "TOP-PATH  ";
2126  case BotHeightReduce:return "BOT-HEIGHT";
2127  case BotPathReduce:  return "BOT-PATH  ";
2128  case NextDefUse:     return "DEF-USE   ";
2129  case NodeOrder:      return "ORDER     ";
2130  }
2131  llvm_unreachable("Unknown reason!");
2132}
2133
2134void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
2135  PressureElement P;
2136  unsigned ResIdx = 0;
2137  unsigned Latency = 0;
2138  switch (Cand.Reason) {
2139  default:
2140    break;
2141  case SingleExcess:
2142    P = Cand.RPDelta.Excess;
2143    break;
2144  case SingleCritical:
2145    P = Cand.RPDelta.CriticalMax;
2146    break;
2147  case SingleMax:
2148    P = Cand.RPDelta.CurrentMax;
2149    break;
2150  case ResourceReduce:
2151    ResIdx = Cand.Policy.ReduceResIdx;
2152    break;
2153  case ResourceDemand:
2154    ResIdx = Cand.Policy.DemandResIdx;
2155    break;
2156  case TopDepthReduce:
2157    Latency = Cand.SU->getDepth();
2158    break;
2159  case TopPathReduce:
2160    Latency = Cand.SU->getHeight();
2161    break;
2162  case BotHeightReduce:
2163    Latency = Cand.SU->getHeight();
2164    break;
2165  case BotPathReduce:
2166    Latency = Cand.SU->getDepth();
2167    break;
2168  }
2169  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2170  if (P.isValid())
2171    dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
2172           << ":" << P.UnitIncrease << " ";
2173  else
2174    dbgs() << "      ";
2175  if (ResIdx)
2176    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2177  else
2178    dbgs() << "         ";
2179  if (Latency)
2180    dbgs() << " " << Latency << " cycles ";
2181  else
2182    dbgs() << "          ";
2183  dbgs() << '\n';
2184}
2185#endif
2186
2187/// Pick the best candidate from the given queue.
2188///
2189/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2190/// DAG building. To adjust for the current scheduling location we need to
2191/// maintain the number of vreg uses remaining to be top-scheduled.
2192void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2193                                            const RegPressureTracker &RPTracker,
2194                                            SchedCandidate &Cand) {
2195  ReadyQueue &Q = Zone.Available;
2196
2197  DEBUG(Q.dump());
2198
2199  // getMaxPressureDelta temporarily modifies the tracker.
2200  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2201
2202  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2203
2204    SchedCandidate TryCand(Cand.Policy);
2205    TryCand.SU = *I;
2206    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2207    if (TryCand.Reason != NoCand) {
2208      // Initialize resource delta if needed in case future heuristics query it.
2209      if (TryCand.ResDelta == SchedResourceDelta())
2210        TryCand.initResourceDelta(DAG, SchedModel);
2211      Cand.setBest(TryCand);
2212      DEBUG(traceCandidate(Cand));
2213    }
2214  }
2215}
2216
2217static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
2218                      bool IsTop) {
2219  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2220        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
2221}
2222
2223/// Pick the best candidate node from either the top or bottom queue.
2224SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
2225  // Schedule as far as possible in the direction of no choice. This is most
2226  // efficient, but also provides the best heuristics for CriticalPSets.
2227  if (SUnit *SU = Bot.pickOnlyChoice()) {
2228    IsTopNode = false;
2229    DEBUG(dbgs() << "Pick Bot NOCAND\n");
2230    return SU;
2231  }
2232  if (SUnit *SU = Top.pickOnlyChoice()) {
2233    IsTopNode = true;
2234    DEBUG(dbgs() << "Pick Top NOCAND\n");
2235    return SU;
2236  }
2237  CandPolicy NoPolicy;
2238  SchedCandidate BotCand(NoPolicy);
2239  SchedCandidate TopCand(NoPolicy);
2240  checkResourceLimits(TopCand, BotCand);
2241
2242  // Prefer bottom scheduling when heuristics are silent.
2243  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2244  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2245
2246  // If either Q has a single candidate that provides the least increase in
2247  // Excess pressure, we can immediately schedule from that Q.
2248  //
2249  // RegionCriticalPSets summarizes the pressure within the scheduled region and
2250  // affects picking from either Q. If scheduling in one direction must
2251  // increase pressure for one of the excess PSets, then schedule in that
2252  // direction first to provide more freedom in the other direction.
2253  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
2254    IsTopNode = false;
2255    tracePick(BotCand, IsTopNode);
2256    return BotCand.SU;
2257  }
2258  // Check if the top Q has a better candidate.
2259  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2260  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2261
2262  // If either Q has a single candidate that minimizes pressure above the
2263  // original region's pressure, pick it.
2264  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
2265    if (TopCand.Reason < BotCand.Reason) {
2266      IsTopNode = true;
2267      tracePick(TopCand, IsTopNode);
2268      return TopCand.SU;
2269    }
2270    IsTopNode = false;
2271    tracePick(BotCand, IsTopNode);
2272    return BotCand.SU;
2273  }
2274  // Check for a salient pressure difference and pick the best from either side.
2275  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
2276    IsTopNode = true;
2277    tracePick(TopCand, IsTopNode);
2278    return TopCand.SU;
2279  }
2280  // Otherwise prefer the bottom candidate, in node order if all else failed.
2281  if (TopCand.Reason < BotCand.Reason) {
2282    IsTopNode = true;
2283    tracePick(TopCand, IsTopNode);
2284    return TopCand.SU;
2285  }
2286  IsTopNode = false;
2287  tracePick(BotCand, IsTopNode);
2288  return BotCand.SU;
2289}
2290
2291/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2292SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
2293  if (DAG->top() == DAG->bottom()) {
2294    assert(Top.Available.empty() && Top.Pending.empty() &&
2295           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2296    return NULL;
2297  }
2298  SUnit *SU;
2299  do {
2300    if (ForceTopDown) {
2301      SU = Top.pickOnlyChoice();
2302      if (!SU) {
2303        CandPolicy NoPolicy;
2304        SchedCandidate TopCand(NoPolicy);
2305        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2306        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2307        SU = TopCand.SU;
2308      }
2309      IsTopNode = true;
2310    }
2311    else if (ForceBottomUp) {
2312      SU = Bot.pickOnlyChoice();
2313      if (!SU) {
2314        CandPolicy NoPolicy;
2315        SchedCandidate BotCand(NoPolicy);
2316        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2317        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2318        SU = BotCand.SU;
2319      }
2320      IsTopNode = false;
2321    }
2322    else {
2323      SU = pickNodeBidirectional(IsTopNode);
2324    }
2325  } while (SU->isScheduled);
2326
2327  if (SU->isTopReady())
2328    Top.removeReady(SU);
2329  if (SU->isBottomReady())
2330    Bot.removeReady(SU);
2331
2332  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2333  return SU;
2334}
2335
2336void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2337
2338  MachineBasicBlock::iterator InsertPos = SU->getInstr();
2339  if (!isTop)
2340    ++InsertPos;
2341  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2342
2343  // Find already scheduled copies with a single physreg dependence and move
2344  // them just above the scheduled instruction.
2345  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2346       I != E; ++I) {
2347    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2348      continue;
2349    SUnit *DepSU = I->getSUnit();
2350    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2351      continue;
2352    MachineInstr *Copy = DepSU->getInstr();
2353    if (!Copy->isCopy())
2354      continue;
2355    DEBUG(dbgs() << "  Rescheduling physreg copy ";
2356          I->getSUnit()->dump(DAG));
2357    DAG->moveInstruction(Copy, InsertPos);
2358  }
2359}
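// For example (hypothetical instructions): after top-scheduling a call that
// reads %EAX, an already scheduled "%EAX = COPY %vreg5" with the call as its
// only successor is moved so it sits immediately above the call, keeping the
// physreg live range as short as possible.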
2360
2361/// Update the scheduler's state after scheduling a node. This is the same node
2362/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2363/// its state based on the current cycle before MachineSchedStrategy does.
2364///
2365/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2366/// them here. See comments in biasPhysRegCopy.
2367void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2368  if (IsTopNode) {
2369    SU->TopReadyCycle = Top.CurrCycle;
2370    Top.bumpNode(SU);
2371    if (SU->hasPhysRegUses)
2372      reschedulePhysRegCopies(SU, true);
2373  }
2374  else {
2375    SU->BotReadyCycle = Bot.CurrCycle;
2376    Bot.bumpNode(SU);
2377    if (SU->hasPhysRegDefs)
2378      reschedulePhysRegCopies(SU, false);
2379  }
2380}
2381
2382/// Create the standard converging machine scheduler. This will be used as the
2383/// default scheduler if the target does not set a default.
2384static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2385  assert((!ForceTopDown || !ForceBottomUp) &&
2386         "-misched-topdown incompatible with -misched-bottomup");
2387  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2388  // Register DAG post-processors.
2389  //
2390  // FIXME: extend the mutation API to allow earlier mutations to instantiate
2391  // data and pass it to later mutations. Have a single mutation that gathers
2392  // the interesting nodes in one pass.
2393  if (EnableCopyConstrain)
2394    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
2395  if (EnableLoadCluster)
2396    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2397  if (EnableMacroFusion)
2398    DAG->addMutation(new MacroFusion(DAG->TII));
2399  return DAG;
2400}
2401static MachineSchedRegistry
2402ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2403                        createConvergingSched);
2404
2405//===----------------------------------------------------------------------===//
2406// ILP Scheduler. Currently for experimental analysis of heuristics.
2407//===----------------------------------------------------------------------===//
2408
2409namespace {
2410/// \brief Order nodes by the ILP metric.
2411struct ILPOrder {
2412  const SchedDFSResult *DFSResult;
2413  const BitVector *ScheduledTrees;
2414  bool MaximizeILP;
2415
2416  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
2417
2418  /// \brief Apply a less-than relation on node priority.
2419  ///
2420  /// (Return true if A comes after B in the Q.)
2421  bool operator()(const SUnit *A, const SUnit *B) const {
2422    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2423    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2424    if (SchedTreeA != SchedTreeB) {
2425      // Unscheduled trees have lower priority.
2426      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2427        return ScheduledTrees->test(SchedTreeB);
2428
2429      // Trees with shallower connections have lower priority.
2430      if (DFSResult->getSubtreeLevel(SchedTreeA)
2431          != DFSResult->getSubtreeLevel(SchedTreeB)) {
2432        return DFSResult->getSubtreeLevel(SchedTreeA)
2433          < DFSResult->getSubtreeLevel(SchedTreeB);
2434      }
2435    }
2436    if (MaximizeILP)
2437      return DFSResult->getILP(A) < DFSResult->getILP(B);
2438    else
2439      return DFSResult->getILP(A) > DFSResult->getILP(B);
2440  }
2441};
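// Priority sketch for the comparator above (hypothetical nodes A and B): if
// B's subtree has already started scheduling and A's has not, operator()
// returns true, so A sinks in the heap and the in-flight tree is finished
// first; within the same tree, the node with the larger (for ilpmax) or
// smaller (for ilpmin) ILP value is preferred.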
2442
2443/// \brief Schedule based on the ILP metric.
2444class ILPScheduler : public MachineSchedStrategy {
2445  /// In case all subtrees are eventually connected to a common root through
2446  /// data dependence (e.g. reduction), place an upper limit on their size.
2447  ///
2448  /// FIXME: A subtree limit is generally good, but in the situation commented
2449  /// above, where multiple similar subtrees feed a common root, we should
2450  /// only split at a point where the resulting subtrees will be balanced.
2451  /// (a motivating test case must be found).
2452  static const unsigned SubtreeLimit = 16;
2453
2454  ScheduleDAGMI *DAG;
2455  ILPOrder Cmp;
2456
2457  std::vector<SUnit*> ReadyQ;
2458public:
2459  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
2460
2461  virtual void initialize(ScheduleDAGMI *dag) {
2462    DAG = dag;
2463    DAG->computeDFSResult();
2464    Cmp.DFSResult = DAG->getDFSResult();
2465    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
2466    ReadyQ.clear();
2467  }
2468
2469  virtual void registerRoots() {
2470    // Restore the heap in ReadyQ with the updated DFS results.
2471    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2472  }
2473
2474  /// Implement MachineSchedStrategy interface.
2475  /// -----------------------------------------
2476
2477  /// Callback to select the highest priority node from the ready Q.
2478  virtual SUnit *pickNode(bool &IsTopNode) {
2479    if (ReadyQ.empty()) return NULL;
2480    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2481    SUnit *SU = ReadyQ.back();
2482    ReadyQ.pop_back();
2483    IsTopNode = false;
2484    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
2485          << " ILP: " << DAG->getDFSResult()->getILP(SU)
2486          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
2487          << DAG->getDFSResult()->getSubtreeLevel(
2488            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
2489          << "Scheduling " << *SU->getInstr());
2490    return SU;
2491  }
2492
2493  /// \brief Scheduler callback to notify that a new subtree is scheduled.
2494  virtual void scheduleTree(unsigned SubtreeID) {
2495    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2496  }
2497
2498  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2499  /// DFSResults, and resort the priority Q.
2500  virtual void schedNode(SUnit *SU, bool IsTopNode) {
2501    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2502  }
2503
2504  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2505
2506  virtual void releaseBottomNode(SUnit *SU) {
2507    ReadyQ.push_back(SU);
2508    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2509  }
2510};
2511} // namespace
2512
2513static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2514  return new ScheduleDAGMI(C, new ILPScheduler(true));
2515}
2516static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2517  return new ScheduleDAGMI(C, new ILPScheduler(false));
2518}
2519static MachineSchedRegistry ILPMaxRegistry(
2520  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2521static MachineSchedRegistry ILPMinRegistry(
2522  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2523
2524//===----------------------------------------------------------------------===//
2525// Machine Instruction Shuffler for Correctness Testing
2526//===----------------------------------------------------------------------===//
2527
2528#ifndef NDEBUG
2529namespace {
2530/// Apply a less-than relation on the node order, which corresponds to the
2531/// instruction order prior to scheduling. IsReverse implements greater-than.
2532template<bool IsReverse>
2533struct SUnitOrder {
2534  bool operator()(SUnit *A, SUnit *B) const {
2535    if (IsReverse)
2536      return A->NodeNum > B->NodeNum;
2537    else
2538      return A->NodeNum < B->NodeNum;
2539  }
2540};
2541
2542/// Reorder instructions as much as possible.
2543class InstructionShuffler : public MachineSchedStrategy {
2544  bool IsAlternating;
2545  bool IsTopDown;
2546
2547  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2548  // gives nodes with a higher number higher priority, causing the latest
2549  // instructions to be scheduled first.
2550  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2551    TopQ;
2552  // When scheduling bottom-up, use greater-than as the queue priority.
2553  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2554    BottomQ;
2555public:
2556  InstructionShuffler(bool alternate, bool topdown)
2557    : IsAlternating(alternate), IsTopDown(topdown) {}
2558
2559  virtual void initialize(ScheduleDAGMI *) {
2560    TopQ.clear();
2561    BottomQ.clear();
2562  }
2563
2564  /// Implement MachineSchedStrategy interface.
2565  /// -----------------------------------------
2566
2567  virtual SUnit *pickNode(bool &IsTopNode) {
2568    SUnit *SU;
2569    if (IsTopDown) {
2570      do {
2571        if (TopQ.empty()) return NULL;
2572        SU = TopQ.top();
2573        TopQ.pop();
2574      } while (SU->isScheduled);
2575      IsTopNode = true;
2576    }
2577    else {
2578      do {
2579        if (BottomQ.empty()) return NULL;
2580        SU = BottomQ.top();
2581        BottomQ.pop();
2582      } while (SU->isScheduled);
2583      IsTopNode = false;
2584    }
2585    if (IsAlternating)
2586      IsTopDown = !IsTopDown;
2587    return SU;
2588  }
2589
2590  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2591
2592  virtual void releaseTopNode(SUnit *SU) {
2593    TopQ.push(SU);
2594  }
2595  virtual void releaseBottomNode(SUnit *SU) {
2596    BottomQ.push(SU);
2597  }
2598};
2599} // namespace
2600
2601static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2602  bool Alternate = !ForceTopDown && !ForceBottomUp;
2603  bool TopDown = !ForceBottomUp;
2604  assert((TopDown || !ForceTopDown) &&
2605         "-misched-topdown incompatible with -misched-bottomup");
2606  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2607}
2608static MachineSchedRegistry ShufflerRegistry(
2609  "shuffle", "Shuffle machine instructions alternating directions",
2610  createInstructionShuffler);
2611#endif // !NDEBUG
2612
2613//===----------------------------------------------------------------------===//
2614// GraphWriter support for ScheduleDAGMI.
2615//===----------------------------------------------------------------------===//
2616
2617#ifndef NDEBUG
2618namespace llvm {
2619
2620template<> struct GraphTraits<
2621  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
2622
2623template<>
2624struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
2625
2626  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
2627
2628  static std::string getGraphName(const ScheduleDAG *G) {
2629    return G->MF.getName();
2630  }
2631
2632  static bool renderGraphFromBottomUp() {
2633    return true;
2634  }
2635
2636  static bool isNodeHidden(const SUnit *Node) {
2637    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
2638  }
2639
2640  static bool hasNodeAddressLabel(const SUnit *Node,
2641                                  const ScheduleDAG *Graph) {
2642    return false;
2643  }
2644
2645  /// If you want to override the dot attributes printed for a particular
2646  /// edge, override this method.
2647  static std::string getEdgeAttributes(const SUnit *Node,
2648                                       SUnitIterator EI,
2649                                       const ScheduleDAG *Graph) {
2650    if (EI.isArtificialDep())
2651      return "color=cyan,style=dashed";
2652    if (EI.isCtrlDep())
2653      return "color=blue,style=dashed";
2654    return "";
2655  }
2656
2657  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
2658    std::string Str;
2659    raw_string_ostream SS(Str);
2660    SS << "SU(" << SU->NodeNum << ')';
2661    return SS.str();
2662  }
2663  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
2664    return G->getGraphNodeLabel(SU);
2665  }
2666
2667  static std::string getNodeAttributes(const SUnit *N,
2668                                       const ScheduleDAG *Graph) {
2669    std::string Str("shape=Mrecord");
2670    const SchedDFSResult *DFS =
2671      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
2672    if (DFS) {
2673      Str += ",style=filled,fillcolor=\"#";
2674      Str += DOT::getColorString(DFS->getSubtreeID(N));
2675      Str += '"';
2676    }
2677    return Str;
2678  }
2679};
2680} // namespace llvm
2681#endif // NDEBUG
2682
2683/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
2684/// rendered using 'dot'.
2685///
2686void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
2687#ifndef NDEBUG
2688  ViewGraph(this, Name, false, Title);
2689#else
2690  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
2691         << "systems with Graphviz or gv!\n";
2692#endif  // NDEBUG
2693}
2694
2695/// Out-of-line implementation with no arguments is handy for gdb.
2696void ScheduleDAGMI::viewGraph() {
2697  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
2698}
2699