MachineScheduler.cpp revision bacb24975d7a8a6ccff0e16057a581b3831c4c7d
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// MachineScheduler schedules machine instructions after phi elimination. It
11// preserves LiveIntervals so it can be invoked before register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "misched"
16
17#include "llvm/CodeGen/MachineScheduler.h"
18#include "llvm/ADT/OwningPtr.h"
19#include "llvm/ADT/PriorityQueue.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/CodeGen/LiveIntervalAnalysis.h"
22#include "llvm/CodeGen/MachineDominators.h"
23#include "llvm/CodeGen/MachineLoopInfo.h"
24#include "llvm/CodeGen/Passes.h"
25#include "llvm/CodeGen/RegisterClassInfo.h"
26#include "llvm/CodeGen/ScheduleDFS.h"
27#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/GraphWriter.h"
32#include "llvm/Support/raw_ostream.h"
33#include "llvm/Target/TargetInstrInfo.h"
34#include <queue>
35
36using namespace llvm;
37
38namespace llvm {
39cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
40                           cl::desc("Force top-down list scheduling"));
41cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
42                            cl::desc("Force bottom-up list scheduling"));
43}
44
45#ifndef NDEBUG
46static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
47  cl::desc("Pop up a window to show MISched dags after they are processed"));
48
49static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
50  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
51#else
52static bool ViewMISchedDAGs = false;
53#endif // NDEBUG
54
55static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
56  cl::desc("Enable load clustering."), cl::init(true));
57
58// Experimental heuristics
59static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
60  cl::desc("Enable scheduling for macro fusion."), cl::init(true));
61
62static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
63  cl::desc("Verify machine instrs before and after machine scheduling"));
64
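// Example (illustrative, not from this file): these flags are plain cl::opt
// options, so they are normally passed on the llc command line, e.g.
//   llc -enable-misched -misched-topdown -verify-misched foo.ll
// assuming MI scheduling is not already enabled and configured by the target
// (-enable-misched itself is defined in the target pass configuration, not
// here).
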
65// DAG subtrees must have at least this many nodes.
66static const unsigned MinSubtreeSize = 8;
67
68//===----------------------------------------------------------------------===//
69// Machine Instruction Scheduling Pass and Registry
70//===----------------------------------------------------------------------===//
71
72MachineSchedContext::MachineSchedContext():
73    MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
74  RegClassInfo = new RegisterClassInfo();
75}
76
77MachineSchedContext::~MachineSchedContext() {
78  delete RegClassInfo;
79}
80
81namespace {
82/// MachineScheduler runs after coalescing and before register allocation.
83class MachineScheduler : public MachineSchedContext,
84                         public MachineFunctionPass {
85public:
86  MachineScheduler();
87
88  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
89
90  virtual void releaseMemory() {}
91
92  virtual bool runOnMachineFunction(MachineFunction&);
93
94  virtual void print(raw_ostream &O, const Module* = 0) const;
95
96  static char ID; // Class identification, replacement for typeinfo
97};
98} // namespace
99
100char MachineScheduler::ID = 0;
101
102char &llvm::MachineSchedulerID = MachineScheduler::ID;
103
104INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
105                      "Machine Instruction Scheduler", false, false)
106INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
107INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
108INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
109INITIALIZE_PASS_END(MachineScheduler, "misched",
110                    "Machine Instruction Scheduler", false, false)
111
112MachineScheduler::MachineScheduler()
113: MachineFunctionPass(ID) {
114  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
115}
116
117void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
118  AU.setPreservesCFG();
119  AU.addRequiredID(MachineDominatorsID);
120  AU.addRequired<MachineLoopInfo>();
121  AU.addRequired<AliasAnalysis>();
122  AU.addRequired<TargetPassConfig>();
123  AU.addRequired<SlotIndexes>();
124  AU.addPreserved<SlotIndexes>();
125  AU.addRequired<LiveIntervals>();
126  AU.addPreserved<LiveIntervals>();
127  MachineFunctionPass::getAnalysisUsage(AU);
128}
129
130MachinePassRegistry MachineSchedRegistry::Registry;
131
132/// A dummy default scheduler factory indicates whether the scheduler
133/// is overridden on the command line.
134static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
135  return 0;
136}
137
138/// MachineSchedOpt allows command line selection of the scheduler.
139static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
140               RegisterPassParser<MachineSchedRegistry> >
141MachineSchedOpt("misched",
142                cl::init(&useDefaultMachineSched), cl::Hidden,
143                cl::desc("Machine instruction scheduler to use"));
144
145static MachineSchedRegistry
146DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
147                     useDefaultMachineSched);
148
149/// Forward declare the standard machine scheduler. This will be used as the
150/// default scheduler if the target does not set a default.
151static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
152
153
154/// Decrement this iterator until reaching the top or a non-debug instr.
155static MachineBasicBlock::iterator
156priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
157  assert(I != Beg && "reached the top of the region, cannot decrement");
158  while (--I != Beg) {
159    if (!I->isDebugValue())
160      break;
161  }
162  return I;
163}
164
165/// If this iterator is a debug value, increment until reaching the End or a
166/// non-debug instruction.
167static MachineBasicBlock::iterator
168nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
169  for(; I != End; ++I) {
170    if (!I->isDebugValue())
171      break;
172  }
173  return I;
174}
175
176/// Top-level MachineScheduler pass driver.
177///
178/// Visit blocks in function order. Divide each block into scheduling regions
179/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
180/// consistent with the DAG builder, which traverses the interior of the
181/// scheduling regions bottom-up.
182///
183/// This design avoids exposing scheduling boundaries to the DAG builder,
184/// simplifying the DAG builder's support for "special" target instructions.
185/// At the same time the design allows target schedulers to operate across
186/// scheduling boundaries, for example to bundle the boundary instructions
187/// without reordering them. This creates complexity, because the target
188/// scheduler must update the RegionBegin and RegionEnd positions cached by
189/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
190/// design would be to split blocks at scheduling boundaries, but LLVM has a
191/// general bias against block splitting purely for implementation simplicity.
192bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
193  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
194
195  // Initialize the context of the pass.
196  MF = &mf;
197  MLI = &getAnalysis<MachineLoopInfo>();
198  MDT = &getAnalysis<MachineDominatorTree>();
199  PassConfig = &getAnalysis<TargetPassConfig>();
200  AA = &getAnalysis<AliasAnalysis>();
201
202  LIS = &getAnalysis<LiveIntervals>();
203  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
204
205  if (VerifyScheduling) {
206    DEBUG(LIS->print(dbgs()));
207    MF->verify(this, "Before machine scheduling.");
208  }
209  RegClassInfo->runOnMachineFunction(*MF);
210
211  // Select the scheduler, or set the default.
212  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
213  if (Ctor == useDefaultMachineSched) {
214    // Get the default scheduler set by the target.
215    Ctor = MachineSchedRegistry::getDefault();
216    if (!Ctor) {
217      Ctor = createConvergingSched;
218      MachineSchedRegistry::setDefault(Ctor);
219    }
220  }
221  // Instantiate the selected scheduler.
222  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
223
224  // Visit all machine basic blocks.
225  //
226  // TODO: Visit blocks in global postorder or postorder within the bottom-up
227  // loop tree. Then we can optionally compute global RegPressure.
228  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
229       MBB != MBBEnd; ++MBB) {
230
231    Scheduler->startBlock(MBB);
232
233    // Break the block into scheduling regions [I, RegionEnd), and schedule each
234    // region as soon as it is discovered. RegionEnd points to the scheduling
235    // boundary at the bottom of the region. The DAG does not include RegionEnd,
236    // but the region does (i.e. the next RegionEnd is above the previous
237    // RegionBegin). If the current block has no terminator then RegionEnd ==
238    // MBB->end() for the bottom region.
239    //
240    // The Scheduler may insert instructions during either schedule() or
241    // exitRegion(), even for empty regions. So the local iterators 'I' and
242    // 'RegionEnd' are invalid across these calls.
243    unsigned RemainingInstrs = MBB->size();
244    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
245        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
246
247      // Avoid decrementing RegionEnd for blocks with no terminator.
248      if (RegionEnd != MBB->end()
249          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
250        --RegionEnd;
251        // Count the boundary instruction.
252        --RemainingInstrs;
253      }
254
255      // The next region starts above the previous region. Look backward in the
256      // instruction stream until we find the nearest boundary.
257      MachineBasicBlock::iterator I = RegionEnd;
258      for(;I != MBB->begin(); --I, --RemainingInstrs) {
259        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
260          break;
261      }
262      // Notify the scheduler of the region, even if we may skip scheduling
263      // it. Perhaps it still needs to be bundled.
264      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
265
266      // Skip empty scheduling regions (0 or 1 schedulable instructions).
267      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
268        // Close the current region. Bundle the terminator if needed.
269        // This invalidates 'RegionEnd' and 'I'.
270        Scheduler->exitRegion();
271        continue;
272      }
273      DEBUG(dbgs() << "********** MI Scheduling **********\n");
274      DEBUG(dbgs() << MF->getName()
275            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
276            << "\n  From: " << *I << "    To: ";
277            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
278            else dbgs() << "End";
279            dbgs() << " Remaining: " << RemainingInstrs << "\n");
280
281      // Schedule a region: possibly reorder instructions.
282      // This invalidates 'RegionEnd' and 'I'.
283      Scheduler->schedule();
284
285      // Close the current region.
286      Scheduler->exitRegion();
287
288      // Scheduling has invalidated the current iterator 'I'. Ask the
289      // scheduler for the top of its scheduled region.
290      RegionEnd = Scheduler->begin();
291    }
292    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
293    Scheduler->finishBlock();
294  }
295  Scheduler->finalizeSchedule();
296  DEBUG(LIS->print(dbgs()));
297  if (VerifyScheduling)
298    MF->verify(this, "After machine scheduling.");
299  return true;
300}
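// Illustrative walk-through of the region loop above (assumed instructions,
// not from the source): given a block
//   %v0 = ...
//   CALL @f        ; assume the target reports this as a scheduling boundary
//   %v1 = ...
//   %v2 = ...
//   JMP %exit      ; terminator
// the loop visits regions bottom-up, scheduling [%v1, JMP) first and then
// [%v0, CALL), while RemainingInstrs counts down to the final assertion.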
301
302void MachineScheduler::print(raw_ostream &O, const Module* m) const {
303  // unimplemented
304}
305
306#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
307void ReadyQueue::dump() {
308  dbgs() << "  " << Name << ": ";
309  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
310    dbgs() << Queue[i]->NodeNum << " ";
311  dbgs() << "\n";
312}
313#endif
314
315//===----------------------------------------------------------------------===//
316// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
317// preservation.
318//===----------------------------------------------------------------------===//
319
320ScheduleDAGMI::~ScheduleDAGMI() {
321  delete DFSResult;
322  DeleteContainerPointers(Mutations);
323  delete SchedImpl;
324}
325
326bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
327  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
328}
329
330bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
331  if (SuccSU != &ExitSU) {
332    // Do not use WillCreateCycle, it assumes SD scheduling.
333    // If Pred is reachable from Succ, then the edge creates a cycle.
334    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
335      return false;
336    Topo.AddPred(SuccSU, PredDep.getSUnit());
337  }
338  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
339  // Return true regardless of whether a new edge needed to be inserted.
340  return true;
341}
342
343/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
344/// NumPredsLeft reaches zero, release the successor node.
345///
346/// FIXME: Adjust SuccSU height based on MinLatency.
347void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
348  SUnit *SuccSU = SuccEdge->getSUnit();
349
350  if (SuccEdge->isWeak()) {
351    --SuccSU->WeakPredsLeft;
352    if (SuccEdge->isCluster())
353      NextClusterSucc = SuccSU;
354    return;
355  }
356#ifndef NDEBUG
357  if (SuccSU->NumPredsLeft == 0) {
358    dbgs() << "*** Scheduling failed! ***\n";
359    SuccSU->dump(this);
360    dbgs() << " has been released too many times!\n";
361    llvm_unreachable(0);
362  }
363#endif
364  --SuccSU->NumPredsLeft;
365  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
366    SchedImpl->releaseTopNode(SuccSU);
367}
368
369/// releaseSuccessors - Call releaseSucc on each of SU's successors.
370void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
371  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
372       I != E; ++I) {
373    releaseSucc(SU, &*I);
374  }
375}
376
377/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
378/// NumSuccsLeft reaches zero, release the predecessor node.
379///
380/// FIXME: Adjust PredSU height based on MinLatency.
381void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
382  SUnit *PredSU = PredEdge->getSUnit();
383
384  if (PredEdge->isWeak()) {
385    --PredSU->WeakSuccsLeft;
386    if (PredEdge->isCluster())
387      NextClusterPred = PredSU;
388    return;
389  }
390#ifndef NDEBUG
391  if (PredSU->NumSuccsLeft == 0) {
392    dbgs() << "*** Scheduling failed! ***\n";
393    PredSU->dump(this);
394    dbgs() << " has been released too many times!\n";
395    llvm_unreachable(0);
396  }
397#endif
398  --PredSU->NumSuccsLeft;
399  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
400    SchedImpl->releaseBottomNode(PredSU);
401}
402
403/// releasePredecessors - Call releasePred on each of SU's predecessors.
404void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
405  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
406       I != E; ++I) {
407    releasePred(SU, &*I);
408  }
409}
410
411/// This is normally called from the main scheduler loop but may also be invoked
412/// by the scheduling strategy to perform additional code motion.
413void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
414                                    MachineBasicBlock::iterator InsertPos) {
415  // Advance RegionBegin if the first instruction moves down.
416  if (&*RegionBegin == MI)
417    ++RegionBegin;
418
419  // Update the instruction stream.
420  BB->splice(InsertPos, BB, MI);
421
422  // Update LiveIntervals
423  LIS->handleMove(MI, /*UpdateFlags=*/true);
424
425  // Recede RegionBegin if an instruction moves above the first.
426  if (RegionBegin == InsertPos)
427    RegionBegin = MI;
428}
429
430bool ScheduleDAGMI::checkSchedLimit() {
431#ifndef NDEBUG
432  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
433    CurrentTop = CurrentBottom;
434    return false;
435  }
436  ++NumInstrsScheduled;
437#endif
438  return true;
439}
440
441/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
442/// crossing a scheduling boundary. [begin, end) includes all instructions in
443/// the region, including the boundary itself and single-instruction regions
444/// that don't get scheduled.
445void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
446                                MachineBasicBlock::iterator begin,
447                                MachineBasicBlock::iterator end,
448                                unsigned endcount)
449{
450  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
451
452  // For convenience remember the end of the liveness region.
453  LiveRegionEnd =
454    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
455}
456
457// Set up the register pressure trackers for the top and bottom scheduled
458// regions.
459void ScheduleDAGMI::initRegPressure() {
460  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
461  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
462
463  // Close the RPTracker to finalize live ins.
464  RPTracker.closeRegion();
465
466  DEBUG(RPTracker.getPressure().dump(TRI));
467
468  // Initialize the live ins and live outs.
469  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
470  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
471
472  // Close one end of the tracker so we can call
473  // getMaxUpward/DownwardPressureDelta before advancing across any
474  // instructions. This converts currently live regs into live ins/outs.
475  TopRPTracker.closeTop();
476  BotRPTracker.closeBottom();
477
478  // Account for liveness generated by the region boundary.
479  if (LiveRegionEnd != RegionEnd)
480    BotRPTracker.recede();
481
482  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
483
484  // Cache the list of excess pressure sets in this region. This will also track
485  // the max pressure in the scheduled code for these sets.
486  RegionCriticalPSets.clear();
487  const std::vector<unsigned> &RegionPressure =
488    RPTracker.getPressure().MaxSetPressure;
489  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
490    unsigned Limit = TRI->getRegPressureSetLimit(i);
491    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
492          << " Limit " << Limit
493          << " Actual " << RegionPressure[i] << "\n");
494    if (RegionPressure[i] > Limit)
495      RegionCriticalPSets.push_back(PressureElement(i, 0));
496  }
497  DEBUG(dbgs() << "Excess PSets: ";
498        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
499          dbgs() << TRI->getRegPressureSetName(
500            RegionCriticalPSets[i].PSetID) << " ";
501        dbgs() << "\n");
502}
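// Worked example (assumed numbers): if the tracker records a max pressure of
// 34 for some pressure set whose limit from TRI is 32, that set is added to
// RegionCriticalPSets above, and updateScheduledPressure() below then records
// the highest pressure the emitted schedule actually reaches for it.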
503
504// FIXME: When the pressure tracker deals in pressure differences then we won't
505// need to iterate over all RegionCriticalPSets[i].
506void ScheduleDAGMI::
507updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
508  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
509    unsigned ID = RegionCriticalPSets[i].PSetID;
510    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
511    if ((int)NewMaxPressure[ID] > MaxUnits)
512      MaxUnits = NewMaxPressure[ID];
513  }
514  DEBUG(
515    for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
516      unsigned Limit = TRI->getRegPressureSetLimit(i);
517      if (NewMaxPressure[i] > Limit) {
518        dbgs() << "  " << TRI->getRegPressureSetName(i) << ": "
519               << NewMaxPressure[i] << " > " << Limit << "\n";
520      }
521    });
522}
523
524/// schedule - Called back from MachineScheduler::runOnMachineFunction
525/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
526/// only includes instructions that have DAG nodes, not scheduling boundaries.
527///
528/// This is a skeletal driver, with all the functionality pushed into helpers,
529/// so that it can be easily extended by experimental schedulers. Generally,
530/// implementing MachineSchedStrategy should be sufficient to implement a new
531/// scheduling algorithm. However, if a scheduler further subclasses
532/// ScheduleDAGMI then it will want to override this virtual method in order to
533/// update any specialized state.
534void ScheduleDAGMI::schedule() {
535  buildDAGWithRegPressure();
536
537  Topo.InitDAGTopologicalSorting();
538
539  postprocessDAG();
540
541  SmallVector<SUnit*, 8> TopRoots, BotRoots;
542  findRootsAndBiasEdges(TopRoots, BotRoots);
543
544  // Initialize the strategy before modifying the DAG.
545  // This may initialize a DFSResult to be used for queue priority.
546  SchedImpl->initialize(this);
547
548  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
549          SUnits[su].dumpAll(this));
550  if (ViewMISchedDAGs) viewGraph();
551
552  // Initialize ready queues now that the DAG and priority data are finalized.
553  initQueues(TopRoots, BotRoots);
554
555  bool IsTopNode = false;
556  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
557    assert(!SU->isScheduled && "Node already scheduled");
558    if (!checkSchedLimit())
559      break;
560
561    scheduleMI(SU, IsTopNode);
562
563    updateQueues(SU, IsTopNode);
564  }
565  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
566
567  placeDebugValues();
568
569  DEBUG({
570      unsigned BBNum = begin()->getParent()->getNumber();
571      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
572      dumpSchedule();
573      dbgs() << '\n';
574    });
575}
576
577/// Build the DAG and set up three register pressure trackers.
578void ScheduleDAGMI::buildDAGWithRegPressure() {
579  // Initialize the register pressure tracker used by buildSchedGraph.
580  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
581
582  // Account for liveness generated by the region boundary.
583  if (LiveRegionEnd != RegionEnd)
584    RPTracker.recede();
585
586  // Build the DAG, and compute current register pressure.
587  buildSchedGraph(AA, &RPTracker);
588
589  // Initialize top/bottom trackers after computing region pressure.
590  initRegPressure();
591}
592
593/// Apply each ScheduleDAGMutation step in order.
594void ScheduleDAGMI::postprocessDAG() {
595  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
596    Mutations[i]->apply(this);
597  }
598}
599
600void ScheduleDAGMI::computeDFSResult() {
601  if (!DFSResult)
602    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
603  DFSResult->clear();
604  ScheduledTrees.clear();
605  DFSResult->resize(SUnits.size());
606  DFSResult->compute(SUnits);
607  ScheduledTrees.resize(DFSResult->getNumSubtrees());
608}
609
610void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
611                                          SmallVectorImpl<SUnit*> &BotRoots) {
612  for (std::vector<SUnit>::iterator
613         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
614    SUnit *SU = &(*I);
615    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
616
617    // Order predecessors so DFSResult follows the critical path.
618    SU->biasCriticalPath();
619
620    // A SUnit is ready to top schedule if it has no predecessors.
621    if (!I->NumPredsLeft)
622      TopRoots.push_back(SU);
623    // A SUnit is ready to bottom schedule if it has no successors.
624    if (!I->NumSuccsLeft)
625      BotRoots.push_back(SU);
626  }
627  ExitSU.biasCriticalPath();
628}
629
630/// Identify DAG roots and set up scheduler queues.
631void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
632                               ArrayRef<SUnit*> BotRoots) {
633  NextClusterSucc = NULL;
634  NextClusterPred = NULL;
635
636  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
637  //
638  // Nodes with unreleased weak edges can still be roots.
639  // Release top roots in forward order.
640  for (SmallVectorImpl<SUnit*>::const_iterator
641         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
642    SchedImpl->releaseTopNode(*I);
643  }
644  // Release bottom roots in reverse order so the higher priority nodes appear
645  // first. This is more natural and slightly more efficient.
646  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
647         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
648    SchedImpl->releaseBottomNode(*I);
649  }
650
651  releaseSuccessors(&EntrySU);
652  releasePredecessors(&ExitSU);
653
654  SchedImpl->registerRoots();
655
656  // Advance past initial DebugValues.
657  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
658  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
659  TopRPTracker.setPos(CurrentTop);
660
661  CurrentBottom = RegionEnd;
662}
663
664/// Move an instruction and update register pressure.
665void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
666  // Move the instruction to its new location in the instruction stream.
667  MachineInstr *MI = SU->getInstr();
668
669  if (IsTopNode) {
670    assert(SU->isTopReady() && "node still has unscheduled dependencies");
671    if (&*CurrentTop == MI)
672      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
673    else {
674      moveInstruction(MI, CurrentTop);
675      TopRPTracker.setPos(MI);
676    }
677
678    // Update top scheduled pressure.
679    TopRPTracker.advance();
680    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
681    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
682  }
683  else {
684    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
685    MachineBasicBlock::iterator priorII =
686      priorNonDebug(CurrentBottom, CurrentTop);
687    if (&*priorII == MI)
688      CurrentBottom = priorII;
689    else {
690      if (&*CurrentTop == MI) {
691        CurrentTop = nextIfDebug(++CurrentTop, priorII);
692        TopRPTracker.setPos(CurrentTop);
693      }
694      moveInstruction(MI, CurrentBottom);
695      CurrentBottom = MI;
696    }
697    // Update bottom scheduled pressure.
698    BotRPTracker.recede();
699    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
700    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
701  }
702}
703
704/// Update scheduler queues after scheduling an instruction.
705void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
706  // Release dependent instructions for scheduling.
707  if (IsTopNode)
708    releaseSuccessors(SU);
709  else
710    releasePredecessors(SU);
711
712  SU->isScheduled = true;
713
714  if (DFSResult) {
715    unsigned SubtreeID = DFSResult->getSubtreeID(SU);
716    if (!ScheduledTrees.test(SubtreeID)) {
717      ScheduledTrees.set(SubtreeID);
718      DFSResult->scheduleTree(SubtreeID);
719      SchedImpl->scheduleTree(SubtreeID);
720    }
721  }
722
723  // Notify the scheduling strategy after updating the DAG.
724  SchedImpl->schedNode(SU, IsTopNode);
725}
726
727/// Reinsert any remaining debug_values, just like the PostRA scheduler.
728void ScheduleDAGMI::placeDebugValues() {
729  // If first instruction was a DBG_VALUE then put it back.
730  if (FirstDbgValue) {
731    BB->splice(RegionBegin, BB, FirstDbgValue);
732    RegionBegin = FirstDbgValue;
733  }
734
735  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
736         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
737    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
738    MachineInstr *DbgValue = P.first;
739    MachineBasicBlock::iterator OrigPrevMI = P.second;
740    if (&*RegionBegin == DbgValue)
741      ++RegionBegin;
742    BB->splice(++OrigPrevMI, BB, DbgValue);
743    if (OrigPrevMI == llvm::prior(RegionEnd))
744      RegionEnd = DbgValue;
745  }
746  DbgValues.clear();
747  FirstDbgValue = NULL;
748}
749
750#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
751void ScheduleDAGMI::dumpSchedule() const {
752  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
753    if (SUnit *SU = getSUnit(&(*MI)))
754      SU->dump(this);
755    else
756      dbgs() << "Missing SUnit\n";
757  }
758}
759#endif
760
761//===----------------------------------------------------------------------===//
762// LoadClusterMutation - DAG post-processing to cluster loads.
763//===----------------------------------------------------------------------===//
764
765namespace {
766/// \brief Post-process the DAG to create cluster edges between neighboring
767/// loads.
768class LoadClusterMutation : public ScheduleDAGMutation {
769  struct LoadInfo {
770    SUnit *SU;
771    unsigned BaseReg;
772    unsigned Offset;
773    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
774      : SU(su), BaseReg(reg), Offset(ofs) {}
775  };
776  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
777                           const LoadClusterMutation::LoadInfo &RHS);
778
779  const TargetInstrInfo *TII;
780  const TargetRegisterInfo *TRI;
781public:
782  LoadClusterMutation(const TargetInstrInfo *tii,
783                      const TargetRegisterInfo *tri)
784    : TII(tii), TRI(tri) {}
785
786  virtual void apply(ScheduleDAGMI *DAG);
787protected:
788  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
789};
790} // anonymous
791
792bool LoadClusterMutation::LoadInfoLess(
793  const LoadClusterMutation::LoadInfo &LHS,
794  const LoadClusterMutation::LoadInfo &RHS) {
795  if (LHS.BaseReg != RHS.BaseReg)
796    return LHS.BaseReg < RHS.BaseReg;
797  return LHS.Offset < RHS.Offset;
798}
799
800void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
801                                                  ScheduleDAGMI *DAG) {
802  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
803  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
804    SUnit *SU = Loads[Idx];
805    unsigned BaseReg;
806    unsigned Offset;
807    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
808      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
809  }
810  if (LoadRecords.size() < 2)
811    return;
812  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
813  unsigned ClusterLength = 1;
814  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
815    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
816      ClusterLength = 1;
817      continue;
818    }
819
820    SUnit *SUa = LoadRecords[Idx].SU;
821    SUnit *SUb = LoadRecords[Idx+1].SU;
822    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
823        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
824
825      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
826            << SUb->NodeNum << ")\n");
827      // Copy successor edges from SUa to SUb. Interleaving computation
828      // dependent on SUa can prevent load combining due to register reuse.
829      // Predecessor edges do not need to be copied from SUb to SUa since nearby
830      // loads should have effectively the same inputs.
831      for (SUnit::const_succ_iterator
832             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
833        if (SI->getSUnit() == SUb)
834          continue;
835        DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
836        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
837      }
838      ++ClusterLength;
839    }
840    else
841      ClusterLength = 1;
842  }
843}
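// Illustrative example (assumed operands, not from the source): given two
// loads from the same base register,
//   SU(a): load %r1 <- [%r10 + 4]
//   SU(b): load %r2 <- [%r10 + 8]
// the records sort by (BaseReg, Offset); if shouldClusterLoads() approves the
// pair, a Cluster edge SU(a)->SU(b) keeps the loads adjacent in the schedule.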
844
845/// \brief Callback from DAG postProcessing to create cluster edges for loads.
846void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
847  // Map DAG NodeNum to store chain ID.
848  DenseMap<unsigned, unsigned> StoreChainIDs;
849  // Map each store chain to a set of dependent loads.
850  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
851  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
852    SUnit *SU = &DAG->SUnits[Idx];
853    if (!SU->getInstr()->mayLoad())
854      continue;
855    unsigned ChainPredID = DAG->SUnits.size();
856    for (SUnit::const_pred_iterator
857           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
858      if (PI->isCtrl()) {
859        ChainPredID = PI->getSUnit()->NodeNum;
860        break;
861      }
862    }
863    // Check if this chain-like pred has been seen
864    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
865    unsigned NumChains = StoreChainDependents.size();
866    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
867      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
868    if (Result.second)
869      StoreChainDependents.resize(NumChains + 1);
870    StoreChainDependents[Result.first->second].push_back(SU);
871  }
872  // Iterate over the store chains.
873  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
874    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
875}
876
877//===----------------------------------------------------------------------===//
878// MacroFusion - DAG post-processing to encourage fusion of macro ops.
879//===----------------------------------------------------------------------===//
880
881namespace {
882/// \brief Post-process the DAG to create cluster edges between instructions
883/// that may be fused by the processor into a single operation.
884class MacroFusion : public ScheduleDAGMutation {
885  const TargetInstrInfo *TII;
886public:
887  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
888
889  virtual void apply(ScheduleDAGMI *DAG);
890};
891} // anonymous
892
893/// \brief Callback from DAG postProcessing to create cluster edges to encourage
894/// fused operations.
895void MacroFusion::apply(ScheduleDAGMI *DAG) {
896  // For now, assume targets can only fuse with the branch.
897  MachineInstr *Branch = DAG->ExitSU.getInstr();
898  if (!Branch)
899    return;
900
901  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
902    SUnit *SU = &DAG->SUnits[--Idx];
903    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
904      continue;
905
906    // Create a single weak edge from SU to ExitSU. The only effect is to cause
907    // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
908    // need to copy predecessor edges from ExitSU to SU, since top-down
909    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
910    // of SU, we could create an artificial edge from the deepest root, but it
911    // hasn't been needed yet.
912    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
913    (void)Success;
914    assert(Success && "No DAG nodes should be reachable from ExitSU");
915
916    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
917    break;
918  }
919}
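// Illustrative example (assumed target behavior): on a target whose
// shouldScheduleAdjacent() fuses compare-and-branch pairs, e.g.
//   SU(n):  cmp %r0, 0
//   ExitSU: br_eq %exit
// the weak Cluster edge added above biases bottom-up scheduling to keep the
// compare immediately before the branch so the pair can macro-fuse.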
920
921//===----------------------------------------------------------------------===//
922// CopyConstrain - DAG post-processing to encourage copy elimination.
923//===----------------------------------------------------------------------===//
924
925namespace {
926/// \brief Post-process the DAG to create weak edges from all uses of a copy to
927/// the one use that defines the copy's source vreg, most likely an induction
928/// variable increment.
929class CopyConstrain : public ScheduleDAGMutation {
930  // Transient state.
931  SlotIndex RegionBeginIdx;
932  // RegionEndIdx is the slot index of the last non-debug instruction in the
933  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
934  SlotIndex RegionEndIdx;
935public:
936  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
937
938  virtual void apply(ScheduleDAGMI *DAG);
939
940protected:
941  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
942};
943} // anonymous
944
945/// constrainLocalCopy handles two possibilities:
946/// 1) Local src:
947/// I0:     = dst
948/// I1: src = ...
949/// I2:     = dst
950/// I3: dst = src (copy)
951/// (create pred->succ edges I0->I1, I2->I1)
952///
953/// 2) Local copy:
954/// I0: dst = src (copy)
955/// I1:     = dst
956/// I2: src = ...
957/// I3:     = dst
958/// (create pred->succ edges I1->I2, I3->I2)
959///
960/// Although the MachineScheduler is currently constrained to single blocks,
961/// this algorithm should handle extended blocks. An EBB is a set of
962/// contiguously numbered blocks such that the previous block in the EBB is
963/// always the single predecessor.
964void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
965  LiveIntervals *LIS = DAG->getLIS();
966  MachineInstr *Copy = CopySU->getInstr();
967
968  // Check for pure vreg copies.
969  unsigned SrcReg = Copy->getOperand(1).getReg();
970  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
971    return;
972
973  unsigned DstReg = Copy->getOperand(0).getReg();
974  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
975    return;
976
977  // Check if either the dest or source is local. If it's live across a back
978  // edge, it's not local. Note that if both vregs are live across the back
979  // edge, we cannot successfully constrain the copy without cyclic scheduling.
980  unsigned LocalReg = DstReg;
981  unsigned GlobalReg = SrcReg;
982  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
983  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
984    LocalReg = SrcReg;
985    GlobalReg = DstReg;
986    LocalLI = &LIS->getInterval(LocalReg);
987    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
988      return;
989  }
990  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
991
992  // Find the global segment after the start of the local LI.
993  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
994  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
995  // local live range. We could create edges from other global uses to the local
996  // start, but the coalescer should have already eliminated these cases, so
997  // don't bother dealing with them.
998  if (GlobalSegment == GlobalLI->end())
999    return;
1000
1001  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1002  // returned the next global segment. But if GlobalSegment overlaps with
1003  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1004  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1005  if (GlobalSegment->contains(LocalLI->beginIndex()))
1006    ++GlobalSegment;
1007
1008  if (GlobalSegment == GlobalLI->end())
1009    return;
1010
1011  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1012  if (GlobalSegment != GlobalLI->begin()) {
1013    // Two address defs have no hole.
1014    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
1015                               GlobalSegment->start)) {
1016      return;
1017    }
1018    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1019    // it would be a disconnected component in the live range.
1020    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
1021           "Disconnected LRG within the scheduling region.");
1022  }
1023  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1024  if (!GlobalDef)
1025    return;
1026
1027  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1028  if (!GlobalSU)
1029    return;
1030
1031  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1032  // constraining the uses of the last local def to precede GlobalDef.
1033  SmallVector<SUnit*,8> LocalUses;
1034  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1035  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1036  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1037  for (SUnit::const_succ_iterator
1038         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
1039       I != E; ++I) {
1040    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
1041      continue;
1042    if (I->getSUnit() == GlobalSU)
1043      continue;
1044    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
1045      return;
1046    LocalUses.push_back(I->getSUnit());
1047  }
1048  // Open the top of the GlobalLI hole by constraining any earlier global uses
1049  // to precede the start of LocalLI.
1050  SmallVector<SUnit*,8> GlobalUses;
1051  MachineInstr *FirstLocalDef =
1052    LIS->getInstructionFromIndex(LocalLI->beginIndex());
1053  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1054  for (SUnit::const_pred_iterator
1055         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
1056    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
1057      continue;
1058    if (I->getSUnit() == FirstLocalSU)
1059      continue;
1060    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
1061      return;
1062    GlobalUses.push_back(I->getSUnit());
1063  }
1064  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1065  // Add the weak edges.
1066  for (SmallVectorImpl<SUnit*>::const_iterator
1067         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1068    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
1069          << GlobalSU->NodeNum << ")\n");
1070    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1071  }
1072  for (SmallVectorImpl<SUnit*>::const_iterator
1073         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1074    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
1075          << FirstLocalSU->NodeNum << ")\n");
1076    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1077  }
1078}
1079
1080/// \brief Callback from DAG postProcessing to create weak edges to encourage
1081/// copy elimination.
1082void CopyConstrain::apply(ScheduleDAGMI *DAG) {
1083  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1084  if (FirstPos == DAG->end())
1085    return;
1086  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
1087  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1088    &*priorNonDebug(DAG->end(), DAG->begin()));
1089
1090  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
1091    SUnit *SU = &DAG->SUnits[Idx];
1092    if (!SU->getInstr()->isCopy())
1093      continue;
1094
1095    constrainLocalCopy(SU, DAG);
1096  }
1097}
1098
1099//===----------------------------------------------------------------------===//
1100// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
1101//===----------------------------------------------------------------------===//
1102
1103namespace {
1104/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
1105/// the schedule.
1106class ConvergingScheduler : public MachineSchedStrategy {
1107public:
1108  /// Represent the type of SchedCandidate found within a single queue.
1109  /// pickNodeBidirectional depends on these listed by decreasing priority.
1110  enum CandReason {
1111    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
1112    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
1113    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
1114    NodeOrder};
1115
1116#ifndef NDEBUG
1117  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
1118#endif
1119
1120  /// Policy for scheduling the next instruction in the candidate's zone.
1121  struct CandPolicy {
1122    bool ReduceLatency;
1123    unsigned ReduceResIdx;
1124    unsigned DemandResIdx;
1125
1126    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
1127  };
1128
1129  /// Status of an instruction's critical resource consumption.
1130  struct SchedResourceDelta {
1131    // Count critical resources in the scheduled region required by SU.
1132    unsigned CritResources;
1133
1134    // Count critical resources from another region consumed by SU.
1135    unsigned DemandedResources;
1136
1137    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
1138
1139    bool operator==(const SchedResourceDelta &RHS) const {
1140      return CritResources == RHS.CritResources
1141        && DemandedResources == RHS.DemandedResources;
1142    }
1143    bool operator!=(const SchedResourceDelta &RHS) const {
1144      return !operator==(RHS);
1145    }
1146  };
1147
1148  /// Store the state used by ConvergingScheduler heuristics, required for the
1149  /// lifetime of one invocation of pickNode().
1150  struct SchedCandidate {
1151    CandPolicy Policy;
1152
1153    // The best SUnit candidate.
1154    SUnit *SU;
1155
1156    // The reason for this candidate.
1157    CandReason Reason;
1158
1159    // Register pressure values for the best candidate.
1160    RegPressureDelta RPDelta;
1161
1162    // Critical resource consumption of the best candidate.
1163    SchedResourceDelta ResDelta;
1164
1165    SchedCandidate(const CandPolicy &policy)
1166    : Policy(policy), SU(NULL), Reason(NoCand) {}
1167
1168    bool isValid() const { return SU; }
1169
1170    // Copy the status of another candidate without changing policy.
1171    void setBest(SchedCandidate &Best) {
1172      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
1173      SU = Best.SU;
1174      Reason = Best.Reason;
1175      RPDelta = Best.RPDelta;
1176      ResDelta = Best.ResDelta;
1177    }
1178
1179    void initResourceDelta(const ScheduleDAGMI *DAG,
1180                           const TargetSchedModel *SchedModel);
1181  };
1182
1183  /// Summarize the unscheduled region.
1184  struct SchedRemainder {
1185    // Critical path through the DAG in expected latency.
1186    unsigned CriticalPath;
1187
1188    // Unscheduled resources
1189    SmallVector<unsigned, 16> RemainingCounts;
1190    // Critical resource for the unscheduled zone.
1191    unsigned CritResIdx;
1192    // Number of micro-ops left to schedule.
1193    unsigned RemainingMicroOps;
1194
1195    void reset() {
1196      CriticalPath = 0;
1197      RemainingCounts.clear();
1198      CritResIdx = 0;
1199      RemainingMicroOps = 0;
1200    }
1201
1202    SchedRemainder() { reset(); }
1203
1204    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
1205
1206    unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
1207      if (!SchedModel->hasInstrSchedModel())
1208        return 0;
1209
1210      return std::max(
1211        RemainingMicroOps * SchedModel->getMicroOpFactor(),
1212        RemainingCounts[CritResIdx]);
1213    }
1214  };
1215
1216  /// Each scheduling boundary is associated with ready queues. It tracks the
1217  /// current cycle in the direction of movement, and maintains the state
1218  /// of "hazards" and other interlocks at the current cycle.
1219  struct SchedBoundary {
1220    ScheduleDAGMI *DAG;
1221    const TargetSchedModel *SchedModel;
1222    SchedRemainder *Rem;
1223
1224    ReadyQueue Available;
1225    ReadyQueue Pending;
1226    bool CheckPending;
1227
1228    // For heuristics, keep a list of the nodes that immediately depend on the
1229    // most recently scheduled node.
1230    SmallPtrSet<const SUnit*, 8> NextSUs;
1231
1232    ScheduleHazardRecognizer *HazardRec;
1233
1234    unsigned CurrCycle;
1235    unsigned CurrMOps;
1236
1237    /// MinReadyCycle - Cycle of the soonest available instruction.
1238    unsigned MinReadyCycle;
1239
1240    // The expected latency of the critical path in this scheduled zone.
1241    unsigned ExpectedLatency;
1242
1243    // The latency of dependence chains leading into this zone.
1244    // For each node scheduled: DLat = max DLat, N.Depth.
1245    // For each cycle scheduled: DLat -= 1.
1246    unsigned DependentLatency;
1247
1248    // Resources used in the scheduled zone beyond this boundary.
1249    SmallVector<unsigned, 16> ResourceCounts;
1250
1251    // Cache the critical resources ID in this scheduled zone.
1252    unsigned CritResIdx;
1253
1254    // Is the scheduled region resource limited vs. latency limited.
1255    bool IsResourceLimited;
1256
1257    unsigned ExpectedCount;
1258
1259#ifndef NDEBUG
1260    // Remember the greatest min operand latency.
1261    unsigned MaxMinLatency;
1262#endif
1263
1264    void reset() {
1265      // A new HazardRec is created for each DAG and owned by SchedBoundary.
1266      delete HazardRec;
1267
1268      Available.clear();
1269      Pending.clear();
1270      CheckPending = false;
1271      NextSUs.clear();
1272      HazardRec = 0;
1273      CurrCycle = 0;
1274      CurrMOps = 0;
1275      MinReadyCycle = UINT_MAX;
1276      ExpectedLatency = 0;
1277      DependentLatency = 0;
1278      ResourceCounts.resize(1);
1279      assert(!ResourceCounts[0] && "nonzero count for bad resource");
1280      CritResIdx = 0;
1281      IsResourceLimited = false;
1282      ExpectedCount = 0;
1283#ifndef NDEBUG
1284      MaxMinLatency = 0;
1285#endif
1286      // Reserve a zero-count for invalid CritResIdx.
1287      ResourceCounts.resize(1);
1288    }
1289
1290    /// Pending queues extend the ready queues with the same ID and the
1291    /// PendingFlag set.
1292    SchedBoundary(unsigned ID, const Twine &Name):
1293      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
1294      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
1295      HazardRec(0) {
1296      reset();
1297    }
1298
1299    ~SchedBoundary() { delete HazardRec; }
1300
1301    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
1302              SchedRemainder *rem);
1303
1304    bool isTop() const {
1305      return Available.getID() == ConvergingScheduler::TopQID;
1306    }
1307
1308    unsigned getUnscheduledLatency(SUnit *SU) const {
1309      if (isTop())
1310        return SU->getHeight();
1311      return SU->getDepth() + SU->Latency;
1312    }
1313
1314    unsigned getCriticalCount() const {
1315      return ResourceCounts[CritResIdx];
1316    }
1317
1318    bool checkHazard(SUnit *SU);
1319
1320    void setLatencyPolicy(CandPolicy &Policy);
1321
1322    void releaseNode(SUnit *SU, unsigned ReadyCycle);
1323
1324    void bumpCycle();
1325
1326    void countResource(unsigned PIdx, unsigned Cycles);
1327
1328    void bumpNode(SUnit *SU);
1329
1330    void releasePending();
1331
1332    void removeReady(SUnit *SU);
1333
1334    SUnit *pickOnlyChoice();
1335  };
1336
1337private:
1338  ScheduleDAGMI *DAG;
1339  const TargetSchedModel *SchedModel;
1340  const TargetRegisterInfo *TRI;
1341
1342  // State of the top and bottom scheduled instruction boundaries.
1343  SchedRemainder Rem;
1344  SchedBoundary Top;
1345  SchedBoundary Bot;
1346
1347public:
1348  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
1349  enum {
1350    TopQID = 1,
1351    BotQID = 2,
1352    LogMaxQID = 2
1353  };
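  // For example (derived from the SchedBoundary constructor above): the
  // Available queues use TopQID and BotQID directly, while the Pending queues
  // use TopQID << LogMaxQID and BotQID << LogMaxQID (4 and 8), giving all four
  // queues distinct IDs.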
1354
1355  ConvergingScheduler():
1356    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
1357
1358  virtual void initialize(ScheduleDAGMI *dag);
1359
1360  virtual SUnit *pickNode(bool &IsTopNode);
1361
1362  virtual void schedNode(SUnit *SU, bool IsTopNode);
1363
1364  virtual void releaseTopNode(SUnit *SU);
1365
1366  virtual void releaseBottomNode(SUnit *SU);
1367
1368  virtual void registerRoots();
1369
1370protected:
1371  void balanceZones(
1372    ConvergingScheduler::SchedBoundary &CriticalZone,
1373    ConvergingScheduler::SchedCandidate &CriticalCand,
1374    ConvergingScheduler::SchedBoundary &OppositeZone,
1375    ConvergingScheduler::SchedCandidate &OppositeCand);
1376
1377  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
1378                           ConvergingScheduler::SchedCandidate &BotCand);
1379
1380  void tryCandidate(SchedCandidate &Cand,
1381                    SchedCandidate &TryCand,
1382                    SchedBoundary &Zone,
1383                    const RegPressureTracker &RPTracker,
1384                    RegPressureTracker &TempTracker);
1385
1386  SUnit *pickNodeBidirectional(bool &IsTopNode);
1387
1388  void pickNodeFromQueue(SchedBoundary &Zone,
1389                         const RegPressureTracker &RPTracker,
1390                         SchedCandidate &Candidate);
1391
1392  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
1393
1394#ifndef NDEBUG
1395  void traceCandidate(const SchedCandidate &Cand);
1396#endif
1397};
1398} // namespace
1399
1400void ConvergingScheduler::SchedRemainder::
1401init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1402  reset();
1403  if (!SchedModel->hasInstrSchedModel())
1404    return;
1405  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1406  for (std::vector<SUnit>::iterator
1407         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
1408    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
1409    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
1410    for (TargetSchedModel::ProcResIter
1411           PI = SchedModel->getWriteProcResBegin(SC),
1412           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1413      unsigned PIdx = PI->ProcResourceIdx;
1414      unsigned Factor = SchedModel->getResourceFactor(PIdx);
1415      RemainingCounts[PIdx] += (Factor * PI->Cycles);
1416    }
1417  }
1418  for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
1419       PIdx != PEnd; ++PIdx) {
1420    if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
1421        >= (int)SchedModel->getLatencyFactor()) {
1422      CritResIdx = PIdx;
1423    }
1424  }
1425  DEBUG(dbgs() << "Critical Resource: "
1426        << SchedModel->getProcResource(CritResIdx)->Name
1427        << ": " << RemainingCounts[CritResIdx]
1428        << " / " << SchedModel->getLatencyFactor() << '\n');
1429}
1430
1431void ConvergingScheduler::SchedBoundary::
1432init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1433  reset();
1434  DAG = dag;
1435  SchedModel = smodel;
1436  Rem = rem;
1437  if (SchedModel->hasInstrSchedModel())
1438    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
1439}
1440
1441void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
1442  DAG = dag;
1443  SchedModel = DAG->getSchedModel();
1444  TRI = DAG->TRI;
1445
1446  Rem.init(DAG, SchedModel);
1447  Top.init(DAG, SchedModel, &Rem);
1448  Bot.init(DAG, SchedModel, &Rem);
1449
1450  // Initialize resource counts.
1451
1452  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
1453  // are disabled, then these HazardRecs will be disabled.
1454  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
1455  const TargetMachine &TM = DAG->MF.getTarget();
1456  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1457  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
1458
1459  assert((!ForceTopDown || !ForceBottomUp) &&
1460         "-misched-topdown incompatible with -misched-bottomup");
1461}
1462
1463void ConvergingScheduler::releaseTopNode(SUnit *SU) {
1464  if (SU->isScheduled)
1465    return;
1466
1467  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1468       I != E; ++I) {
1469    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
1470    unsigned MinLatency = I->getMinLatency();
1471#ifndef NDEBUG
1472    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
1473#endif
1474    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
1475      SU->TopReadyCycle = PredReadyCycle + MinLatency;
1476  }
1477  Top.releaseNode(SU, SU->TopReadyCycle);
1478}
1479
1480void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
1481  if (SU->isScheduled)
1482    return;
1483
1484  assert(SU->getInstr() && "Scheduled SUnit must have instr");
1485
1486  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1487       I != E; ++I) {
1488    if (I->isWeak())
1489      continue;
1490    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
1491    unsigned MinLatency = I->getMinLatency();
1492#ifndef NDEBUG
1493    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
1494#endif
1495    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
1496      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
1497  }
1498  Bot.releaseNode(SU, SU->BotReadyCycle);
1499}
1500
1501void ConvergingScheduler::registerRoots() {
1502  Rem.CriticalPath = DAG->ExitSU.getDepth();
1503  // Some roots may not feed into ExitSU. Check all of them just in case.
1504  for (std::vector<SUnit*>::const_iterator
1505         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
1506    if ((*I)->getDepth() > Rem.CriticalPath)
1507      Rem.CriticalPath = (*I)->getDepth();
1508  }
1509  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
1510}
1511
1512/// Does this SU have a hazard within the current instruction group.
1513///
1514/// The scheduler supports two modes of hazard recognition. The first is the
1515/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1516/// supports highly complicated in-order reservation tables
1517/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1518///
1519/// The second is a streamlined mechanism that checks for hazards based on
1520/// simple counters that the scheduler itself maintains. It explicitly checks
1521/// for instruction dispatch limitations, including the number of micro-ops that
1522/// can dispatch per cycle.
1523///
1524/// TODO: Also check whether the SU must start a new group.
1525bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1526  if (HazardRec->isEnabled())
1527    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1528
1529  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1530  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1531    DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
1532          << uops << '\n');
1533    return true;
1534  }
1535  return false;
1536}
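// A sketch of the counter-based check above, with made-up numbers: if
// getIssueWidth() is 4 and CurrMOps is already 3, an SU that decodes to 2
// micro-ops gives CurrMOps + uops = 5 > 4, so it is reported as a hazard and
// callers defer it to the Pending queue until bumpCycle() starts a new group.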
1537
1538/// Compute the remaining latency to determine whether ILP should be increased.
1539void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1540  DEBUG(dbgs() << "  " << Available.getName()
1541        << " DependentLatency " << DependentLatency << '\n');
1542
1543  // FIXME: compile time. In all, we visit four queues here; we should only
1544  // need to visit the one that was last popped if we cache the result.
1545  unsigned RemLatency = DependentLatency;
1546  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1547       I != E; ++I) {
1548    unsigned L = getUnscheduledLatency(*I);
1549    if (L > RemLatency) {
1550      DEBUG(dbgs() << "  " << Available.getName()
1551            << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
1552      RemLatency = L;
1553    }
1554  }
1555  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1556       I != E; ++I) {
1557    unsigned L = getUnscheduledLatency(*I);
1558    if (L > RemLatency)
1559      RemLatency = L;
1560  }
1561  unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
1562  DEBUG(dbgs() << "  " << Available.getName()
1563        << " ExpectedLatency " << ExpectedLatency
1564        << " CP Limit " << CriticalPathLimit << '\n');
1565
1566  if (RemLatency + std::max(ExpectedLatency, CurrCycle) >= CriticalPathLimit
1567      && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1568    Policy.ReduceLatency = true;
1569    DEBUG(dbgs() << "  Increase ILP: " << Available.getName() << '\n');
1570  }
1571}
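// Illustrative arithmetic (hypothetical values): with CriticalPath = 20 and an
// ILP window of 10, CriticalPathLimit = 30. If the deepest unscheduled chain
// yields RemLatency = 18 and max(ExpectedLatency, CurrCycle) = 14, then
// 18 + 14 >= 30, so ReduceLatency is set (provided RemLatency also exceeds the
// remaining resource count), steering the picker toward long dependence chains.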
1572
1573void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1574                                                     unsigned ReadyCycle) {
1575
1576  if (ReadyCycle < MinReadyCycle)
1577    MinReadyCycle = ReadyCycle;
1578
1579  // Check for interlocks first. For the purpose of other heuristics, an
1580  // instruction that cannot issue appears as if it's not in the ReadyQueue.
1581  if (ReadyCycle > CurrCycle || checkHazard(SU))
1582    Pending.push(SU);
1583  else
1584    Available.push(SU);
1585
1586  // Record this node as an immediate dependent of the scheduled node.
1587  NextSUs.insert(SU);
1588}
1589
1590/// Move the boundary of scheduled code by one cycle.
1591void ConvergingScheduler::SchedBoundary::bumpCycle() {
1592  unsigned Width = SchedModel->getIssueWidth();
1593  CurrMOps = (CurrMOps <= Width) ? 0 : CurrMOps - Width;
1594
1595  unsigned NextCycle = CurrCycle + 1;
1596  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1597  if (MinReadyCycle > NextCycle) {
1598    CurrMOps = 0;
1599    NextCycle = MinReadyCycle;
1600  }
1601  if ((NextCycle - CurrCycle) > DependentLatency)
1602    DependentLatency = 0;
1603  else
1604    DependentLatency -= (NextCycle - CurrCycle);
1605
1606  if (!HazardRec->isEnabled()) {
1607    // Bypass HazardRec virtual calls.
1608    CurrCycle = NextCycle;
1609  }
1610  else {
1611    // Bypass getHazardType calls in case of long latency.
1612    for (; CurrCycle != NextCycle; ++CurrCycle) {
1613      if (isTop())
1614        HazardRec->AdvanceCycle();
1615      else
1616        HazardRec->RecedeCycle();
1617    }
1618  }
1619  CheckPending = true;
1620  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1621
1622  DEBUG(dbgs() << "  " << Available.getName()
1623        << " Cycle: " << CurrCycle << '\n');
1624}
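// Example of the micro-op bookkeeping above (numbers are illustrative only):
// with an issue width of 4 and CurrMOps = 6 after an expensive instruction,
// one bumpCycle() leaves CurrMOps = 2, modeling micro-ops that spill into the
// next instruction group; a second bump is needed before the group is empty.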
1625
1626/// Add the given processor resource to this scheduled zone.
1627void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
1628                                                       unsigned Cycles) {
1629  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1630  DEBUG(dbgs() << "  " << SchedModel->getProcResource(PIdx)->Name
1631        << " +(" << Cycles << "x" << Factor
1632        << ") / " << SchedModel->getLatencyFactor() << '\n');
1633
1634  unsigned Count = Factor * Cycles;
1635  ResourceCounts[PIdx] += Count;
1636  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
1637  Rem->RemainingCounts[PIdx] -= Count;
1638
1639  // Check if this resource exceeds the current critical resource by a full
1640  // cycle. If so, it becomes the critical resource.
1641  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
1642      >= (int)SchedModel->getLatencyFactor()) {
1643    CritResIdx = PIdx;
1644    DEBUG(dbgs() << "  *** Critical resource "
1645          << SchedModel->getProcResource(PIdx)->Name << " x"
1646          << ResourceCounts[PIdx] << '\n');
1647  }
1648}
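// For illustration (assumed values, not target data): if PIdx names a divider
// with a resource factor of 3 and the instruction occupies it for Cycles = 2,
// then Count = 6 is added to ResourceCounts[PIdx] and subtracted from the
// remainder. Once this normalized count pulls a full latency factor ahead of
// the current critical resource, CritResIdx switches to the divider.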
1649
1650/// Move the boundary of scheduled code by one SUnit.
1651void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
1652  // Update the reservation table.
1653  if (HazardRec->isEnabled()) {
1654    if (!isTop() && SU->isCall) {
1655      // Calls are scheduled with their preceding instructions. For bottom-up
1656      // scheduling, clear the pipeline state before emitting.
1657      HazardRec->Reset();
1658    }
1659    HazardRec->EmitInstruction(SU);
1660  }
1661  // Update resource counts and critical resource.
1662  if (SchedModel->hasInstrSchedModel()) {
1663    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1664    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
1665    for (TargetSchedModel::ProcResIter
1666           PI = SchedModel->getWriteProcResBegin(SC),
1667           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1668      countResource(PI->ProcResourceIdx, PI->Cycles);
1669    }
1670  }
1671  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
1672  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
1673  if (SU->getDepth() > TopLatency)
1674    TopLatency = SU->getDepth();
1675  if (SU->getHeight() > BotLatency)
1676    BotLatency = SU->getHeight();
1677
1678  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);
1679
1680  // Check the instruction group dispatch limit.
1681  // TODO: Check if this SU must end a dispatch group.
1682  CurrMOps += SchedModel->getNumMicroOps(SU->getInstr());
1683
1684  // checkHazard prevents scheduling multiple instructions per cycle that exceed
1685  // the issue width. However, we commonly reach the maximum. In this case,
1686  // opportunistically bump the cycle to avoid uselessly checking everything in
1687  // the readyQ. Furthermore, a single instruction may produce more than one
1688  // cycle's worth of micro-ops.
1689  if (CurrMOps >= SchedModel->getIssueWidth()) {
1690    DEBUG(dbgs() << "  *** Max instrs at cycle " << CurrCycle << '\n');
1691    bumpCycle();
1692  }
1693}
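// Putting the pieces together with hypothetical numbers: at issue width 4, two
// consecutive 2 micro-op SUs fill the group exactly (CurrMOps = 4), so the
// check at the end of bumpNode() advances the cycle right away instead of
// letting every remaining node fail checkHazard() against a full group.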
1694
1695/// Release pending ready nodes into the available queue. This makes them
1696/// visible to heuristics.
1697void ConvergingScheduler::SchedBoundary::releasePending() {
1698  // If the available queue is empty, it is safe to reset MinReadyCycle.
1699  if (Available.empty())
1700    MinReadyCycle = UINT_MAX;
1701
1702  // Check to see if any of the pending instructions are ready to issue.  If
1703  // so, add them to the available queue.
1704  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
1705    SUnit *SU = *(Pending.begin()+i);
1706    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
1707
1708    if (ReadyCycle < MinReadyCycle)
1709      MinReadyCycle = ReadyCycle;
1710
1711    if (ReadyCycle > CurrCycle)
1712      continue;
1713
1714    if (checkHazard(SU))
1715      continue;
1716
1717    Available.push(SU);
1718    Pending.remove(Pending.begin()+i);
1719    --i; --e;
1720  }
1721  DEBUG(if (!Pending.empty()) Pending.dump());
1722  CheckPending = false;
1723}
1724
1725/// Remove SU from the ready set for this boundary.
1726void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
1727  if (Available.isInQueue(SU))
1728    Available.remove(Available.find(SU));
1729  else {
1730    assert(Pending.isInQueue(SU) && "bad ready count");
1731    Pending.remove(Pending.find(SU));
1732  }
1733}
1734
1735/// If this queue only has one ready candidate, return it. As a side effect,
1736/// defer any nodes that now hit a hazard, and advance the cycle until at least
1737/// one node is ready. If multiple instructions are ready, return NULL.
1738SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
1739  if (CheckPending)
1740    releasePending();
1741
1742  if (CurrMOps > 0) {
1743    // Defer any ready instrs that now have a hazard.
1744    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
1745      if (checkHazard(*I)) {
1746        Pending.push(*I);
1747        I = Available.remove(I);
1748        continue;
1749      }
1750      ++I;
1751    }
1752  }
1753  for (unsigned i = 0; Available.empty(); ++i) {
1754    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
1755           "permanent hazard"); (void)i;
1756    bumpCycle();
1757    releasePending();
1758  }
1759  if (Available.size() == 1)
1760    return *Available.begin();
1761  return NULL;
1762}
1763
1764/// Record the candidate policy for opposite zones with different critical
1765/// resources.
1766///
1767/// If the CriticalZone is latency limited, don't force a policy for the
1768/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
1769void ConvergingScheduler::balanceZones(
1770  ConvergingScheduler::SchedBoundary &CriticalZone,
1771  ConvergingScheduler::SchedCandidate &CriticalCand,
1772  ConvergingScheduler::SchedBoundary &OppositeZone,
1773  ConvergingScheduler::SchedCandidate &OppositeCand) {
1774
1775  if (!CriticalZone.IsResourceLimited)
1776    return;
1777  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
1778
1779  SchedRemainder *Rem = CriticalZone.Rem;
1780
1781  // If the critical zone is overconsuming a resource relative to the
1782  // remainder, try to reduce it.
1783  unsigned RemainingCritCount =
1784    Rem->RemainingCounts[CriticalZone.CritResIdx];
1785  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
1786      > (int)SchedModel->getLatencyFactor()) {
1787    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
1788    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
1789          << " reduce "
1790          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
1791          << '\n');
1792  }
1793  // If the other zone is underconsuming a resource relative to the full zone,
1794  // try to increase it.
1795  unsigned OppositeCount =
1796    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
1797  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
1798      > (int)SchedModel->getLatencyFactor()) {
1799    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
1800    DEBUG(dbgs() << "  Balance " << OppositeZone.Available.getName()
1801          << " demand "
1802          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
1803          << '\n');
1804  }
1805}
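// A sketch of the balancing rule with invented counts: suppose the top zone is
// critical on an FP unit, Rem->RemainingCounts for that unit is 12, and
// getMaxRemainingCount() reports 20. With a latency factor of 4, 20 - 12 = 8
// exceeds the factor, so TopCand is asked to reduce the FP unit (ReduceResIdx).
// If the bottom zone's ExpectedCount exceeds its own count for that unit by
// more than a factor, BotCand is asked to demand it (DemandResIdx) instead.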
1806
1807/// Determine if the scheduled zones exceed resource limits or the critical
1808/// path, and set each candidate's resource and latency policy accordingly.
1809void ConvergingScheduler::checkResourceLimits(
1810  ConvergingScheduler::SchedCandidate &TopCand,
1811  ConvergingScheduler::SchedCandidate &BotCand) {
1812
1813  // Set ReduceLatency to true if needed.
1814  Bot.setLatencyPolicy(BotCand.Policy);
1815  Top.setLatencyPolicy(TopCand.Policy);
1816
1817  // Handle resource-limited regions.
1818  if (Top.IsResourceLimited && Bot.IsResourceLimited
1819      && Top.CritResIdx == Bot.CritResIdx) {
1820    // If the scheduled critical resource in both zones is no longer the
1821    // critical remaining resource, attempt to reduce resource height both ways.
1822    if (Top.CritResIdx != Rem.CritResIdx) {
1823      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
1824      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
1825      DEBUG(dbgs() << "  Reduce scheduled "
1826            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
1827    }
1828    return;
1829  }
1830  // Handle latency-limited regions.
1831  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
1832    // If the total scheduled expected latency exceeds the region's critical
1833    // path then reduce latency both ways.
1834    //
1835    // Just because a zone is not resource limited does not mean it is latency
1836    // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
1837    // to exceed expected latency.
1838    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
1839        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
1840      TopCand.Policy.ReduceLatency = true;
1841      BotCand.Policy.ReduceLatency = true;
1842      DEBUG(dbgs() << "  Reduce scheduled latency " << Top.ExpectedLatency
1843            << " + " << Bot.ExpectedLatency << '\n');
1844    }
1845    return;
1846  }
1847  // The critical resource is different in each zone, so request balancing.
1848
1849  // Compute the cost of each zone.
1850  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
1851  Top.ExpectedCount = std::max(
1852    Top.getCriticalCount(),
1853    Top.ExpectedCount * SchedModel->getLatencyFactor());
1854  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
1855  Bot.ExpectedCount = std::max(
1856    Bot.getCriticalCount(),
1857    Bot.ExpectedCount * SchedModel->getLatencyFactor());
1858
1859  balanceZones(Top, TopCand, Bot, BotCand);
1860  balanceZones(Bot, BotCand, Top, TopCand);
1861}
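// Latency-limited example (numbers are made up): if neither zone is resource
// limited, Top.ExpectedLatency = 9, Bot.ExpectedLatency = 8, CriticalPath = 15
// and Top.CurrCycle + Bot.CurrCycle = 12, then 9 + 8 >= 15 and 15 > 12, so
// both candidates get ReduceLatency and the pickers favor long chains.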
1862
1863void ConvergingScheduler::SchedCandidate::
1864initResourceDelta(const ScheduleDAGMI *DAG,
1865                  const TargetSchedModel *SchedModel) {
1866  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
1867    return;
1868
1869  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
1870  for (TargetSchedModel::ProcResIter
1871         PI = SchedModel->getWriteProcResBegin(SC),
1872         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1873    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
1874      ResDelta.CritResources += PI->Cycles;
1875    if (PI->ProcResourceIdx == Policy.DemandResIdx)
1876      ResDelta.DemandedResources += PI->Cycles;
1877  }
1878}
1879
1880/// Return true if this heuristic determines order.
1881static bool tryLess(int TryVal, int CandVal,
1882                    ConvergingScheduler::SchedCandidate &TryCand,
1883                    ConvergingScheduler::SchedCandidate &Cand,
1884                    ConvergingScheduler::CandReason Reason) {
1885  if (TryVal < CandVal) {
1886    TryCand.Reason = Reason;
1887    return true;
1888  }
1889  if (TryVal > CandVal) {
1890    if (Cand.Reason > Reason)
1891      Cand.Reason = Reason;
1892    return true;
1893  }
1894  return false;
1895}
1896
1897static bool tryGreater(int TryVal, int CandVal,
1898                       ConvergingScheduler::SchedCandidate &TryCand,
1899                       ConvergingScheduler::SchedCandidate &Cand,
1900                       ConvergingScheduler::CandReason Reason) {
1901  if (TryVal > CandVal) {
1902    TryCand.Reason = Reason;
1903    return true;
1904  }
1905  if (TryVal < CandVal) {
1906    if (Cand.Reason > Reason)
1907      Cand.Reason = Reason;
1908    return true;
1909  }
1910  return false;
1911}
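// These helpers give each heuristic a three-way outcome. For example, with
// Reason = Cluster: if TryCand is the next cluster node and Cand is not,
// tryGreater(1, 0, ...) marks TryCand as the winner; in the reverse case it
// returns true with Cand retained, recording Cluster as Cand's reason if its
// current reason is weaker (so the trace shows why it survived); only a tie
// falls through to the next, weaker heuristic.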
1912
1913static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
1914  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
1915}
1916
1917/// Minimize physical register live ranges. Regalloc wants them adjacent to
1918/// their physreg def/use.
1919///
1920/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
1921/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
1922/// with the operation that produces or consumes the physreg. We'll do this when
1923/// regalloc has support for parallel copies.
1924static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
1925  const MachineInstr *MI = SU->getInstr();
1926  if (!MI->isCopy())
1927    return 0;
1928
1929  unsigned ScheduledOper = isTop ? 1 : 0;
1930  unsigned UnscheduledOper = isTop ? 0 : 1;
1931  // If we have already scheduled the physreg producer/consumer, immediately
1932  // schedule the copy.
1933  if (TargetRegisterInfo::isPhysicalRegister(
1934        MI->getOperand(ScheduledOper).getReg()))
1935    return 1;
1936  // If the physreg is at the boundary, defer it. Otherwise schedule it
1937  // immediately to free the dependent. We can hoist the copy later.
1938  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
1939  if (TargetRegisterInfo::isPhysicalRegister(
1940        MI->getOperand(UnscheduledOper).getReg()))
1941    return AtBoundary ? -1 : 1;
1942  return 0;
1943}
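// Example with hypothetical operands: in a bottom-up zone, %vreg5 = COPY %EAX
// has its physreg on the source (unscheduled) side. With no predecessors left
// the copy sits at the region boundary and gets -1, deferring it so it ends up
// adjacent to the physreg def; otherwise it gets +1 and issues right away. A
// copy whose physreg operand faces the already scheduled side always gets +1.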
1944
1945/// Apply a set of heuristics to a new candidate. Heuristics are currently
1946/// hierarchical. This may be more efficient than a graduated cost model because
1947/// we don't need to evaluate all aspects of the model for each node in the
1948/// queue. But it's really done to make the heuristics easier to debug and
1949/// statistically analyze.
1950///
1951/// \param Cand provides the policy and current best candidate.
1952/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
1953/// \param Zone describes the scheduled zone that we are extending.
1954/// \param RPTracker describes reg pressure within the scheduled zone.
1955/// \param TempTracker is a scratch pressure tracker to reuse in queries.
1956void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
1957                                       SchedCandidate &TryCand,
1958                                       SchedBoundary &Zone,
1959                                       const RegPressureTracker &RPTracker,
1960                                       RegPressureTracker &TempTracker) {
1961
1962  // Always initialize TryCand's RPDelta.
1963  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
1964                                  DAG->getRegionCriticalPSets(),
1965                                  DAG->getRegPressure().MaxSetPressure);
1966
1967  // Initialize the candidate if needed.
1968  if (!Cand.isValid()) {
1969    TryCand.Reason = NodeOrder;
1970    return;
1971  }
1972
1973  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
1974                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
1975                 TryCand, Cand, PhysRegCopy))
1976    return;
1977
1978  // Avoid exceeding the target's limit.
1979  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
1980              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
1981    return;
1982  if (Cand.Reason == SingleExcess)
1983    Cand.Reason = MultiPressure;
1984
1985  // Avoid increasing the max critical pressure in the scheduled region.
1986  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
1987              Cand.RPDelta.CriticalMax.UnitIncrease,
1988              TryCand, Cand, SingleCritical))
1989    return;
1990  if (Cand.Reason == SingleCritical)
1991    Cand.Reason = MultiPressure;
1992
1993  // Keep clustered nodes together to encourage downstream peephole
1994  // optimizations which may reduce resource requirements.
1995  //
1996  // This is a best effort to set things up for a post-RA pass. Optimizations
1997  // like generating loads of multiple registers should ideally be done within
1998  // the scheduler pass by combining the loads during DAG postprocessing.
1999  const SUnit *NextClusterSU =
2000    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
2001  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
2002                 TryCand, Cand, Cluster))
2003    return;
2004
2005  // Weak edges are for clustering and other constraints.
2006  //
2007  // Deferring TryCand here does not change Cand's reason. This is good in the
2008  // sense that a bad candidate shouldn't affect a previous candidate's
2009  // goodness, but bad in that it is assymetric and depends on queue order.
2010  CandReason OrigReason = Cand.Reason;
2011  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
2012              getWeakLeft(Cand.SU, Zone.isTop()),
2013              TryCand, Cand, Weak)) {
2014    Cand.Reason = OrigReason;
2015    return;
2016  }
2017  // Avoid critical resource consumption and balance the schedule.
2018  TryCand.initResourceDelta(DAG, SchedModel);
2019  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
2020              TryCand, Cand, ResourceReduce))
2021    return;
2022  if (tryGreater(TryCand.ResDelta.DemandedResources,
2023                 Cand.ResDelta.DemandedResources,
2024                 TryCand, Cand, ResourceDemand))
2025    return;
2026
2027  // Avoid serializing long latency dependence chains.
2028  if (Cand.Policy.ReduceLatency) {
2029    if (Zone.isTop()) {
2030      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
2031          > Zone.ExpectedCount) {
2032        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2033                    TryCand, Cand, TopDepthReduce))
2034          return;
2035      }
2036      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2037                     TryCand, Cand, TopPathReduce))
2038        return;
2039    }
2040    else {
2041      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
2042          > Zone.ExpectedCount) {
2043        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2044                    TryCand, Cand, BotHeightReduce))
2045          return;
2046      }
2047      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2048                     TryCand, Cand, BotPathReduce))
2049        return;
2050    }
2051  }
2052
2053  // Avoid increasing the max pressure of the entire region.
2054  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
2055              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
2056    return;
2057  if (Cand.Reason == SingleMax)
2058    Cand.Reason = MultiPressure;
2059
2060  // Prefer immediate defs/users of the last scheduled instruction. This is a
2061  // nice pressure avoidance strategy that also conserves the processor's
2062  // register renaming resources and keeps the machine code readable.
2063  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
2064                 TryCand, Cand, NextDefUse))
2065    return;
2066
2067  // Fall through to original instruction order.
2068  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
2069      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
2070    TryCand.Reason = NodeOrder;
2071  }
2072}
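// In summary, the hierarchy above evaluates the following criteria in order,
// where each stage either decides the comparison or is neutral and falls
// through: physreg copy bias, excess pressure, critical-set pressure, cluster
// edges, weak edges, critical resource reduction, demanded resources, latency
// (depth/height according to the zone's policy), overall max pressure,
// proximity to the last scheduled instruction, and finally node order.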
2073
2074/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
2075/// more desirable than RHS from a scheduling standpoint.
2076static bool compareRPDelta(const RegPressureDelta &LHS,
2077                           const RegPressureDelta &RHS) {
2078  // Compare each component of pressure in decreasing order of importance
2079  // without checking if any are valid. Invalid PressureElements are assumed to
2080  // have UnitIncrease==0, so are neutral.
2081
2082  // Avoid exceeding the target's limit.
2083  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
2084    DEBUG(dbgs() << "  RP excess top - bot: "
2085          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
2086    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
2087  }
2088  // Avoid increasing the max critical pressure in the scheduled region.
2089  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
2090    DEBUG(dbgs() << "  RP critical top - bot: "
2091          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
2092          << '\n');
2093    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
2094  }
2095  // Avoid increasing the max pressure of the entire region.
2096  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
2097    DEBUG(dbgs() << "  RP current top - bot: "
2098          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
2099          << '\n');
2100    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
2101  }
2102  return false;
2103}
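// Hypothetical example: if the top candidate adds 0 units to an exceeded
// pressure set while the bottom candidate adds 2, the Excess components differ
// and 0 < 2 makes the top side the more desirable one; ties fall through to
// the critical-set and then the current-max components.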
2104
2105#ifndef NDEBUG
2106const char *ConvergingScheduler::getReasonStr(
2107  ConvergingScheduler::CandReason Reason) {
2108  switch (Reason) {
2109  case NoCand:         return "NOCAND    ";
2110  case PhysRegCopy:    return "PREG-COPY";
2111  case SingleExcess:   return "REG-EXCESS";
2112  case SingleCritical: return "REG-CRIT  ";
2113  case Cluster:        return "CLUSTER   ";
2114  case Weak:           return "WEAK      ";
2115  case SingleMax:      return "REG-MAX   ";
2116  case MultiPressure:  return "REG-MULTI ";
2117  case ResourceReduce: return "RES-REDUCE";
2118  case ResourceDemand: return "RES-DEMAND";
2119  case TopDepthReduce: return "TOP-DEPTH ";
2120  case TopPathReduce:  return "TOP-PATH  ";
2121  case BotHeightReduce:return "BOT-HEIGHT";
2122  case BotPathReduce:  return "BOT-PATH  ";
2123  case NextDefUse:     return "DEF-USE   ";
2124  case NodeOrder:      return "ORDER     ";
2125  }
2126  llvm_unreachable("Unknown reason!");
2127}
2128
2129void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
2130  PressureElement P;
2131  unsigned ResIdx = 0;
2132  unsigned Latency = 0;
2133  switch (Cand.Reason) {
2134  default:
2135    break;
2136  case SingleExcess:
2137    P = Cand.RPDelta.Excess;
2138    break;
2139  case SingleCritical:
2140    P = Cand.RPDelta.CriticalMax;
2141    break;
2142  case SingleMax:
2143    P = Cand.RPDelta.CurrentMax;
2144    break;
2145  case ResourceReduce:
2146    ResIdx = Cand.Policy.ReduceResIdx;
2147    break;
2148  case ResourceDemand:
2149    ResIdx = Cand.Policy.DemandResIdx;
2150    break;
2151  case TopDepthReduce:
2152    Latency = Cand.SU->getDepth();
2153    break;
2154  case TopPathReduce:
2155    Latency = Cand.SU->getHeight();
2156    break;
2157  case BotHeightReduce:
2158    Latency = Cand.SU->getHeight();
2159    break;
2160  case BotPathReduce:
2161    Latency = Cand.SU->getDepth();
2162    break;
2163  }
2164  dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2165  if (P.isValid())
2166    dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
2167           << ":" << P.UnitIncrease << " ";
2168  else
2169    dbgs() << "      ";
2170  if (ResIdx)
2171    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2172  else
2173    dbgs() << "         ";
2174  if (Latency)
2175    dbgs() << " " << Latency << " cycles ";
2176  else
2177    dbgs() << "          ";
2178  dbgs() << '\n';
2179}
2180#endif
2181
2182/// Pick the best candidate from the top queue.
2183///
2184/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
2185/// DAG building. To adjust for the current scheduling location we need to
2186/// maintain the number of vreg uses remaining to be top-scheduled.
2187void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
2188                                            const RegPressureTracker &RPTracker,
2189                                            SchedCandidate &Cand) {
2190  ReadyQueue &Q = Zone.Available;
2191
2192  DEBUG(Q.dump());
2193
2194  // getMaxPressureDelta temporarily modifies the tracker.
2195  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
2196
2197  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
2198
2199    SchedCandidate TryCand(Cand.Policy);
2200    TryCand.SU = *I;
2201    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
2202    if (TryCand.Reason != NoCand) {
2203      // Initialize resource delta if needed in case future heuristics query it.
2204      if (TryCand.ResDelta == SchedResourceDelta())
2205        TryCand.initResourceDelta(DAG, SchedModel);
2206      Cand.setBest(TryCand);
2207      DEBUG(traceCandidate(Cand));
2208    }
2209  }
2210}
2211
2212static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
2213                      bool IsTop) {
2214  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2215        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
2216}
2217
2218/// Pick the best candidate node from either the top or bottom queue.
2219SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
2220  // Schedule as far as possible in the direction of no choice. This is most
2221  // efficient, but also provides the best heuristics for CriticalPSets.
2222  if (SUnit *SU = Bot.pickOnlyChoice()) {
2223    IsTopNode = false;
2224    DEBUG(dbgs() << "Pick Bot NOCAND\n");
2225    return SU;
2226  }
2227  if (SUnit *SU = Top.pickOnlyChoice()) {
2228    IsTopNode = true;
2229    DEBUG(dbgs() << "Pick Top NOCAND\n");
2230    return SU;
2231  }
2232  CandPolicy NoPolicy;
2233  SchedCandidate BotCand(NoPolicy);
2234  SchedCandidate TopCand(NoPolicy);
2235  checkResourceLimits(TopCand, BotCand);
2236
2237  // Prefer bottom scheduling when heuristics are silent.
2238  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2239  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2240
2241  // If either Q has a single candidate that provides the least increase in
2242  // Excess pressure, we can immediately schedule from that Q.
2243  //
2244  // RegionCriticalPSets summarizes the pressure within the scheduled region and
2245  // affects picking from either Q. If scheduling in one direction must
2246  // increase pressure for one of the excess PSets, then schedule in that
2247  // direction first to provide more freedom in the other direction.
2248  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
2249    IsTopNode = false;
2250    tracePick(BotCand, IsTopNode);
2251    return BotCand.SU;
2252  }
2253  // Check if the top Q has a better candidate.
2254  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2255  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2256
2257  // If either Q has a single candidate that minimizes pressure above the
2258  // original region's pressure pick it.
2259  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
2260    if (TopCand.Reason < BotCand.Reason) {
2261      IsTopNode = true;
2262      tracePick(TopCand, IsTopNode);
2263      return TopCand.SU;
2264    }
2265    IsTopNode = false;
2266    tracePick(BotCand, IsTopNode);
2267    return BotCand.SU;
2268  }
2269  // Check for a salient pressure difference and pick the best from either side.
2270  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
2271    IsTopNode = true;
2272    tracePick(TopCand, IsTopNode);
2273    return TopCand.SU;
2274  }
2275  // Otherwise prefer the bottom candidate, in node order if all else failed.
2276  if (TopCand.Reason < BotCand.Reason) {
2277    IsTopNode = true;
2278    tracePick(TopCand, IsTopNode);
2279    return TopCand.SU;
2280  }
2281  IsTopNode = false;
2282  tracePick(BotCand, IsTopNode);
2283  return BotCand.SU;
2284}
2285
2286/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
2287SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
2288  if (DAG->top() == DAG->bottom()) {
2289    assert(Top.Available.empty() && Top.Pending.empty() &&
2290           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
2291    return NULL;
2292  }
2293  SUnit *SU;
2294  do {
2295    if (ForceTopDown) {
2296      SU = Top.pickOnlyChoice();
2297      if (!SU) {
2298        CandPolicy NoPolicy;
2299        SchedCandidate TopCand(NoPolicy);
2300        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
2301        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
2302        SU = TopCand.SU;
2303      }
2304      IsTopNode = true;
2305    }
2306    else if (ForceBottomUp) {
2307      SU = Bot.pickOnlyChoice();
2308      if (!SU) {
2309        CandPolicy NoPolicy;
2310        SchedCandidate BotCand(NoPolicy);
2311        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
2312        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
2313        SU = BotCand.SU;
2314      }
2315      IsTopNode = false;
2316    }
2317    else {
2318      SU = pickNodeBidirectional(IsTopNode);
2319    }
2320  } while (SU->isScheduled);
2321
2322  if (SU->isTopReady())
2323    Top.removeReady(SU);
2324  if (SU->isBottomReady())
2325    Bot.removeReady(SU);
2326
2327  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
2328  return SU;
2329}
2330
2331void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
2332
2333  MachineBasicBlock::iterator InsertPos = SU->getInstr();
2334  if (!isTop)
2335    ++InsertPos;
2336  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
2337
2338  // Find already scheduled copies with a single physreg dependence and move
2339  // them just above the scheduled instruction.
2340  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
2341       I != E; ++I) {
2342    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
2343      continue;
2344    SUnit *DepSU = I->getSUnit();
2345    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
2346      continue;
2347    MachineInstr *Copy = DepSU->getInstr();
2348    if (!Copy->isCopy())
2349      continue;
2350    DEBUG(dbgs() << "  Rescheduling physreg copy ";
2351          I->getSUnit()->dump(DAG));
2352    DAG->moveInstruction(Copy, InsertPos);
2353  }
2354}
2355
2356/// Update the scheduler's state after scheduling a node. This is the same node
2357/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
2358/// its state based on the current cycle before MachineSchedStrategy does.
2359///
2360/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
2361/// them here. See comments in biasPhysRegCopy.
2362void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
2363  if (IsTopNode) {
2364    SU->TopReadyCycle = Top.CurrCycle;
2365    Top.bumpNode(SU);
2366    if (SU->hasPhysRegUses)
2367      reschedulePhysRegCopies(SU, true);
2368  }
2369  else {
2370    SU->BotReadyCycle = Bot.CurrCycle;
2371    Bot.bumpNode(SU);
2372    if (SU->hasPhysRegDefs)
2373      reschedulePhysRegCopies(SU, false);
2374  }
2375}
2376
2377/// Create the standard converging machine scheduler. This will be used as the
2378/// default scheduler if the target does not set a default.
2379static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
2380  assert((!ForceTopDown || !ForceBottomUp) &&
2381         "-misched-topdown incompatible with -misched-bottomup");
2382  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
2383  // Register DAG post-processors.
2384  //
2385  // FIXME: extend the mutation API to allow earlier mutations to instantiate
2386  // data and pass it to later mutations. Have a single mutation that gathers
2387  // the interesting nodes in one pass.
2388  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
2389  if (EnableLoadCluster)
2390    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
2391  if (EnableMacroFusion)
2392    DAG->addMutation(new MacroFusion(DAG->TII));
2393  return DAG;
2394}
2395static MachineSchedRegistry
2396ConvergingSchedRegistry("converge", "Standard converging scheduler.",
2397                        createConvergingSched);
2398
2399//===----------------------------------------------------------------------===//
2400// ILP Scheduler. Currently for experimental analysis of heuristics.
2401//===----------------------------------------------------------------------===//
2402
2403namespace {
2404/// \brief Order nodes by the ILP metric.
2405struct ILPOrder {
2406  const SchedDFSResult *DFSResult;
2407  const BitVector *ScheduledTrees;
2408  bool MaximizeILP;
2409
2410  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
2411
2412  /// \brief Apply a less-than relation on node priority.
2413  ///
2414  /// (Return true if A comes after B in the Q.)
2415  bool operator()(const SUnit *A, const SUnit *B) const {
2416    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
2417    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
2418    if (SchedTreeA != SchedTreeB) {
2419      // Unscheduled trees have lower priority.
2420      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
2421        return ScheduledTrees->test(SchedTreeB);
2422
2423      // Trees with shallower connections have lower priority.
2424      if (DFSResult->getSubtreeLevel(SchedTreeA)
2425          != DFSResult->getSubtreeLevel(SchedTreeB)) {
2426        return DFSResult->getSubtreeLevel(SchedTreeA)
2427          < DFSResult->getSubtreeLevel(SchedTreeB);
2428      }
2429    }
2430    if (MaximizeILP)
2431      return DFSResult->getILP(A) < DFSResult->getILP(B);
2432    else
2433      return DFSResult->getILP(A) > DFSResult->getILP(B);
2434  }
2435};
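// Comparator sketch with invented DFS data: for A in a subtree at level 2 and
// B in a different subtree at level 1 (neither scheduled yet), the test
// "level(A) < level(B)" is 2 < 1 = false, so A is not ordered after B: the
// more deeply connected subtree wins the priority queue. Only when the subtree
// tests are neutral does the per-node ILP value decide, maximized or minimized
// according to MaximizeILP.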
2436
2437/// \brief Schedule based on the ILP metric.
2438class ILPScheduler : public MachineSchedStrategy {
2439  /// In case all subtrees are eventually connected to a common root through
2440  /// data dependence (e.g. reduction), place an upper limit on their size.
2441  ///
2442  /// FIXME: A subtree limit is generally good, but in the situation commented
2443  /// above, where multiple similar subtrees feed a common root, we should
2444  /// only split at a point where the resulting subtrees will be balanced.
2445  /// (a motivating test case must be found).
2446  static const unsigned SubtreeLimit = 16;
2447
2448  ScheduleDAGMI *DAG;
2449  ILPOrder Cmp;
2450
2451  std::vector<SUnit*> ReadyQ;
2452public:
2453  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
2454
2455  virtual void initialize(ScheduleDAGMI *dag) {
2456    DAG = dag;
2457    DAG->computeDFSResult();
2458    Cmp.DFSResult = DAG->getDFSResult();
2459    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
2460    ReadyQ.clear();
2461  }
2462
2463  virtual void registerRoots() {
2464    // Restore the heap in ReadyQ with the updated DFS results.
2465    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2466  }
2467
2468  /// Implement MachineSchedStrategy interface.
2469  /// -----------------------------------------
2470
2471  /// Callback to select the highest priority node from the ready Q.
2472  virtual SUnit *pickNode(bool &IsTopNode) {
2473    if (ReadyQ.empty()) return NULL;
2474    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2475    SUnit *SU = ReadyQ.back();
2476    ReadyQ.pop_back();
2477    IsTopNode = false;
2478    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
2479          << " ILP: " << DAG->getDFSResult()->getILP(SU)
2480          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
2481          << DAG->getDFSResult()->getSubtreeLevel(
2482            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
2483          << "Scheduling " << *SU->getInstr());
2484    return SU;
2485  }
2486
2487  /// \brief Scheduler callback to notify that a new subtree is scheduled.
2488  virtual void scheduleTree(unsigned SubtreeID) {
2489    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2490  }
2491
2492  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
2493  /// DFSResults, and resort the priority Q.
2494  virtual void schedNode(SUnit *SU, bool IsTopNode) {
2495    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
2496  }
2497
2498  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
2499
2500  virtual void releaseBottomNode(SUnit *SU) {
2501    ReadyQ.push_back(SU);
2502    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
2503  }
2504};
2505} // namespace
2506
2507static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
2508  return new ScheduleDAGMI(C, new ILPScheduler(true));
2509}
2510static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
2511  return new ScheduleDAGMI(C, new ILPScheduler(false));
2512}
2513static MachineSchedRegistry ILPMaxRegistry(
2514  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
2515static MachineSchedRegistry ILPMinRegistry(
2516  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
2517
2518//===----------------------------------------------------------------------===//
2519// Machine Instruction Shuffler for Correctness Testing
2520//===----------------------------------------------------------------------===//
2521
2522#ifndef NDEBUG
2523namespace {
2524/// Apply a less-than relation on the node order, which corresponds to the
2525/// instruction order prior to scheduling. IsReverse implements greater-than.
2526template<bool IsReverse>
2527struct SUnitOrder {
2528  bool operator()(SUnit *A, SUnit *B) const {
2529    if (IsReverse)
2530      return A->NodeNum > B->NodeNum;
2531    else
2532      return A->NodeNum < B->NodeNum;
2533  }
2534};
2535
2536/// Reorder instructions as much as possible.
2537class InstructionShuffler : public MachineSchedStrategy {
2538  bool IsAlternating;
2539  bool IsTopDown;
2540
2541  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
2542  // gives nodes with a higher number higher priority, causing the latest
2543  // instructions to be scheduled first.
2544  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
2545    TopQ;
2546  // When scheduling bottom-up, use greater-than as the queue priority.
2547  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
2548    BottomQ;
2549public:
2550  InstructionShuffler(bool alternate, bool topdown)
2551    : IsAlternating(alternate), IsTopDown(topdown) {}
2552
2553  virtual void initialize(ScheduleDAGMI *) {
2554    TopQ.clear();
2555    BottomQ.clear();
2556  }
2557
2558  /// Implement MachineSchedStrategy interface.
2559  /// -----------------------------------------
2560
2561  virtual SUnit *pickNode(bool &IsTopNode) {
2562    SUnit *SU;
2563    if (IsTopDown) {
2564      do {
2565        if (TopQ.empty()) return NULL;
2566        SU = TopQ.top();
2567        TopQ.pop();
2568      } while (SU->isScheduled);
2569      IsTopNode = true;
2570    }
2571    else {
2572      do {
2573        if (BottomQ.empty()) return NULL;
2574        SU = BottomQ.top();
2575        BottomQ.pop();
2576      } while (SU->isScheduled);
2577      IsTopNode = false;
2578    }
2579    if (IsAlternating)
2580      IsTopDown = !IsTopDown;
2581    return SU;
2582  }
2583
2584  virtual void schedNode(SUnit *SU, bool IsTopNode) {}
2585
2586  virtual void releaseTopNode(SUnit *SU) {
2587    TopQ.push(SU);
2588  }
2589  virtual void releaseBottomNode(SUnit *SU) {
2590    BottomQ.push(SU);
2591  }
2592};
2593} // namespace
2594
2595static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
2596  bool Alternate = !ForceTopDown && !ForceBottomUp;
2597  bool TopDown = !ForceBottomUp;
2598  assert((TopDown || !ForceTopDown) &&
2599         "-misched-topdown incompatible with -misched-bottomup");
2600  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
2601}
2602static MachineSchedRegistry ShufflerRegistry(
2603  "shuffle", "Shuffle machine instructions alternating directions",
2604  createInstructionShuffler);
2605#endif // !NDEBUG
2606
2607//===----------------------------------------------------------------------===//
2608// GraphWriter support for ScheduleDAGMI.
2609//===----------------------------------------------------------------------===//
2610
2611#ifndef NDEBUG
2612namespace llvm {
2613
2614template<> struct GraphTraits<
2615  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
2616
2617template<>
2618struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
2619
2620  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
2621
2622  static std::string getGraphName(const ScheduleDAG *G) {
2623    return G->MF.getName();
2624  }
2625
2626  static bool renderGraphFromBottomUp() {
2627    return true;
2628  }
2629
2630  static bool isNodeHidden(const SUnit *Node) {
2631    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
2632  }
2633
2634  static bool hasNodeAddressLabel(const SUnit *Node,
2635                                  const ScheduleDAG *Graph) {
2636    return false;
2637  }
2638
2639  /// If you want to override the dot attributes printed for a particular
2640  /// edge, override this method.
2641  static std::string getEdgeAttributes(const SUnit *Node,
2642                                       SUnitIterator EI,
2643                                       const ScheduleDAG *Graph) {
2644    if (EI.isArtificialDep())
2645      return "color=cyan,style=dashed";
2646    if (EI.isCtrlDep())
2647      return "color=blue,style=dashed";
2648    return "";
2649  }
2650
2651  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
2652    std::string Str;
2653    raw_string_ostream SS(Str);
2654    SS << "SU(" << SU->NodeNum << ')';
2655    return SS.str();
2656  }
2657  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
2658    return G->getGraphNodeLabel(SU);
2659  }
2660
2661  static std::string getNodeAttributes(const SUnit *N,
2662                                       const ScheduleDAG *Graph) {
2663    std::string Str("shape=Mrecord");
2664    const SchedDFSResult *DFS =
2665      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
2666    if (DFS) {
2667      Str += ",style=filled,fillcolor=\"#";
2668      Str += DOT::getColorString(DFS->getSubtreeID(N));
2669      Str += '"';
2670    }
2671    return Str;
2672  }
2673};
2674} // namespace llvm
2675#endif // NDEBUG
2676
2677/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
2678/// rendered using 'dot'.
2679///
2680void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
2681#ifndef NDEBUG
2682  ViewGraph(this, Name, false, Title);
2683#else
2684  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
2685         << "systems with Graphviz or gv!\n";
2686#endif  // NDEBUG
2687}
2688
2689/// Out-of-line implementation with no arguments is handy for gdb.
2690void ScheduleDAGMI::viewGraph() {
2691  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
2692}
2693