ScheduleDAGRRList.cpp revision 2da8bc8a5f7705ac131184cd247f48500da0d74e
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> EnableSchedCycles(
70  "enable-sched-cycles",
71  cl::desc("Enable cycle-level precision during preRA scheduling"),
72  cl::init(false), cl::Hidden);
73
74namespace {
75//===----------------------------------------------------------------------===//
76/// ScheduleDAGRRList - The actual register reduction list scheduler
77/// implementation.  This supports both top-down and bottom-up scheduling.
78///
79class ScheduleDAGRRList : public ScheduleDAGSDNodes {
80private:
81  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
82  /// it is top-down.
83  bool isBottomUp;
84
85  /// NeedLatency - True if the scheduler will make use of latency information.
86  ///
87  bool NeedLatency;
88
89  /// AvailableQueue - The priority queue to use for the available SUnits.
90  SchedulingPriorityQueue *AvailableQueue;
91
92  /// PendingQueue - This contains all of the instructions whose operands have
93  /// been issued, but their results are not ready yet (due to the latency of
94  /// the operation).  Once the operands become available, the instruction is
95  /// added to the AvailableQueue.
96  std::vector<SUnit*> PendingQueue;
97
98  /// HazardRec - The hazard recognizer to use.
99  ScheduleHazardRecognizer *HazardRec;
100
101  /// CurCycle - The current scheduler state corresponds to this cycle.
102  unsigned CurCycle;
103
104  /// MinAvailableCycle - Cycle of the soonest available instruction.
105  unsigned MinAvailableCycle;
106
107  /// LiveRegDefs - A set of physical registers and their definitions
108  /// that are "live". These nodes must be scheduled before any other node
109  /// that modifies the registers can be scheduled.
110  unsigned NumLiveRegs;
111  std::vector<SUnit*> LiveRegDefs;
112  std::vector<SUnit*> LiveRegGens;
113
114  /// Topo - A topological ordering for SUnits which permits fast IsReachable
115  /// and similar queries.
116  ScheduleDAGTopologicalSort Topo;
117
118public:
119  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
120                    SchedulingPriorityQueue *availqueue,
121                    CodeGenOpt::Level OptLevel)
122    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
123      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
124      Topo(SUnits) {
125
126    const TargetMachine &tm = mf.getTarget();
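    // Use the target's hazard recognizer only when cycle-level scheduling is
    // enabled and we are optimizing; otherwise fall back to the base
    // recognizer, which has no lookahead and never reports hazards.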
127    if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
128      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
129    else
130      HazardRec = new ScheduleHazardRecognizer();
131  }
132
133  ~ScheduleDAGRRList() {
134    delete HazardRec;
135    delete AvailableQueue;
136  }
137
138  void Schedule();
139
140  /// IsReachable - Checks if SU is reachable from TargetSU.
141  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
142    return Topo.IsReachable(SU, TargetSU);
143  }
144
145  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
146  /// create a cycle.
147  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
148    return Topo.WillCreateCycle(SU, TargetSU);
149  }
150
151  /// AddPred - Adds a predecessor edge to SUnit SU.
152  /// Updates the topological ordering if required.
154  void AddPred(SUnit *SU, const SDep &D) {
155    Topo.AddPred(SU, D.getSUnit());
156    SU->addPred(D);
157  }
158
159  /// RemovePred - Removes a predecessor edge from SUnit SU.
160  /// Updates the topological ordering if required.
162  void RemovePred(SUnit *SU, const SDep &D) {
163    Topo.RemovePred(SU, D.getSUnit());
164    SU->removePred(D);
165  }
166
167private:
168  bool isReady(SUnit *SU) {
169    return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
170      AvailableQueue->isReady(SU);
171  }
172
173  void ReleasePred(SUnit *SU, const SDep *PredEdge);
174  void ReleasePredecessors(SUnit *SU);
175  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
176  void ReleaseSuccessors(SUnit *SU);
177  void ReleasePending();
178  void AdvanceToCycle(unsigned NextCycle);
179  void AdvancePastStalls(SUnit *SU);
180  void EmitNode(SUnit *SU);
181  void ScheduleNodeBottomUp(SUnit*);
182  void CapturePred(SDep *PredEdge);
183  void UnscheduleNodeBottomUp(SUnit*);
184  void RestoreHazardCheckerBottomUp();
185  void BacktrackBottomUp(SUnit*, SUnit*);
186  SUnit *CopyAndMoveSuccessors(SUnit*);
187  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
188                                const TargetRegisterClass*,
189                                const TargetRegisterClass*,
190                                SmallVector<SUnit*, 2>&);
191  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
192
193  SUnit *PickNodeToScheduleBottomUp();
194  void ListScheduleBottomUp();
195
196  void ScheduleNodeTopDown(SUnit*);
197  void ListScheduleTopDown();
198
199
200  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
201  /// Updates the topological ordering if required.
202  SUnit *CreateNewSUnit(SDNode *N) {
203    unsigned NumSUnits = SUnits.size();
204    SUnit *NewNode = NewSUnit(N);
205    // Update the topological ordering.
206    if (NewNode->NodeNum >= NumSUnits)
207      Topo.InitDAGTopologicalSorting();
208    return NewNode;
209  }
210
211  /// CreateClone - Creates a new SUnit from an existing one.
212  /// Updates the topological ordering if required.
213  SUnit *CreateClone(SUnit *N) {
214    unsigned NumSUnits = SUnits.size();
215    SUnit *NewNode = Clone(N);
216    // Update the topological ordering.
217    if (NewNode->NodeNum >= NumSUnits)
218      Topo.InitDAGTopologicalSorting();
219    return NewNode;
220  }
221
222  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
223  /// need actual latency information but the hybrid scheduler does.
224  bool ForceUnitLatencies() const {
225    return !NeedLatency;
226  }
227};
228}  // end anonymous namespace
229
230
231/// Schedule - Schedule the DAG using list scheduling.
232void ScheduleDAGRRList::Schedule() {
233  DEBUG(dbgs()
234        << "********** List Scheduling BB#" << BB->getNumber()
235        << " '" << BB->getName() << "' **********\n");
236
237  CurCycle = 0;
238  MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
239  NumLiveRegs = 0;
240  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
241  LiveRegGens.resize(TRI->getNumRegs(), NULL);
242
243  // Build the scheduling graph.
244  BuildSchedGraph(NULL);
245
246  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
247          SUnits[su].dumpAll(this));
248  Topo.InitDAGTopologicalSorting();
249
250  AvailableQueue->initNodes(SUnits);
251
252  HazardRec->Reset();
253
254  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
255  if (isBottomUp)
256    ListScheduleBottomUp();
257  else
258    ListScheduleTopDown();
259
260  AvailableQueue->releaseState();
261}
262
263//===----------------------------------------------------------------------===//
264//  Bottom-Up Scheduling
265//===----------------------------------------------------------------------===//
266
267/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
268/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
269void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
270  SUnit *PredSU = PredEdge->getSUnit();
271
272#ifndef NDEBUG
273  if (PredSU->NumSuccsLeft == 0) {
274    dbgs() << "*** Scheduling failed! ***\n";
275    PredSU->dump(this);
276    dbgs() << " has been released too many times!\n";
277    llvm_unreachable(0);
278  }
279#endif
280  --PredSU->NumSuccsLeft;
281
282  if (!ForceUnitLatencies()) {
283    // Updating predecessor's height. This is now the cycle when the
284    // predecessor can be scheduled without causing a pipeline stall.
285    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
286  }
287
288  // If all the node's successors are scheduled, this node is ready
289  // to be scheduled. Ignore the special EntrySU node.
290  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
291    PredSU->isAvailable = true;
292
293    unsigned Height = PredSU->getHeight();
294    if (Height < MinAvailableCycle)
295      MinAvailableCycle = Height;
296
297    if (isReady(PredSU)) {
298      AvailableQueue->push(PredSU);
299    }
300    // CapturePred and others may have left the node in the pending queue; avoid
301    // adding it twice.
302    else if (!PredSU->isPending) {
303      PredSU->isPending = true;
304      PendingQueue.push_back(PredSU);
305    }
306  }
307}
308
309/// Call ReleasePred for each predecessor, then update register live def/gen.
310/// Always update LiveRegDefs for a register dependence even if the current SU
311/// also defines the register. This effectively creates one large live range
312/// across a sequence of two-address nodes. This is important because the
313/// entire chain must be scheduled together. Example:
314///
315/// flags = (3) add
316/// flags = (2) addc flags
317/// flags = (1) addc flags
318///
319/// results in
320///
321/// LiveRegDefs[flags] = 3
322/// LiveRegGens[flags] = 1
323///
324/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
325/// interference on flags.
326void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
327  // Bottom up: release predecessors
328  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
329       I != E; ++I) {
330    ReleasePred(SU, &*I);
331    if (I->isAssignedRegDep()) {
332      // This is a physical register dependency and it's impossible or
333      // expensive to copy the register. Make sure nothing that can
334      // clobber the register is scheduled between the predecessor and
335      // this node.
336      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
337      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
338             "interference on register dependence");
339      LiveRegDefs[I->getReg()] = I->getSUnit();
340      if (!LiveRegGens[I->getReg()]) {
341        ++NumLiveRegs;
342        LiveRegGens[I->getReg()] = SU;
343      }
344    }
345  }
346}
347
348/// Check to see if any of the pending instructions are ready to issue.  If
349/// so, add them to the available queue.
350void ScheduleDAGRRList::ReleasePending() {
351  if (!EnableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }
352
353  // If the available queue is empty, it is safe to reset MinAvailableCycle.
354  if (AvailableQueue->empty())
355    MinAvailableCycle = UINT_MAX;
356
357  // Check to see if any of the pending instructions are ready to issue.  If
358  // so, add them to the available queue.
359  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
360    unsigned ReadyCycle =
361      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
362    if (ReadyCycle < MinAvailableCycle)
363      MinAvailableCycle = ReadyCycle;
364
365    if (PendingQueue[i]->isAvailable) {
366      if (!isReady(PendingQueue[i]))
367          continue;
368      AvailableQueue->push(PendingQueue[i]);
369    }
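    // Remove this entry by swapping it with the last element and popping;
    // the decrements below make the loop re-examine the same index next.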
370    PendingQueue[i]->isPending = false;
371    PendingQueue[i] = PendingQueue.back();
372    PendingQueue.pop_back();
373    --i; --e;
374  }
375}
376
377/// Move the scheduler state forward by the specified number of Cycles.
378void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
379  if (NextCycle <= CurCycle)
380    return;
381
382  AvailableQueue->setCurCycle(NextCycle);
383  if (HazardRec->getMaxLookAhead() == 0) {
384    // Bypass lots of virtual calls in case of long latency.
385    CurCycle = NextCycle;
386  }
387  else {
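    // Step the hazard recognizer one cycle at a time: backwards (RecedeCycle)
    // for bottom-up scheduling, forwards (AdvanceCycle) for top-down.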
388    for (; CurCycle != NextCycle; ++CurCycle) {
389      if (isBottomUp)
390        HazardRec->RecedeCycle();
391      else
392        HazardRec->AdvanceCycle();
393    }
394  }
395  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
396  // available Q to release pending nodes at least once before popping.
397  ReleasePending();
398}
399
400/// Move the scheduler state forward until the specified node's dependents are
401/// ready and can be scheduled with no resource conflicts.
402void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
403  if (!EnableSchedCycles)
404    return;
405
406  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
407
408  // Bump CurCycle to account for latency. We assume the latency of other
409  // available instructions may be hidden by the stall (not a full pipe stall).
410  // This updates the hazard recognizer's cycle before reserving resources for
411  // this instruction.
412  AdvanceToCycle(ReadyCycle);
413
414  // Calls are scheduled in their preceding cycle, so don't conflict with
415  // hazards from instructions after the call. EmitNode will reset the
416  // scoreboard state before emitting the call.
417  if (isBottomUp && SU->isCall)
418    return;
419
420  // FIXME: For resource conflicts in very long non-pipelined stages, we
421  // should probably skip ahead here to avoid useless scoreboard checks.
422  int Stalls = 0;
423  while (true) {
424    ScheduleHazardRecognizer::HazardType HT =
425      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
426
427    if (HT == ScheduleHazardRecognizer::NoHazard)
428      break;
429
430    ++Stalls;
431  }
432  AdvanceToCycle(CurCycle + Stalls);
433}
434
435/// Record this SUnit in the HazardRecognizer.
436/// Does not update CurCycle.
437void ScheduleDAGRRList::EmitNode(SUnit *SU) {
438  switch (SU->getNode()->getOpcode()) {
439  default:
440    assert(SU->getNode()->isMachineOpcode() &&
441           "This target-independent node should not be scheduled.");
442    break;
443  case ISD::MERGE_VALUES:
444  case ISD::TokenFactor:
445  case ISD::CopyToReg:
446  case ISD::CopyFromReg:
447  case ISD::EH_LABEL:
448    // Noops don't affect the scoreboard state. Copies are likely to be
449    // removed.
450    return;
451  case ISD::INLINEASM:
452    // For inline asm, clear the pipeline state.
453    HazardRec->Reset();
454    return;
455  }
456  if (isBottomUp && SU->isCall) {
457    // Calls are scheduled with their preceding instructions. For bottom-up
458    // scheduling, clear the pipeline state before emitting.
459    HazardRec->Reset();
460  }
461
462  HazardRec->EmitInstruction(SU);
463
464  if (!isBottomUp && SU->isCall) {
465    HazardRec->Reset();
466  }
467}
468
469/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
470/// count of its predecessors. If a predecessor pending count is zero, add it to
471/// the Available queue.
472void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
473  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
474  DEBUG(SU->dump(this));
475
476#ifndef NDEBUG
477  if (CurCycle < SU->getHeight())
478    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
479#endif
480
481  // FIXME: Do not modify node height. It may interfere with
482  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
483  // node, its ready cycle can aid heuristics, and after scheduling it can
484  // indicate the scheduled cycle.
485  SU->setHeightToAtLeast(CurCycle);
486
487  // Reserve resources for the scheduled instruction.
488  EmitNode(SU);
489
490  Sequence.push_back(SU);
491
492  AvailableQueue->ScheduledNode(SU);
493
494  // Update liveness of predecessors before successors to avoid treating a
495  // two-address node as a live range def.
496  ReleasePredecessors(SU);
497
498  // Release all the implicit physical register defs that are live.
499  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
500       I != E; ++I) {
501    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
502    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
503      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
504      --NumLiveRegs;
505      LiveRegDefs[I->getReg()] = NULL;
506      LiveRegGens[I->getReg()] = NULL;
507    }
508  }
509
510  SU->isScheduled = true;
511
512  // Conditions under which the scheduler should eagerly advance the cycle:
513  // (1) No available instructions
514  // (2) All pipelines full, so available instructions must have hazards.
515  //
516  // If SchedCycles is disabled, count each inst as one cycle.
517  if (!EnableSchedCycles ||
518      AvailableQueue->empty() || HazardRec->atIssueLimit())
519    AdvanceToCycle(CurCycle + 1);
520}
521
522/// CapturePred - This does the opposite of ReleasePred. Since SU is being
523/// unscheduled, increase the succ left count of its predecessors. Remove
524/// them from AvailableQueue if necessary.
525void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
526  SUnit *PredSU = PredEdge->getSUnit();
527  if (PredSU->isAvailable) {
528    PredSU->isAvailable = false;
529    if (!PredSU->isPending)
530      AvailableQueue->remove(PredSU);
531  }
532
533  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
534  ++PredSU->NumSuccsLeft;
535}
536
537/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
538/// state and its predecessors' states to reflect the change.
539void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
540  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
541  DEBUG(SU->dump(this));
542
543  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
544       I != E; ++I) {
545    CapturePred(&*I);
546    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
547      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
548      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
549             "Physical register dependency violated?");
550      --NumLiveRegs;
551      LiveRegDefs[I->getReg()] = NULL;
552      LiveRegGens[I->getReg()] = NULL;
553    }
554  }
555
556  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
557       I != E; ++I) {
558    if (I->isAssignedRegDep()) {
559      // This becomes the nearest def. Note that an earlier def may still be
560      // pending if this is a two-address node.
561      if (!LiveRegDefs[I->getReg()])
562        ++NumLiveRegs;
563      LiveRegDefs[I->getReg()] = SU;
565      if (LiveRegGens[I->getReg()] == NULL ||
566          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
567        LiveRegGens[I->getReg()] = I->getSUnit();
568    }
569  }
570  if (SU->getHeight() < MinAvailableCycle)
571    MinAvailableCycle = SU->getHeight();
572
573  SU->setHeightDirty();
574  SU->isScheduled = false;
575  SU->isAvailable = true;
576  if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
577    // Don't make available until backtracking is complete.
578    SU->isPending = true;
579    PendingQueue.push_back(SU);
580  }
581  else {
582    AvailableQueue->push(SU);
583  }
584  AvailableQueue->UnscheduledNode(SU);
585}
586
587/// After backtracking, the hazard checker needs to be restored to a state
588/// corresponding to the current cycle.
589void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
590  HazardRec->Reset();
591
592  unsigned LookAhead = std::min((unsigned)Sequence.size(),
593                                HazardRec->getMaxLookAhead());
594  if (LookAhead == 0)
595    return;
596
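  // Re-emit the last LookAhead scheduled nodes into the freshly reset hazard
  // recognizer, receding its cycle as needed, so that its state matches the
  // current position in the schedule again.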
597  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
598  unsigned HazardCycle = (*I)->getHeight();
599  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
600    SUnit *SU = *I;
601    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
602      HazardRec->RecedeCycle();
603    }
604    EmitNode(SU);
605  }
606}
607
608/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
609/// including BtSU so that SU can be scheduled.
610void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
611  SUnit *OldSU = Sequence.back();
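  // Unschedule nodes from the end of the sequence until BtSU itself has been
  // unscheduled; every node scheduled after BtSU is removed along the way.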
612  while (true) {
613    Sequence.pop_back();
614    if (SU->isSucc(OldSU))
615      // Don't try to remove SU from AvailableQueue.
616      SU->isAvailable = false;
617    // FIXME: use ready cycle instead of height
618    CurCycle = OldSU->getHeight();
619    UnscheduleNodeBottomUp(OldSU);
620    AvailableQueue->setCurCycle(CurCycle);
621    if (OldSU == BtSU)
622      break;
623    OldSU = Sequence.back();
624  }
625
626  assert(!SU->isSucc(OldSU) && "Something is wrong!");
627
628  RestoreHazardCheckerBottomUp();
629
630  if (EnableSchedCycles)
631    ReleasePending();
632
633  ++NumBacktracks;
634}
635
636static bool isOperandOf(const SUnit *SU, SDNode *N) {
637  for (const SDNode *SUNode = SU->getNode(); SUNode;
638       SUNode = SUNode->getGluedNode()) {
639    if (SUNode->isOperandOf(N))
640      return true;
641  }
642  return false;
643}
644
645/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
646/// successors to the newly created node.
647SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
648  if (SU->getNode()->getGluedNode())
649    return NULL;
650
651  SDNode *N = SU->getNode();
652  if (!N)
653    return NULL;
654
655  SUnit *NewSU;
656  bool TryUnfold = false;
657  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
658    EVT VT = N->getValueType(i);
659    if (VT == MVT::Glue)
660      return NULL;
661    else if (VT == MVT::Other)
662      TryUnfold = true;
663  }
664  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
665    const SDValue &Op = N->getOperand(i);
666    EVT VT = Op.getNode()->getValueType(Op.getResNo());
667    if (VT == MVT::Glue)
668      return NULL;
669  }
670
671  if (TryUnfold) {
672    SmallVector<SDNode*, 2> NewNodes;
673    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
674      return NULL;
675
676    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
677    assert(NewNodes.size() == 2 && "Expected a load folding node!");
678
679    N = NewNodes[1];
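    // The unfolded form is expected to be a load (NewNodes[0]) feeding the
    // remaining register-form operation (NewNodes[1]).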
680    SDNode *LoadNode = NewNodes[0];
681    unsigned NumVals = N->getNumValues();
682    unsigned OldNumVals = SU->getNode()->getNumValues();
683    for (unsigned i = 0; i != NumVals; ++i)
684      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
685    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
686                                   SDValue(LoadNode, 1));
687
688    // LoadNode may already exist. This can happen when there is another
689    // load from the same location that produces the same type of value
690    // but has different alignment or volatility.
691    bool isNewLoad = true;
692    SUnit *LoadSU;
693    if (LoadNode->getNodeId() != -1) {
694      LoadSU = &SUnits[LoadNode->getNodeId()];
695      isNewLoad = false;
696    } else {
697      LoadSU = CreateNewSUnit(LoadNode);
698      LoadNode->setNodeId(LoadSU->NodeNum);
699      ComputeLatency(LoadSU);
700    }
701
702    NewSU = CreateNewSUnit(N);
703    assert(N->getNodeId() == -1 && "Node already inserted!");
704    N->setNodeId(NewSU->NodeNum);
705
706    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
707    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
708      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
709        NewSU->isTwoAddress = true;
710        break;
711      }
712    }
713    if (TID.isCommutable())
714      NewSU->isCommutable = true;
715    ComputeLatency(NewSU);
716
717    // Record all the edges to and from the old SU, by category.
718    SmallVector<SDep, 4> ChainPreds;
719    SmallVector<SDep, 4> ChainSuccs;
720    SmallVector<SDep, 4> LoadPreds;
721    SmallVector<SDep, 4> NodePreds;
722    SmallVector<SDep, 4> NodeSuccs;
723    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
724         I != E; ++I) {
725      if (I->isCtrl())
726        ChainPreds.push_back(*I);
727      else if (isOperandOf(I->getSUnit(), LoadNode))
728        LoadPreds.push_back(*I);
729      else
730        NodePreds.push_back(*I);
731    }
732    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
733         I != E; ++I) {
734      if (I->isCtrl())
735        ChainSuccs.push_back(*I);
736      else
737        NodeSuccs.push_back(*I);
738    }
739
740    // Now assign edges to the newly-created nodes.
741    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
742      const SDep &Pred = ChainPreds[i];
743      RemovePred(SU, Pred);
744      if (isNewLoad)
745        AddPred(LoadSU, Pred);
746    }
747    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
748      const SDep &Pred = LoadPreds[i];
749      RemovePred(SU, Pred);
750      if (isNewLoad)
751        AddPred(LoadSU, Pred);
752    }
753    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
754      const SDep &Pred = NodePreds[i];
755      RemovePred(SU, Pred);
756      AddPred(NewSU, Pred);
757    }
758    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
759      SDep D = NodeSuccs[i];
760      SUnit *SuccDep = D.getSUnit();
761      D.setSUnit(SU);
762      RemovePred(SuccDep, D);
763      D.setSUnit(NewSU);
764      AddPred(SuccDep, D);
765    }
766    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
767      SDep D = ChainSuccs[i];
768      SUnit *SuccDep = D.getSUnit();
769      D.setSUnit(SU);
770      RemovePred(SuccDep, D);
771      if (isNewLoad) {
772        D.setSUnit(LoadSU);
773        AddPred(SuccDep, D);
774      }
775    }
776
777    // Add a data dependency to reflect that NewSU reads the value defined
778    // by LoadSU.
779    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
780
781    if (isNewLoad)
782      AvailableQueue->addNode(LoadSU);
783    AvailableQueue->addNode(NewSU);
784
785    ++NumUnfolds;
786
787    if (NewSU->NumSuccsLeft == 0) {
788      NewSU->isAvailable = true;
789      return NewSU;
790    }
791    SU = NewSU;
792  }
793
794  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
795  NewSU = CreateClone(SU);
796
797  // New SUnit has the exact same predecessors.
798  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
799       I != E; ++I)
800    if (!I->isArtificial())
801      AddPred(NewSU, *I);
802
803  // Only copy scheduled successors. Cut them from old node's successor
804  // list and move them over.
805  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
806  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
807       I != E; ++I) {
808    if (I->isArtificial())
809      continue;
810    SUnit *SuccSU = I->getSUnit();
811    if (SuccSU->isScheduled) {
812      SDep D = *I;
813      D.setSUnit(NewSU);
814      AddPred(SuccSU, D);
815      D.setSUnit(SU);
816      DelDeps.push_back(std::make_pair(SuccSU, D));
817    }
818  }
819  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
820    RemovePred(DelDeps[i].first, DelDeps[i].second);
821
822  AvailableQueue->updateNode(SU);
823  AvailableQueue->addNode(NewSU);
824
825  ++NumDups;
826  return NewSU;
827}
828
829/// InsertCopiesAndMoveSuccs - Insert register copies and move all
830/// scheduled successors of the given SUnit to the last copy.
831void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
832                                               const TargetRegisterClass *DestRC,
833                                               const TargetRegisterClass *SrcRC,
834                                               SmallVector<SUnit*, 2> &Copies) {
835  SUnit *CopyFromSU = CreateNewSUnit(NULL);
836  CopyFromSU->CopySrcRC = SrcRC;
837  CopyFromSU->CopyDstRC = DestRC;
838
839  SUnit *CopyToSU = CreateNewSUnit(NULL);
840  CopyToSU->CopySrcRC = DestRC;
841  CopyToSU->CopyDstRC = SrcRC;
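  // Value flow after this transformation: SU defines the value, CopyFromSU
  // copies it from SrcRC into DestRC, CopyToSU copies it back into SrcRC, and
  // SU's scheduled successors are rewired below to read from CopyToSU.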
842
843  // Only copy scheduled successors. Cut them from old node's successor
844  // list and move them over.
845  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
846  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
847       I != E; ++I) {
848    if (I->isArtificial())
849      continue;
850    SUnit *SuccSU = I->getSUnit();
851    if (SuccSU->isScheduled) {
852      SDep D = *I;
853      D.setSUnit(CopyToSU);
854      AddPred(SuccSU, D);
855      DelDeps.push_back(std::make_pair(SuccSU, *I));
856    }
857  }
858  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
859    RemovePred(DelDeps[i].first, DelDeps[i].second);
860
861  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
862  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
863
864  AvailableQueue->updateNode(SU);
865  AvailableQueue->addNode(CopyFromSU);
866  AvailableQueue->addNode(CopyToSU);
867  Copies.push_back(CopyFromSU);
868  Copies.push_back(CopyToSU);
869
870  ++NumPRCopies;
871}
872
873/// getPhysicalRegisterVT - Returns the ValueType of the physical register
874/// definition of the specified node.
875/// FIXME: Move to SelectionDAG?
876static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
877                                 const TargetInstrInfo *TII) {
878  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
879  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
880  unsigned NumRes = TID.getNumDefs();
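  // The node's result values are assumed to list the explicit defs first,
  // followed by one value per implicit def; count up to Reg's position.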
881  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
882    if (Reg == *ImpDef)
883      break;
884    ++NumRes;
885  }
886  return N->getValueType(NumRes);
887}
888
889/// CheckForLiveRegDef - If the specified register def of the specified SUnit
890/// clobbers any "live" registers, add the interfering registers to LRegs.
891static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
892                               std::vector<SUnit*> &LiveRegDefs,
893                               SmallSet<unsigned, 4> &RegAdded,
894                               SmallVector<unsigned, 4> &LRegs,
895                               const TargetRegisterInfo *TRI) {
896  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
897
898    // Check if this overlapping register is live.
899    if (!LiveRegDefs[*AliasI]) continue;
900
901    // Allow multiple uses of the same def.
902    if (LiveRegDefs[*AliasI] == SU) continue;
903
904    // Add the interfering register to the set of live regs.
905    if (RegAdded.insert(*AliasI))
906      LRegs.push_back(*AliasI);
907  }
908}
909
910/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
911/// scheduling of the given node to satisfy live physical register dependencies.
912/// If the specific node is the last one that's available to schedule, do
913/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
914bool ScheduleDAGRRList::
915DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
916  if (NumLiveRegs == 0)
917    return false;
918
919  SmallSet<unsigned, 4> RegAdded;
920  // If this node would clobber any "live" register, then it's not ready.
921  //
922  // If SU is the currently live definition of the same register that it uses,
923  // then we are free to schedule it.
924  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
925       I != E; ++I) {
926    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
927      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
928                         RegAdded, LRegs, TRI);
929  }
930
931  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
932    if (Node->getOpcode() == ISD::INLINEASM) {
933      // Inline asm can clobber physical defs.
934      unsigned NumOps = Node->getNumOperands();
935      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
936        --NumOps;  // Ignore the glue operand.
937
938      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
939        unsigned Flags =
940          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
941        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
942
943        ++i; // Skip the ID value.
944        if (InlineAsm::isRegDefKind(Flags) ||
945            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
946          // Check for def of register or earlyclobber register.
947          for (; NumVals; --NumVals, ++i) {
948            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
949            if (TargetRegisterInfo::isPhysicalRegister(Reg))
950              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
951          }
952        } else
953          i += NumVals;
954      }
955      continue;
956    }
957
958    if (!Node->isMachineOpcode())
959      continue;
960    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
961    if (!TID.ImplicitDefs)
962      continue;
963    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
964      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
965  }
966
967  return !LRegs.empty();
968}
969
970/// Return a node that can be scheduled in this cycle. Requirements:
971/// (1) Ready: latency has been satisfied
972/// (2) No Hazards: resources are available
973/// (3) No Interferences: may unschedule to break register interferences.
974SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
975  SmallVector<SUnit*, 4> Interferences;
976  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
977
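  // Pop nodes until one is found with no live physical register interference.
  // Nodes that do interfere are set aside and reconsidered below.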
978  SUnit *CurSU = AvailableQueue->pop();
979  while (CurSU) {
980    SmallVector<unsigned, 4> LRegs;
981    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
982      break;
983    LRegsMap.insert(std::make_pair(CurSU, LRegs));
984
985    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
986    Interferences.push_back(CurSU);
987    CurSU = AvailableQueue->pop();
988  }
989  if (CurSU) {
990    // Add the nodes that aren't ready back onto the available list.
991    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
992      Interferences[i]->isPending = false;
993      assert(Interferences[i]->isAvailable && "must still be available");
994      AvailableQueue->push(Interferences[i]);
995    }
996    return CurSU;
997  }
998
999  // All candidates are delayed due to live physical reg dependencies.
1000  // Try backtracking, code duplication, or inserting cross class copies
1001  // to resolve it.
1002  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1003    SUnit *TrySU = Interferences[i];
1004    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1005
1006    // Try unscheduling up to the point where it's safe to schedule
1007    // this node.
1008    SUnit *BtSU = NULL;
1009    unsigned LiveCycle = UINT_MAX;
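    // Find the interfering live-register generator with the smallest height;
    // backtracking unschedules everything back to (and including) that node.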
1010    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1011      unsigned Reg = LRegs[j];
1012      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1013        BtSU = LiveRegGens[Reg];
1014        LiveCycle = BtSU->getHeight();
1015      }
1016    }
1017    if (!WillCreateCycle(TrySU, BtSU))  {
1018      BacktrackBottomUp(TrySU, BtSU);
1019
1020      // Force the current node to be scheduled before the node that
1021      // requires the physical reg dep.
1022      if (BtSU->isAvailable) {
1023        BtSU->isAvailable = false;
1024        if (!BtSU->isPending)
1025          AvailableQueue->remove(BtSU);
1026      }
1027      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1028                          /*Reg=*/0, /*isNormalMemory=*/false,
1029                          /*isMustAlias=*/false, /*isArtificial=*/true));
1030
1031      // If one or more successors have been unscheduled, then the current
1032      // node is no longer available. Schedule a successor that's now
1033      // available instead.
1034      if (!TrySU->isAvailable) {
1035        CurSU = AvailableQueue->pop();
1036      }
1037      else {
1038        CurSU = TrySU;
1039        TrySU->isPending = false;
1040        Interferences.erase(Interferences.begin()+i);
1041      }
1042      break;
1043    }
1044  }
1045
1046  if (!CurSU) {
1047    // Can't backtrack. If it's too expensive to copy the value, then try to
1048    // duplicate the nodes that produce these "too expensive to copy"
1049    // values to break the dependency. In case even that doesn't work,
1050    // insert cross class copies.
1051    // If it's not too expensive, i.e. cost != -1, issue copies.
1052    SUnit *TrySU = Interferences[0];
1053    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1054    assert(LRegs.size() == 1 && "Can't handle this yet!");
1055    unsigned Reg = LRegs[0];
1056    SUnit *LRDef = LiveRegDefs[Reg];
1057    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1058    const TargetRegisterClass *RC =
1059      TRI->getMinimalPhysRegClass(Reg, VT);
1060    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1061
1062    // If the cross copy register class is null, then it must be possible to
1063    // copy the value directly. Do not try to duplicate the def.
1064    SUnit *NewDef = 0;
1065    if (DestRC)
1066      NewDef = CopyAndMoveSuccessors(LRDef);
1067    else
1068      DestRC = RC;
1069    if (!NewDef) {
1070      // Issue copies, these can be expensive cross register class copies.
1071      SmallVector<SUnit*, 2> Copies;
1072      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1073      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1074            << " to SU #" << Copies.front()->NodeNum << "\n");
1075      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1076                          /*Reg=*/0, /*isNormalMemory=*/false,
1077                          /*isMustAlias=*/false,
1078                          /*isArtificial=*/true));
1079      NewDef = Copies.back();
1080    }
1081
1082    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1083          << " to SU #" << TrySU->NodeNum << "\n");
1084    LiveRegDefs[Reg] = NewDef;
1085    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1086                         /*Reg=*/0, /*isNormalMemory=*/false,
1087                         /*isMustAlias=*/false,
1088                         /*isArtificial=*/true));
1089    TrySU->isAvailable = false;
1090    CurSU = NewDef;
1091  }
1092
1093  assert(CurSU && "Unable to resolve live physical register dependencies!");
1094
1095  // Add the nodes that aren't ready back onto the available list.
1096  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1097    Interferences[i]->isPending = false;
1098    // May no longer be available due to backtracking.
1099    if (Interferences[i]->isAvailable) {
1100      AvailableQueue->push(Interferences[i]);
1101    }
1102  }
1103  return CurSU;
1104}
1105
1106/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1107/// schedulers.
1108void ScheduleDAGRRList::ListScheduleBottomUp() {
1109  // Release any predecessors of the special Exit node.
1110  ReleasePredecessors(&ExitSU);
1111
1112  // Add root to Available queue.
1113  if (!SUnits.empty()) {
1114    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1115    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1116    RootSU->isAvailable = true;
1117    AvailableQueue->push(RootSU);
1118  }
1119
1120  // While Available queue is not empty, grab the node with the highest
1121  // priority. If it is not ready put it back.  Schedule the node.
1122  Sequence.reserve(SUnits.size());
1123  while (!AvailableQueue->empty()) {
1124    DEBUG(dbgs() << "\n*** Examining Available\n";
1125          AvailableQueue->dump(this));
1126
1127    // Pick the best node to schedule taking all constraints into
1128    // consideration.
1129    SUnit *SU = PickNodeToScheduleBottomUp();
1130
1131    AdvancePastStalls(SU);
1132
1133    ScheduleNodeBottomUp(SU);
1134
1135    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1136      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1137      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1138      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1139    }
1140  }
1141
1142  // Reverse the order since this is a bottom-up schedule.
1143  std::reverse(Sequence.begin(), Sequence.end());
1144
1145#ifndef NDEBUG
1146  VerifySchedule(isBottomUp);
1147#endif
1148}
1149
1150//===----------------------------------------------------------------------===//
1151//  Top-Down Scheduling
1152//===----------------------------------------------------------------------===//
1153
1154/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1155/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1156void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1157  SUnit *SuccSU = SuccEdge->getSUnit();
1158
1159#ifndef NDEBUG
1160  if (SuccSU->NumPredsLeft == 0) {
1161    dbgs() << "*** Scheduling failed! ***\n";
1162    SuccSU->dump(this);
1163    dbgs() << " has been released too many times!\n";
1164    llvm_unreachable(0);
1165  }
1166#endif
1167  --SuccSU->NumPredsLeft;
1168
1169  // If all the node's predecessors are scheduled, this node is ready
1170  // to be scheduled. Ignore the special ExitSU node.
1171  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1172    SuccSU->isAvailable = true;
1173    AvailableQueue->push(SuccSU);
1174  }
1175}
1176
1177void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1178  // Top down: release successors
1179  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1180       I != E; ++I) {
1181    assert(!I->isAssignedRegDep() &&
1182           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1183
1184    ReleaseSucc(SU, &*I);
1185  }
1186}
1187
1188/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1189/// count of its successors. If a successor pending count is zero, add it to
1190/// the Available queue.
1191void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1192  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1193  DEBUG(SU->dump(this));
1194
1195  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1196  SU->setDepthToAtLeast(CurCycle);
1197  Sequence.push_back(SU);
1198
1199  ReleaseSuccessors(SU);
1200  SU->isScheduled = true;
1201  AvailableQueue->ScheduledNode(SU);
1202}
1203
1204/// ListScheduleTopDown - The main loop of list scheduling for top-down
1205/// schedulers.
1206void ScheduleDAGRRList::ListScheduleTopDown() {
1207  AvailableQueue->setCurCycle(CurCycle);
1208
1209  // Release any successors of the special Entry node.
1210  ReleaseSuccessors(&EntrySU);
1211
1212  // Add all leaves to the Available queue.
1213  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1214    // It is available if it has no predecessors.
1215    if (SUnits[i].Preds.empty()) {
1216      AvailableQueue->push(&SUnits[i]);
1217      SUnits[i].isAvailable = true;
1218    }
1219  }
1220
1221  // While Available queue is not empty, grab the node with the highest
1222  // priority. If it is not ready put it back.  Schedule the node.
1223  Sequence.reserve(SUnits.size());
1224  while (!AvailableQueue->empty()) {
1225    SUnit *CurSU = AvailableQueue->pop();
1226
1227    if (CurSU)
1228      ScheduleNodeTopDown(CurSU);
1229    ++CurCycle;
1230    AvailableQueue->setCurCycle(CurCycle);
1231  }
1232
1233#ifndef NDEBUG
1234  VerifySchedule(isBottomUp);
1235#endif
1236}
1237
1238
1239//===----------------------------------------------------------------------===//
1240//                RegReductionPriorityQueue Implementation
1241//===----------------------------------------------------------------------===//
1242//
1243// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1244// to reduce register pressure.
1245//
1246namespace {
1247  template<class SF>
1248  class RegReductionPriorityQueue;
1249
1250  struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1251    bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1252  };
1253
1254  // bu_ls_rr_sort - Priority function for bottom up register pressure
1255  // reduction scheduler.
1256  struct bu_ls_rr_sort : public queue_sort {
1257    enum {
1258      IsBottomUp = true,
1259      HasReadyFilter = false
1260    };
1261
1262    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
1263    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
1264    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1265
1266    bool operator()(const SUnit* left, const SUnit* right) const;
1267  };
1268
1269  // td_ls_rr_sort - Priority function for top down register pressure reduction
1270  // scheduler.
1271  struct td_ls_rr_sort : public queue_sort {
1272    enum {
1273      IsBottomUp = false,
1274      HasReadyFilter = false
1275    };
1276
1277    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
1278    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
1279    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1280
1281    bool operator()(const SUnit* left, const SUnit* right) const;
1282  };
1283
1284  // src_ls_rr_sort - Priority function for source order scheduler.
1285  struct src_ls_rr_sort : public queue_sort {
1286    enum {
1287      IsBottomUp = true,
1288      HasReadyFilter = false
1289    };
1290
1291    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
1292    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
1293      : SPQ(spq) {}
1294    src_ls_rr_sort(const src_ls_rr_sort &RHS)
1295      : SPQ(RHS.SPQ) {}
1296
1297    bool operator()(const SUnit* left, const SUnit* right) const;
1298  };
1299
1300  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1301  struct hybrid_ls_rr_sort : public queue_sort {
1302    enum {
1303      IsBottomUp = true,
1304      HasReadyFilter = false
1305    };
1306
1307    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
1308    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
1309      : SPQ(spq) {}
1310    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1311      : SPQ(RHS.SPQ) {}
1312
1313    bool operator()(const SUnit* left, const SUnit* right) const;
1314  };
1315
1316  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1317  // scheduler.
1318  struct ilp_ls_rr_sort : public queue_sort {
1319    enum {
1320      IsBottomUp = true,
1321      HasReadyFilter = true
1322    };
1323
1324    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
1325    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
1326      : SPQ(spq) {}
1327    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1328      : SPQ(RHS.SPQ) {}
1329
1330    bool isReady(SUnit *SU, unsigned CurCycle) const;
1331
1332    bool operator()(const SUnit* left, const SUnit* right) const;
1333  };
1334}  // end anonymous namespace
1335
1336/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1337/// Smaller number is the higher priority.
1338static unsigned
1339CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1340  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1341  if (SethiUllmanNumber != 0)
1342    return SethiUllmanNumber;
1343
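  // The number is the maximum over the non-chain predecessors' numbers, plus
  // one for each additional predecessor that ties that maximum.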
1344  unsigned Extra = 0;
1345  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1346       I != E; ++I) {
1347    if (I->isCtrl()) continue;  // ignore chain preds
1348    SUnit *PredSU = I->getSUnit();
1349    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1350    if (PredSethiUllman > SethiUllmanNumber) {
1351      SethiUllmanNumber = PredSethiUllman;
1352      Extra = 0;
1353    } else if (PredSethiUllman == SethiUllmanNumber)
1354      ++Extra;
1355  }
1356
1357  SethiUllmanNumber += Extra;
1358
1359  if (SethiUllmanNumber == 0)
1360    SethiUllmanNumber = 1;
1361
1362  return SethiUllmanNumber;
1363}
1364
1365namespace {
1366  template<class SF>
1367  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
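    // Linearly scan the queue for the best candidate according to Picker,
    // then remove it with a swap-and-pop so removal stays O(1).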
1368    static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1369      std::vector<SUnit *>::iterator Best = Q.begin();
1370      for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1371             E = Q.end(); I != E; ++I)
1372        if (Picker(*Best, *I))
1373          Best = I;
1374      SUnit *V = *Best;
1375      if (Best != prior(Q.end()))
1376        std::swap(*Best, Q.back());
1377      Q.pop_back();
1378      return V;
1379    }
1380
1381    std::vector<SUnit*> Queue;
1382    SF Picker;
1383    unsigned CurQueueId;
1384    bool TracksRegPressure;
1385
1386  protected:
1387    // SUnits - The SUnits for the current graph.
1388    std::vector<SUnit> *SUnits;
1389
1390    MachineFunction &MF;
1391    const TargetInstrInfo *TII;
1392    const TargetRegisterInfo *TRI;
1393    const TargetLowering *TLI;
1394    ScheduleDAGRRList *scheduleDAG;
1395
1396    // SethiUllmanNumbers - The SethiUllman number for each node.
1397    std::vector<unsigned> SethiUllmanNumbers;
1398
1399    /// RegPressure - Tracking current reg pressure per register class.
1400    ///
1401    std::vector<unsigned> RegPressure;
1402
1403    /// RegLimit - Tracking the number of allocatable registers per register
1404    /// class.
1405    std::vector<unsigned> RegLimit;
1406
1407  public:
1408    RegReductionPriorityQueue(MachineFunction &mf,
1409                              bool tracksrp,
1410                              const TargetInstrInfo *tii,
1411                              const TargetRegisterInfo *tri,
1412                              const TargetLowering *tli)
1413      : SchedulingPriorityQueue(SF::HasReadyFilter), Picker(this),
1414        CurQueueId(0), TracksRegPressure(tracksrp),
1415        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1416      if (TracksRegPressure) {
1417        unsigned NumRC = TRI->getNumRegClasses();
1418        RegLimit.resize(NumRC);
1419        RegPressure.resize(NumRC);
1420        std::fill(RegLimit.begin(), RegLimit.end(), 0);
1421        std::fill(RegPressure.begin(), RegPressure.end(), 0);
1422        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1423               E = TRI->regclass_end(); I != E; ++I)
1424          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1425      }
1426    }
1427
1428    bool isBottomUp() const { return SF::IsBottomUp; }
1429
1430    void initNodes(std::vector<SUnit> &sunits) {
1431      SUnits = &sunits;
1432      // Add pseudo dependency edges for two-address nodes.
1433      AddPseudoTwoAddrDeps();
1434      // Reroute edges to nodes with multiple uses.
1435      PrescheduleNodesWithMultipleUses();
1436      // Calculate node priorities.
1437      CalculateSethiUllmanNumbers();
1438    }
1439
1440    void addNode(const SUnit *SU) {
1441      unsigned SUSize = SethiUllmanNumbers.size();
1442      if (SUnits->size() > SUSize)
1443        SethiUllmanNumbers.resize(SUSize*2, 0);
1444      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1445    }
1446
1447    void updateNode(const SUnit *SU) {
1448      SethiUllmanNumbers[SU->NodeNum] = 0;
1449      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1450    }
1451
1452    void releaseState() {
1453      SUnits = 0;
1454      SethiUllmanNumbers.clear();
1455      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1456    }
1457
1458    unsigned getNodePriority(const SUnit *SU) const {
1459      assert(SU->NodeNum < SethiUllmanNumbers.size());
1460      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1461      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1462        // CopyToReg should be close to its uses to facilitate coalescing and
1463        // avoid spilling.
1464        return 0;
1465      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1466          Opc == TargetOpcode::SUBREG_TO_REG ||
1467          Opc == TargetOpcode::INSERT_SUBREG)
1468        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1469        // close to their uses to facilitate coalescing.
1470        return 0;
1471      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1472        // If SU does not have a register use, i.e. it doesn't produce a value
1473        // that would be consumed (e.g. store), then it terminates a chain of
1474        // computation.  Give it a large SethiUllman number so it will be
1475        // scheduled right before its predecessors so that it doesn't lengthen
1476        // their live ranges.
1477        return 0xffff;
1478      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1479        // If SU does not have a register def, schedule it close to its uses
1480        // because it does not lengthen any live ranges.
1481        return 0;
1482      return SethiUllmanNumbers[SU->NodeNum];
1483    }
1484
1485    unsigned getNodeOrdering(const SUnit *SU) const {
1486      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1487    }
1488
1489    bool empty() const { return Queue.empty(); }
1490
1491    bool isReady(SUnit *U) const {
1492      return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1493    }
1494
1495    void push(SUnit *U) {
1496      assert(!U->NodeQueueId && "Node in the queue already");
1497      U->NodeQueueId = ++CurQueueId;
1498      Queue.push_back(U);
1499    }
1500
1501    SUnit *pop() {
1502      if (Queue.empty()) return NULL;
1503
1504      SUnit *V = popFromQueue(Queue, Picker);
1505      V->NodeQueueId = 0;
1506      return V;
1507    }
1508
1509    void remove(SUnit *SU) {
1510      assert(!Queue.empty() && "Queue is empty!");
1511      assert(SU->NodeQueueId != 0 && "Not in queue!");
1512      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1513                                                   SU);
1514      if (I != prior(Queue.end()))
1515        std::swap(*I, Queue.back());
1516      Queue.pop_back();
1517      SU->NodeQueueId = 0;
1518    }
1519
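    /// HighRegPressure - Conservatively check whether scheduling SU, which makes
    /// the values defined by its data operands live, would raise the pressure of
    /// any representative register class to or above its limit.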
1520    bool HighRegPressure(const SUnit *SU) const {
1521      if (!TLI)
1522        return false;
1523
1524      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1525           I != E; ++I) {
1526        if (I->isCtrl())
1527          continue;
1528        SUnit *PredSU = I->getSUnit();
1529        const SDNode *PN = PredSU->getNode();
1530        if (!PN->isMachineOpcode()) {
1531          if (PN->getOpcode() == ISD::CopyFromReg) {
1532            EVT VT = PN->getValueType(0);
1533            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1534            unsigned Cost = TLI->getRepRegClassCostFor(VT);
1535            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1536              return true;
1537          }
1538          continue;
1539        }
1540        unsigned POpc = PN->getMachineOpcode();
1541        if (POpc == TargetOpcode::IMPLICIT_DEF)
1542          continue;
1543        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1544          EVT VT = PN->getOperand(0).getValueType();
1545          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1546          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1547          // Check if this increases register pressure of the specific register
1548          // class to the point where it would cause spills.
1549          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1550            return true;
1551          continue;
1552        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1553                   POpc == TargetOpcode::SUBREG_TO_REG) {
1554          EVT VT = PN->getValueType(0);
1555          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1556          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1557          // Check if this increases register pressure of the specific register
1558          // class to the point where it would cause spills.
1559          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1560            return true;
1561          continue;
1562        }
1563        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1564        for (unsigned i = 0; i != NumDefs; ++i) {
1565          EVT VT = PN->getValueType(i);
1566          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1567          if (RegPressure[RCId] >= RegLimit[RCId])
1568            return true; // Reg pressure already high.
1569          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1570          if (!PN->hasAnyUseOfValue(i))
1571            continue;
1572          // Check if this increases register pressure of the specific register
1573          // class to the point where it would cause spills.
1574          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1575            return true;
1576        }
1577      }
1578
1579      return false;
1580    }
1581
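    /// ScheduledNode - Update the tracked register pressure when SU is scheduled
    /// bottom-up: data operands that become live are charged to their
    /// representative register classes, and values defined by SU are released.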
1582    void ScheduledNode(SUnit *SU) {
1583      if (!TracksRegPressure)
1584        return;
1585
1586      const SDNode *N = SU->getNode();
1587      if (!N->isMachineOpcode()) {
1588        if (N->getOpcode() != ISD::CopyToReg)
1589          return;
1590      } else {
1591        unsigned Opc = N->getMachineOpcode();
1592        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1593            Opc == TargetOpcode::INSERT_SUBREG ||
1594            Opc == TargetOpcode::SUBREG_TO_REG ||
1595            Opc == TargetOpcode::REG_SEQUENCE ||
1596            Opc == TargetOpcode::IMPLICIT_DEF)
1597          return;
1598      }
1599
1600      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1601           I != E; ++I) {
1602        if (I->isCtrl())
1603          continue;
1604        SUnit *PredSU = I->getSUnit();
1605        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1606          continue;
1607        const SDNode *PN = PredSU->getNode();
1608        if (!PN->isMachineOpcode()) {
1609          if (PN->getOpcode() == ISD::CopyFromReg) {
1610            EVT VT = PN->getValueType(0);
1611            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1612            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1613          }
1614          continue;
1615        }
1616        unsigned POpc = PN->getMachineOpcode();
1617        if (POpc == TargetOpcode::IMPLICIT_DEF)
1618          continue;
1619        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1620          EVT VT = PN->getOperand(0).getValueType();
1621          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1622          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1623          continue;
1624        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1625                   POpc == TargetOpcode::SUBREG_TO_REG) {
1626          EVT VT = PN->getValueType(0);
1627          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1628          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1629          continue;
1630        }
1631        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1632        for (unsigned i = 0; i != NumDefs; ++i) {
1633          EVT VT = PN->getValueType(i);
1634          if (!PN->hasAnyUseOfValue(i))
1635            continue;
1636          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1637          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1638        }
1639      }
1640
1641      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1642      // may transfer data dependencies to CopyToReg.
1643      if (SU->NumSuccs && N->isMachineOpcode()) {
1644        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1645        for (unsigned i = 0; i != NumDefs; ++i) {
1646          EVT VT = N->getValueType(i);
1647          if (!N->hasAnyUseOfValue(i))
1648            continue;
1649          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1650          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1651            // Register pressure tracking is imprecise. This can happen.
1652            RegPressure[RCId] = 0;
1653          else
1654            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1655        }
1656      }
1657
1658      dumpRegPressure();
1659    }
1660
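    /// UnscheduledNode - Approximately undo the pressure adjustment made by
    /// ScheduledNode when SU is unscheduled during backtracking; as the comments
    /// below note, this tracking is imprecise.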
1661    void UnscheduledNode(SUnit *SU) {
1662      if (!TracksRegPressure)
1663        return;
1664
1665      const SDNode *N = SU->getNode();
1666      if (!N->isMachineOpcode()) {
1667        if (N->getOpcode() != ISD::CopyToReg)
1668          return;
1669      } else {
1670        unsigned Opc = N->getMachineOpcode();
1671        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1672            Opc == TargetOpcode::INSERT_SUBREG ||
1673            Opc == TargetOpcode::SUBREG_TO_REG ||
1674            Opc == TargetOpcode::REG_SEQUENCE ||
1675            Opc == TargetOpcode::IMPLICIT_DEF)
1676          return;
1677      }
1678
1679      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1680           I != E; ++I) {
1681        if (I->isCtrl())
1682          continue;
1683        SUnit *PredSU = I->getSUnit();
1684        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1685          continue;
1686        const SDNode *PN = PredSU->getNode();
1687        if (!PN->isMachineOpcode()) {
1688          if (PN->getOpcode() == ISD::CopyFromReg) {
1689            EVT VT = PN->getValueType(0);
1690            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1691            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1692          }
1693          continue;
1694        }
1695        unsigned POpc = PN->getMachineOpcode();
1696        if (POpc == TargetOpcode::IMPLICIT_DEF)
1697          continue;
1698        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1699          EVT VT = PN->getOperand(0).getValueType();
1700          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1701          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1702          continue;
1703        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1704                   POpc == TargetOpcode::SUBREG_TO_REG) {
1705          EVT VT = PN->getValueType(0);
1706          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1707          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1708          continue;
1709        }
1710        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1711        for (unsigned i = 0; i != NumDefs; ++i) {
1712          EVT VT = PN->getValueType(i);
1713          if (!PN->hasAnyUseOfValue(i))
1714            continue;
1715          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1716          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1717            // Register pressure tracking is imprecise. This can happen.
1718            RegPressure[RCId] = 0;
1719          else
1720            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1721        }
1722      }
1723
1724      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1725      // may transfer data dependencies to CopyToReg.
1726      if (SU->NumSuccs && N->isMachineOpcode()) {
1727        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1728        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1729          EVT VT = N->getValueType(i);
1730          if (VT == MVT::Glue || VT == MVT::Other)
1731            continue;
1732          if (!N->hasAnyUseOfValue(i))
1733            continue;
1734          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1735          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1736        }
1737      }
1738
1739      dumpRegPressure();
1740    }
1741
1742    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1743      scheduleDAG = scheduleDag;
1744    }
1745
1746    void dumpRegPressure() const {
1747      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1748             E = TRI->regclass_end(); I != E; ++I) {
1749        const TargetRegisterClass *RC = *I;
1750        unsigned Id = RC->getID();
1751        unsigned RP = RegPressure[Id];
1752        if (!RP) continue;
1753        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1754              << '\n');
1755      }
1756    }
1757
1758    void dump(ScheduleDAG *DAG) const {
1759      // Emulate pop() without clobbering NodeQueueIds.
1760      std::vector<SUnit*> DumpQueue = Queue;
1761      SF DumpPicker = Picker;
1762      while (!DumpQueue.empty()) {
1763        SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1764        if (isBottomUp())
1765          dbgs() << "Height " << SU->getHeight() << ": ";
1766        else
1767          dbgs() << "Depth " << SU->getDepth() << ": ";
1768        SU->dump(DAG);
1769      }
1770    }
1771
1772  protected:
1773    bool canClobber(const SUnit *SU, const SUnit *Op);
1774    void AddPseudoTwoAddrDeps();
1775    void PrescheduleNodesWithMultipleUses();
1776    void CalculateSethiUllmanNumbers();
1777  };
1778
1779  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1780    BURegReductionPriorityQueue;
1781
1782  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1783    TDRegReductionPriorityQueue;
1784
1785  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1786    SrcRegReductionPriorityQueue;
1787
1788  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1789    HybridBURRPriorityQueue;
1790
1791  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1792    ILPBURRPriorityQueue;
1793}
1794
1795/// closestSucc - Returns the maximum height among SU's data successors, i.e.
1796/// the scheduled cycle of the successor which is closest to the current cycle.
1797static unsigned closestSucc(const SUnit *SU) {
1798  unsigned MaxHeight = 0;
1799  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1800       I != E; ++I) {
1801    if (I->isCtrl()) continue;  // ignore chain succs
1802    unsigned Height = I->getSUnit()->getHeight();
1803    // If a bunch of CopyToRegs are stacked up, they should be considered
1804    // to be at the same position.
1805    if (I->getSUnit()->getNode() &&
1806        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1807      Height = closestSucc(I->getSUnit())+1;
1808    if (Height > MaxHeight)
1809      MaxHeight = Height;
1810  }
1811  return MaxHeight;
1812}
1813
1814/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1815/// for scratch registers, i.e. the number of data dependencies.
1816static unsigned calcMaxScratches(const SUnit *SU) {
1817  unsigned Scratches = 0;
1818  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1819       I != E; ++I) {
1820    if (I->isCtrl()) continue;  // ignore chain preds
1821    Scratches++;
1822  }
1823  return Scratches;
1824}
1825
1826/// hasOnlyLiveOutUses - Return true if all of SU's data successors are CopyToReg
1827/// nodes copying into virtual registers. Such a def is probably a liveout with no
1828/// other use, so it should be scheduled closer to the terminator.
1829static bool hasOnlyLiveOutUses(const SUnit *SU) {
1830  bool RetVal = false;
1831  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1832       I != E; ++I) {
1833    if (I->isCtrl()) continue;
1834    const SUnit *SuccSU = I->getSUnit();
1835    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1836      unsigned Reg =
1837        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1838      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1839        RetVal = true;
1840        continue;
1841      }
1842    }
1843    return false;
1844  }
1845  return RetVal;
1846}
1847
1848/// UnitsSharePred - Return true if the two scheduling units share a common
1849/// data predecessor.
1850static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1851  SmallSet<const SUnit*, 4> Preds;
1852  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1853       I != E; ++I) {
1854    if (I->isCtrl()) continue;  // ignore chain preds
1855    Preds.insert(I->getSUnit());
1856  }
1857  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1858       I != E; ++I) {
1859    if (I->isCtrl()) continue;  // ignore chain preds
1860    if (Preds.count(I->getSUnit()))
1861      return true;
1862  }
1863  return false;
1864}
1865
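/// BURRSort - Prioritization shared by the bottom-up schedulers: compare
/// Sethi-Ullman priority first, then break ties by the distance to the closest
/// successor, the number of data predecessors (scratches), height, depth, and
/// finally the order in which the nodes entered the queue.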
1866template <typename RRSort>
1867static bool BURRSort(const SUnit *left, const SUnit *right,
1868                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1869  unsigned LPriority = SPQ->getNodePriority(left);
1870  unsigned RPriority = SPQ->getNodePriority(right);
1871  if (LPriority != RPriority)
1872    return LPriority > RPriority;
1873
1874  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
1875  // e.g.
1876  // t1 = op t2, c1
1877  // t3 = op t4, c2
1878  //
1879  // and the following instructions are both ready.
1880  // t2 = op c3
1881  // t4 = op c4
1882  //
1883  // Then schedule t2 = op first.
1884  // i.e.
1885  // t4 = op c4
1886  // t2 = op c3
1887  // t1 = op t2, c1
1888  // t3 = op t4, c2
1889  //
1890  // This creates more short live intervals.
1891  unsigned LDist = closestSucc(left);
1892  unsigned RDist = closestSucc(right);
1893  if (LDist != RDist)
1894    return LDist < RDist;
1895
1896  // How many registers become live when the node is scheduled.
1897  unsigned LScratch = calcMaxScratches(left);
1898  unsigned RScratch = calcMaxScratches(right);
1899  if (LScratch != RScratch)
1900    return LScratch > RScratch;
1901
1902  // Note: with a bottom-up ready filter, the height check may be redundant.
1903  if (left->getHeight() != right->getHeight())
1904    return left->getHeight() > right->getHeight();
1905
1906  if (left->getDepth() != right->getDepth())
1907    return left->getDepth() < right->getDepth();
1908
1909  assert(left->NodeQueueId && right->NodeQueueId &&
1910         "NodeQueueId cannot be zero");
1911  return (left->NodeQueueId > right->NodeQueueId);
1912}
1913
1914// Bottom up
1915bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1916  return BURRSort(left, right, SPQ);
1917}
1918
1919// Source order, otherwise bottom up.
1920bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1921  unsigned LOrder = SPQ->getNodeOrdering(left);
1922  unsigned ROrder = SPQ->getNodeOrdering(right);
1923
1924  // Prefer an ordering where the lower the non-zero order number, the higher
1925  // the preference.
1926  if ((LOrder || ROrder) && LOrder != ROrder)
1927    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1928
1929  return BURRSort(left, right, SPQ);
1930}
1931
1932bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1933  if (left->isCall || right->isCall)
1934    // No way to compute latency of calls.
1935    return BURRSort(left, right, SPQ);
1936
1937  bool LHigh = SPQ->HighRegPressure(left);
1938  bool RHigh = SPQ->HighRegPressure(right);
1939  // Avoid causing spills. If register pressure is high, schedule for
1940  // register pressure reduction.
1941  if (LHigh && !RHigh)
1942    return true;
1943  else if (!LHigh && RHigh)
1944    return false;
1945  else if (!LHigh && !RHigh) {
1946    // If the two nodes share an operand and one of them has a single
1947    // use that is a live out copy, favor the one that is live out. Otherwise
1948    // it will be difficult to eliminate the copy if the instruction is a
1949    // loop induction variable update. e.g.
1950    // BB:
1951    // sub r1, r3, #1
1952    // str r0, [r2, r3]
1953    // mov r3, r1
1954    // cmp
1955    // bne BB
1956    bool SharePred = UnitsSharePred(left, right);
1957    // FIXME: Only adjust if BB is a loop back edge.
1958    // FIXME: What's the cost of a copy?
1959    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1960    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1961    int LHeight = (int)left->getHeight() - LBonus;
1962    int RHeight = (int)right->getHeight() - RBonus;
1963
1964    // Low register pressure situation, schedule for latency if possible.
1965    bool LStall = left->SchedulingPref == Sched::Latency &&
1966      (int)SPQ->getCurCycle() < LHeight;
1967    bool RStall = right->SchedulingPref == Sched::Latency &&
1968      (int)SPQ->getCurCycle() < RHeight;
1969    // If scheduling one of the nodes will cause a pipeline stall, delay it.
1970    // If scheduling both of them will cause pipeline stalls, sort them
1971    // according to their height.
1972    if (LStall) {
1973      if (!RStall)
1974        return true;
1975      if (LHeight != RHeight)
1976        return LHeight > RHeight;
1977    } else if (RStall)
1978      return false;
1979
1980    // If either node is scheduling for latency, sort them by height
1981    // and latency.
1982    if (left->SchedulingPref == Sched::Latency ||
1983        right->SchedulingPref == Sched::Latency) {
1984      if (LHeight != RHeight)
1985        return LHeight > RHeight;
1986      if (left->Latency != right->Latency)
1987        return left->Latency > right->Latency;
1988    }
1989  }
1990
1991  return BURRSort(left, right, SPQ);
1992}
1993
1994// Schedule as many instructions in each cycle as possible. So don't make an
1995// instruction available unless it is ready in the current cycle.
1996bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
1997  return SU->getHeight() <= CurCycle;
1998}
1999
2000bool ilp_ls_rr_sort::operator()(const SUnit *left,
2001                                const SUnit *right) const {
2002  if (left->isCall || right->isCall)
2003    // No way to compute latency of calls.
2004    return BURRSort(left, right, SPQ);
2005
2006  bool LHigh = SPQ->HighRegPressure(left);
2007  bool RHigh = SPQ->HighRegPressure(right);
2008  // Avoid causing spills. If register pressure is high, schedule for
2009  // register pressure reduction.
2010  if (LHigh && !RHigh)
2011    return true;
2012  else if (!LHigh && RHigh)
2013    return false;
2014  else if (!LHigh && !RHigh) {
2015    // Low register pressure situation, schedule to maximize instruction level
2016    // parallelism.
2017    if (left->NumPreds > right->NumPreds)
2018      return false;
2019    else if (left->NumPreds < right->NumPreds)
2020      return true;
2021  }
2022
2023  return BURRSort(left, right, SPQ);
2024}
2025
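/// canClobber - Return true if SU is a two-address instruction whose tied use
/// operand is defined by the scheduling unit that Op originated from.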
2026template<class SF>
2027bool
2028RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
2029  if (SU->isTwoAddress) {
2030    unsigned Opc = SU->getNode()->getMachineOpcode();
2031    const TargetInstrDesc &TID = TII->get(Opc);
2032    unsigned NumRes = TID.getNumDefs();
2033    unsigned NumOps = TID.getNumOperands() - NumRes;
2034    for (unsigned i = 0; i != NumOps; ++i) {
2035      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2036        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2037        if (DU->getNodeId() != -1 &&
2038            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2039          return true;
2040      }
2041    }
2042  }
2043  return false;
2044}
2045
2046/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2047/// physical register defs.
2048static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2049                                  const TargetInstrInfo *TII,
2050                                  const TargetRegisterInfo *TRI) {
2051  SDNode *N = SuccSU->getNode();
2052  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2053  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2054  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2055  for (const SDNode *SUNode = SU->getNode(); SUNode;
2056       SUNode = SUNode->getGluedNode()) {
2057    if (!SUNode->isMachineOpcode())
2058      continue;
2059    const unsigned *SUImpDefs =
2060      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2061    if (!SUImpDefs)
2062      return false;
2063    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2064      EVT VT = N->getValueType(i);
2065      if (VT == MVT::Glue || VT == MVT::Other)
2066        continue;
2067      if (!N->hasAnyUseOfValue(i))
2068        continue;
2069      unsigned Reg = ImpDefs[i - NumDefs];
2070      for (;*SUImpDefs; ++SUImpDefs) {
2071        unsigned SUReg = *SUImpDefs;
2072        if (TRI->regsOverlap(Reg, SUReg))
2073          return true;
2074      }
2075    }
2076  }
2077  return false;
2078}
2079
2080/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2081/// are not handled well by the general register pressure reduction
2082/// heuristics. When presented with code like this:
2083///
2084///      N
2085///    / |
2086///   /  |
2087///  U  store
2088///  |
2089/// ...
2090///
2091/// the heuristics tend to push the store up, but since the
2092/// operand of the store has another use (U), this would increase
2093/// the length of that other use (the U->N edge).
2094///
2095/// This function transforms code like the above to route U's
2096/// dependence through the store when possible, like this:
2097///
2098///      N
2099///      ||
2100///      ||
2101///     store
2102///       |
2103///       U
2104///       |
2105///      ...
2106///
2107/// This results in the store being scheduled immediately
2108/// after N, which shortens the U->N live range, reducing
2109/// register pressure.
2110///
2111template<class SF>
2112void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
2113  // Visit all the nodes in topological order, working top-down.
2114  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2115    SUnit *SU = &(*SUnits)[i];
2116    // For now, only look at nodes with no data successors, such as stores.
2117    // These are especially important, due to the heuristics in
2118    // getNodePriority for nodes with no data successors.
2119    if (SU->NumSuccs != 0)
2120      continue;
2121    // For now, only look at nodes with exactly one data predecessor.
2122    if (SU->NumPreds != 1)
2123      continue;
2124    // Avoid prescheduling copies to virtual registers, which don't behave
2125    // like other nodes from the perspective of scheduling heuristics.
2126    if (SDNode *N = SU->getNode())
2127      if (N->getOpcode() == ISD::CopyToReg &&
2128          TargetRegisterInfo::isVirtualRegister
2129            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2130        continue;
2131
2132    // Locate the single data predecessor.
2133    SUnit *PredSU = 0;
2134    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2135         EE = SU->Preds.end(); II != EE; ++II)
2136      if (!II->isCtrl()) {
2137        PredSU = II->getSUnit();
2138        break;
2139      }
2140    assert(PredSU);
2141
2142    // Don't rewrite edges that carry physregs, because that requires additional
2143    // support infrastructure.
2144    if (PredSU->hasPhysRegDefs)
2145      continue;
2146    // Short-circuit the case where SU is PredSU's only data successor.
2147    if (PredSU->NumSuccs == 1)
2148      continue;
2149    // Avoid prescheduling copies from virtual registers, which don't behave
2150    // like other nodes from the perspective of scheduling heuristics.
2151    if (SDNode *N = SU->getNode())
2152      if (N->getOpcode() == ISD::CopyFromReg &&
2153          TargetRegisterInfo::isVirtualRegister
2154            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2155        continue;
2156
2157    // Perform checks on the successors of PredSU.
2158    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2159         EE = PredSU->Succs.end(); II != EE; ++II) {
2160      SUnit *PredSuccSU = II->getSUnit();
2161      if (PredSuccSU == SU) continue;
2162      // If PredSU has another successor with no data successors, for
2163      // now don't attempt to choose either over the other.
2164      if (PredSuccSU->NumSuccs == 0)
2165        goto outer_loop_continue;
2166      // Don't break physical register dependencies.
2167      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2168        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2169          goto outer_loop_continue;
2170      // Don't introduce graph cycles.
2171      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2172        goto outer_loop_continue;
2173    }
2174
2175    // Ok, the transformation is safe and the heuristics suggest it is
2176    // profitable. Update the graph.
2177    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2178                 << " next to PredSU #" << PredSU->NodeNum
2179                 << " to guide scheduling in the presence of multiple uses\n");
2180    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2181      SDep Edge = PredSU->Succs[i];
2182      assert(!Edge.isAssignedRegDep());
2183      SUnit *SuccSU = Edge.getSUnit();
2184      if (SuccSU != SU) {
2185        Edge.setSUnit(PredSU);
2186        scheduleDAG->RemovePred(SuccSU, Edge);
2187        scheduleDAG->AddPred(SU, Edge);
2188        Edge.setSUnit(SU);
2189        scheduleDAG->AddPred(SuccSU, Edge);
2190        --i;
2191      }
2192    }
2193  outer_loop_continue:;
2194  }
2195}
2196
2197/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2198/// it as a def&use operand, add a pseudo control edge from it to the other
2199/// node (if it won't create a cycle) so the two-address one will be scheduled
2200/// first (lower in the schedule). If both nodes are two-address, favor the
2201/// one that has a CopyToReg use (more likely to be a loop induction update).
2202/// If both are two-address, but one is commutable while the other is not
2203/// commutable, favor the one that's not commutable.
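/// e.g. (illustrative):
///   t2 = op t1
///   t1 = add t1, c1   // two-address: the def of t1 is tied to the use of t1
/// The artificial edge keeps "t2 = op t1" above the add in the final schedule,
/// so the add can overwrite t1 in place without forcing an extra copy of t1.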
2204template<class SF>
2205void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
2206  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2207    SUnit *SU = &(*SUnits)[i];
2208    if (!SU->isTwoAddress)
2209      continue;
2210
2211    SDNode *Node = SU->getNode();
2212    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2213      continue;
2214
2215    bool isLiveOut = hasOnlyLiveOutUses(SU);
2216    unsigned Opc = Node->getMachineOpcode();
2217    const TargetInstrDesc &TID = TII->get(Opc);
2218    unsigned NumRes = TID.getNumDefs();
2219    unsigned NumOps = TID.getNumOperands() - NumRes;
2220    for (unsigned j = 0; j != NumOps; ++j) {
2221      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2222        continue;
2223      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2224      if (DU->getNodeId() == -1)
2225        continue;
2226      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2227      if (!DUSU) continue;
2228      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2229           E = DUSU->Succs.end(); I != E; ++I) {
2230        if (I->isCtrl()) continue;
2231        SUnit *SuccSU = I->getSUnit();
2232        if (SuccSU == SU)
2233          continue;
2234        // Be conservative. Ignore if nodes aren't at roughly the same
2235        // depth and height.
2236        if (SuccSU->getHeight() < SU->getHeight() &&
2237            (SU->getHeight() - SuccSU->getHeight()) > 1)
2238          continue;
2239        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2240        // constrains whatever is using the copy, instead of the copy
2241        // itself. In the case that the copy is coalesced, this
2242        // preserves the intent of the pseudo two-address heuristics.
2243        while (SuccSU->Succs.size() == 1 &&
2244               SuccSU->getNode()->isMachineOpcode() &&
2245               SuccSU->getNode()->getMachineOpcode() ==
2246                 TargetOpcode::COPY_TO_REGCLASS)
2247          SuccSU = SuccSU->Succs.front().getSUnit();
2248        // Don't constrain non-instruction nodes.
2249        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2250          continue;
2251        // Don't constrain nodes with physical register defs if the
2252        // predecessor can clobber them.
2253        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2254          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2255            continue;
2256        }
2257        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2258        // these may be coalesced away. We want them close to their uses.
2259        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2260        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2261            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2262            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2263          continue;
2264        if ((!canClobber(SuccSU, DUSU) ||
2265             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2266             (!SU->isCommutable && SuccSU->isCommutable)) &&
2267            !scheduleDAG->IsReachable(SuccSU, SU)) {
2268          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2269                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2270          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2271                                        /*Reg=*/0, /*isNormalMemory=*/false,
2272                                        /*isMustAlias=*/false,
2273                                        /*isArtificial=*/true));
2274        }
2275      }
2276    }
2277  }
2278}
2279
2280/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2281/// scheduling units.
2282template<class SF>
2283void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
2284  SethiUllmanNumbers.assign(SUnits->size(), 0);
2285
2286  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
2287    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
2288}
2289
2290/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors of
2291/// the successors of the SUnit SU. Stop counting as soon as the provided limit
2292/// is exceeded.
2293static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2294                                                    unsigned Limit) {
2295  unsigned Sum = 0;
2296  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2297       I != E; ++I) {
2298    const SUnit *SuccSU = I->getSUnit();
2299    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2300         EE = SuccSU->Preds.end(); II != EE; ++II) {
2301      SUnit *PredSU = II->getSUnit();
2302      if (!PredSU->isScheduled)
2303        if (++Sum > Limit)
2304          return Sum;
2305    }
2306  }
2307  return Sum;
2308}
2309
2310
2311// Top down
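// Combines Sethi-Ullman priority with small bonuses for nodes whose successors
// have only one unscheduled predecessor and for nodes with a single data
// successor, and a penalty for floaters (target nodes with no data
// predecessors); ties fall back to depth, remaining successors, and queue order.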
2312bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2313  unsigned LPriority = SPQ->getNodePriority(left);
2314  unsigned RPriority = SPQ->getNodePriority(right);
2315  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2316  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2317  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2318  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2319  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2320  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2321
2322  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2323    return false;
2324  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2325    return true;
2326
2327  if (LIsFloater)
2328    LBonus -= 2;
2329  if (RIsFloater)
2330    RBonus -= 2;
2331  if (left->NumSuccs == 1)
2332    LBonus += 2;
2333  if (right->NumSuccs == 1)
2334    RBonus += 2;
2335
2336  if (LPriority+LBonus != RPriority+RBonus)
2337    return LPriority+LBonus < RPriority+RBonus;
2338
2339  if (left->getDepth() != right->getDepth())
2340    return left->getDepth() < right->getDepth();
2341
2342  if (left->NumSuccsLeft != right->NumSuccsLeft)
2343    return left->NumSuccsLeft > right->NumSuccsLeft;
2344
2345  assert(left->NodeQueueId && right->NodeQueueId &&
2346         "NodeQueueId cannot be zero");
2347  return (left->NodeQueueId > right->NodeQueueId);
2348}
2349
2350//===----------------------------------------------------------------------===//
2351//                         Public Constructor Functions
2352//===----------------------------------------------------------------------===//
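// Each factory below builds the priority queue for its heuristic, constructs a
// ScheduleDAGRRList around it, and points the queue back at the scheduler so the
// queue can query the DAG and add artificial edges while scheduling.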
2353
2354llvm::ScheduleDAGSDNodes *
2355llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2356                                 CodeGenOpt::Level OptLevel) {
2357  const TargetMachine &TM = IS->TM;
2358  const TargetInstrInfo *TII = TM.getInstrInfo();
2359  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2360
2361  BURegReductionPriorityQueue *PQ =
2362    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2363  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2364  PQ->setScheduleDAG(SD);
2365  return SD;
2366}
2367
2368llvm::ScheduleDAGSDNodes *
2369llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2370                                 CodeGenOpt::Level OptLevel) {
2371  const TargetMachine &TM = IS->TM;
2372  const TargetInstrInfo *TII = TM.getInstrInfo();
2373  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2374
2375  TDRegReductionPriorityQueue *PQ =
2376    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2377  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2378  PQ->setScheduleDAG(SD);
2379  return SD;
2380}
2381
2382llvm::ScheduleDAGSDNodes *
2383llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2384                                   CodeGenOpt::Level OptLevel) {
2385  const TargetMachine &TM = IS->TM;
2386  const TargetInstrInfo *TII = TM.getInstrInfo();
2387  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2388
2389  SrcRegReductionPriorityQueue *PQ =
2390    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2391  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2392  PQ->setScheduleDAG(SD);
2393  return SD;
2394}
2395
2396llvm::ScheduleDAGSDNodes *
2397llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2398                                   CodeGenOpt::Level OptLevel) {
2399  const TargetMachine &TM = IS->TM;
2400  const TargetInstrInfo *TII = TM.getInstrInfo();
2401  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2402  const TargetLowering *TLI = &IS->getTargetLowering();
2403
2404  HybridBURRPriorityQueue *PQ =
2405    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2406
2407  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2408  PQ->setScheduleDAG(SD);
2409  return SD;
2410}
2411
2412llvm::ScheduleDAGSDNodes *
2413llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2414                                CodeGenOpt::Level OptLevel) {
2415  const TargetMachine &TM = IS->TM;
2416  const TargetInstrInfo *TII = TM.getInstrInfo();
2417  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2418  const TargetLowering *TLI = &IS->getTargetLowering();
2419
2420  ILPBURRPriorityQueue *PQ =
2421    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2422  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2423  PQ->setScheduleDAG(SD);
2424  return SD;
2425}
2426