ScheduleDAGRRList.cpp revision a75ce9f5d2236d93c117e861e60e6f3f748c9555
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> EnableSchedCycles(
70  "enable-sched-cycles",
71  cl::desc("Enable cycle-level precision during preRA scheduling"),
72  cl::init(false), cl::Hidden);
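// Note: the schedulers registered above are typically selected with the
// -pre-RA-sched option (e.g. in llc); the cycle-level flag above is a separate
// switch. An illustrative (hypothetical) invocation:
//   llc -pre-RA-sched=list-ilp -enable-sched-cycles test.ll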
73
74namespace {
75//===----------------------------------------------------------------------===//
76/// ScheduleDAGRRList - The actual register reduction list scheduler
77/// implementation.  This supports both top-down and bottom-up scheduling.
78///
79class ScheduleDAGRRList : public ScheduleDAGSDNodes {
80private:
81  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
82  /// it is top-down.
83  bool isBottomUp;
84
85  /// NeedLatency - True if the scheduler will make use of latency information.
86  ///
87  bool NeedLatency;
88
89  /// AvailableQueue - The priority queue to use for the available SUnits.
90  SchedulingPriorityQueue *AvailableQueue;
91
92  /// PendingQueue - This contains all of the instructions whose operands have
93  /// been issued, but their results are not ready yet (due to the latency of
94  /// the operation).  Once the operands become available, the instruction is
95  /// added to the AvailableQueue.
96  std::vector<SUnit*> PendingQueue;
97
98  /// HazardRec - The hazard recognizer to use.
99  ScheduleHazardRecognizer *HazardRec;
100
101  /// CurCycle - The current scheduler state corresponds to this cycle.
102  unsigned CurCycle;
103
104  /// MinAvailableCycle - Cycle of the soonest available instruction.
105  unsigned MinAvailableCycle;
106
107  /// LiveRegDefs - A set of physical registers and their definitions
108  /// that are "live". These nodes must be scheduled before any other node that
109  /// modifies the registers can be scheduled.
110  unsigned NumLiveRegs;
111  std::vector<SUnit*> LiveRegDefs;
112  std::vector<SUnit*> LiveRegGens;
113
114  /// Topo - A topological ordering for SUnits which permits fast IsReachable
115  /// and similar queries.
116  ScheduleDAGTopologicalSort Topo;
117
118public:
119  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
120                    SchedulingPriorityQueue *availqueue,
121                    CodeGenOpt::Level OptLevel)
122    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
123      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
124      Topo(SUnits) {
125
126    const TargetMachine &tm = mf.getTarget();
127    if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
128      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
129    else
130      HazardRec = new ScheduleHazardRecognizer();
131  }
132
133  ~ScheduleDAGRRList() {
134    delete HazardRec;
135    delete AvailableQueue;
136  }
137
138  void Schedule();
139
140  /// IsReachable - Checks if SU is reachable from TargetSU.
141  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
142    return Topo.IsReachable(SU, TargetSU);
143  }
144
145  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
146  /// create a cycle.
147  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
148    return Topo.WillCreateCycle(SU, TargetSU);
149  }
150
151  /// AddPred - adds a predecessor edge to SUnit SU.
153  /// Updates the topological ordering if required.
154  void AddPred(SUnit *SU, const SDep &D) {
155    Topo.AddPred(SU, D.getSUnit());
156    SU->addPred(D);
157  }
158
159  /// RemovePred - removes a predecessor edge from SUnit SU.
161  /// Updates the topological ordering if required.
162  void RemovePred(SUnit *SU, const SDep &D) {
163    Topo.RemovePred(SU, D.getSUnit());
164    SU->removePred(D);
165  }
166
167private:
168  bool isReady(SUnit *SU) {
169    return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
170      AvailableQueue->isReady(SU);
171  }
172
173  void ReleasePred(SUnit *SU, const SDep *PredEdge);
174  void ReleasePredecessors(SUnit *SU);
175  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
176  void ReleaseSuccessors(SUnit *SU);
177  void ReleasePending();
178  void AdvanceToCycle(unsigned NextCycle);
179  void AdvancePastStalls(SUnit *SU);
180  void EmitNode(SUnit *SU);
181  void ScheduleNodeBottomUp(SUnit*);
182  void CapturePred(SDep *PredEdge);
183  void UnscheduleNodeBottomUp(SUnit*);
184  void RestoreHazardCheckerBottomUp();
185  void BacktrackBottomUp(SUnit*, SUnit*);
186  SUnit *CopyAndMoveSuccessors(SUnit*);
187  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
188                                const TargetRegisterClass*,
189                                const TargetRegisterClass*,
190                                SmallVector<SUnit*, 2>&);
191  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
192
193  SUnit *PickNodeToScheduleBottomUp();
194  void ListScheduleBottomUp();
195
196  void ScheduleNodeTopDown(SUnit*);
197  void ListScheduleTopDown();
198
199
200  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
201  /// Updates the topological ordering if required.
202  SUnit *CreateNewSUnit(SDNode *N) {
203    unsigned NumSUnits = SUnits.size();
204    SUnit *NewNode = NewSUnit(N);
205    // Update the topological ordering.
206    if (NewNode->NodeNum >= NumSUnits)
207      Topo.InitDAGTopologicalSorting();
208    return NewNode;
209  }
210
211  /// CreateClone - Creates a new SUnit from an existing one.
212  /// Updates the topological ordering if required.
213  SUnit *CreateClone(SUnit *N) {
214    unsigned NumSUnits = SUnits.size();
215    SUnit *NewNode = Clone(N);
216    // Update the topological ordering.
217    if (NewNode->NodeNum >= NumSUnits)
218      Topo.InitDAGTopologicalSorting();
219    return NewNode;
220  }
221
222  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
223  /// need actual latency information but the hybrid scheduler does.
224  bool ForceUnitLatencies() const {
225    return !NeedLatency;
226  }
227};
228}  // end anonymous namespace
229
230
231/// Schedule - Schedule the DAG using list scheduling.
232void ScheduleDAGRRList::Schedule() {
233  DEBUG(dbgs()
234        << "********** List Scheduling BB#" << BB->getNumber()
235        << " '" << BB->getName() << "' **********\n");
236
237  CurCycle = 0;
238  MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
239  NumLiveRegs = 0;
240  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
241  LiveRegGens.resize(TRI->getNumRegs(), NULL);
242
243  // Build the scheduling graph.
244  BuildSchedGraph(NULL);
245
246  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
247          SUnits[su].dumpAll(this));
248  Topo.InitDAGTopologicalSorting();
249
250  AvailableQueue->initNodes(SUnits);
251
252  HazardRec->Reset();
253
254  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
255  if (isBottomUp)
256    ListScheduleBottomUp();
257  else
258    ListScheduleTopDown();
259
260  AvailableQueue->releaseState();
261}
262
263//===----------------------------------------------------------------------===//
264//  Bottom-Up Scheduling
265//===----------------------------------------------------------------------===//
266
267/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
268/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
269void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
270  SUnit *PredSU = PredEdge->getSUnit();
271
272#ifndef NDEBUG
273  if (PredSU->NumSuccsLeft == 0) {
274    dbgs() << "*** Scheduling failed! ***\n";
275    PredSU->dump(this);
276    dbgs() << " has been released too many times!\n";
277    llvm_unreachable(0);
278  }
279#endif
280  --PredSU->NumSuccsLeft;
281
282  if (!ForceUnitLatencies()) {
283    // Updating predecessor's height. This is now the cycle when the
284    // predecessor can be scheduled without causing a pipeline stall.
285    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
286  }
287
288  // If all the node's successors are scheduled, this node is ready
289  // to be scheduled. Ignore the special EntrySU node.
290  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
291    PredSU->isAvailable = true;
292
293    unsigned Height = PredSU->getHeight();
294    if (Height < MinAvailableCycle)
295      MinAvailableCycle = Height;
296
297    if (isReady(PredSU)) {
298      AvailableQueue->push(PredSU);
299    }
300    // CapturePred and others may have left the node in the pending queue; avoid
301    // adding it twice.
302    else if (!PredSU->isPending) {
303      PredSU->isPending = true;
304      PendingQueue.push_back(PredSU);
305    }
306  }
307}
308
309/// Call ReleasePred for each predecessor, then update register live def/gen.
310/// Always update LiveRegDefs for a register dependence even if the current SU
311/// also defines the register. This effectively creates one large live range
312/// across a sequence of two-address nodes. This is important because the
313/// entire chain must be scheduled together. Example:
314///
315/// flags = (3) add
316/// flags = (2) addc flags
317/// flags = (1) addc flags
318///
319/// results in
320///
321/// LiveRegDefs[flags] = 3
322/// LiveRegGens[flags] = 1
323///
324/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
325/// interference on flags.
326void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
327  // Bottom up: release predecessors
328  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
329       I != E; ++I) {
330    ReleasePred(SU, &*I);
331    if (I->isAssignedRegDep()) {
332      // This is a physical register dependency and it's impossible or
333      // expensive to copy the register. Make sure nothing that can
334      // clobber the register is scheduled between the predecessor and
335      // this node.
336      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
337      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
338             "interference on register dependence");
339      LiveRegDefs[I->getReg()] = I->getSUnit();
340      if (!LiveRegGens[I->getReg()]) {
341        ++NumLiveRegs;
342        LiveRegGens[I->getReg()] = SU;
343      }
344    }
345  }
346}
347
348/// Check to see if any of the pending instructions are ready to issue.  If
349/// so, add them to the available queue.
350void ScheduleDAGRRList::ReleasePending() {
351  if (!EnableSchedCycles) {
352    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
353    return;
354  }
355
356  // If the available queue is empty, it is safe to reset MinAvailableCycle.
357  if (AvailableQueue->empty())
358    MinAvailableCycle = UINT_MAX;
359
360  // Check to see if any of the pending instructions are ready to issue.  If
361  // so, add them to the available queue.
362  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
363    unsigned ReadyCycle =
364      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
365    if (ReadyCycle < MinAvailableCycle)
366      MinAvailableCycle = ReadyCycle;
367
368    if (PendingQueue[i]->isAvailable) {
369      if (!isReady(PendingQueue[i]))
370          continue;
371      AvailableQueue->push(PendingQueue[i]);
372    }
373    PendingQueue[i]->isPending = false;
374    PendingQueue[i] = PendingQueue.back();
375    PendingQueue.pop_back();
376    --i; --e;
377  }
378}
379
380/// Move the scheduler state forward to the specified cycle.
381void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
382  if (NextCycle <= CurCycle)
383    return;
384
385  AvailableQueue->setCurCycle(NextCycle);
386  if (HazardRec->getMaxLookAhead() == 0) {
387    // Bypass lots of virtual calls in case of long latency.
388    CurCycle = NextCycle;
389  }
390  else {
391    for (; CurCycle != NextCycle; ++CurCycle) {
392      if (isBottomUp)
393        HazardRec->RecedeCycle();
394      else
395        HazardRec->AdvanceCycle();
396    }
397  }
398  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
399  // available Q to release pending nodes at least once before popping.
400  ReleasePending();
401}
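// Note on the cycle accounting above: for a bottom-up schedule the hazard
// recognizer is wound backwards one step per cycle, so advancing from CurCycle 3
// to NextCycle 5 issues two RecedeCycle calls; a top-down schedule uses
// AdvanceCycle instead. With no lookahead the loop is bypassed entirely.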
402
403/// Move the scheduler state forward until the specified node's dependents are
404/// ready and can be scheduled with no resource conflicts.
405void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
406  if (!EnableSchedCycles)
407    return;
408
409  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
410
411  // Bump CurCycle to account for latency. We assume the latency of other
412  // available instructions may be hidden by the stall (not a full pipe stall).
413  // This updates the hazard recognizer's cycle before reserving resources for
414  // this instruction.
415  AdvanceToCycle(ReadyCycle);
416
417  // Calls are scheduled in their preceding cycle, so don't conflict with
418  // hazards from instructions after the call. EmitNode will reset the
419  // scoreboard state before emitting the call.
420  if (isBottomUp && SU->isCall)
421    return;
422
423  // FIXME: For resource conflicts in very long non-pipelined stages, we
424  // should probably skip ahead here to avoid useless scoreboard checks.
425  int Stalls = 0;
426  while (true) {
427    ScheduleHazardRecognizer::HazardType HT =
428      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
429
430    if (HT == ScheduleHazardRecognizer::NoHazard)
431      break;
432
433    ++Stalls;
434  }
435  AdvanceToCycle(CurCycle + Stalls);
436}
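// Example of the stall search above (values illustrative): if SU becomes ready at
// cycle 7 while CurCycle is 4, AdvanceToCycle(7) is applied first; the loop then
// queries the hazard recognizer with stall offsets 0, 1, 2, ... (negated when
// scheduling bottom-up) until it reports NoHazard, and the cycle is bumped by the
// number of stalls found.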
437
438/// Record this SUnit in the HazardRecognizer.
439/// Does not update CurCycle.
440void ScheduleDAGRRList::EmitNode(SUnit *SU) {
441  if (!EnableSchedCycles || HazardRec->getMaxLookAhead() == 0)
442    return;
443
444  // Check for phys reg copy.
445  if (!SU->getNode())
446    return;
447
448  switch (SU->getNode()->getOpcode()) {
449  default:
450    assert(SU->getNode()->isMachineOpcode() &&
451           "This target-independent node should not be scheduled.");
452    break;
453  case ISD::MERGE_VALUES:
454  case ISD::TokenFactor:
455  case ISD::CopyToReg:
456  case ISD::CopyFromReg:
457  case ISD::EH_LABEL:
458    // Noops don't affect the scoreboard state. Copies are likely to be
459    // removed.
460    return;
461  case ISD::INLINEASM:
462    // For inline asm, clear the pipeline state.
463    HazardRec->Reset();
464    return;
465  }
466  if (isBottomUp && SU->isCall) {
467    // Calls are scheduled with their preceding instructions. For bottom-up
468    // scheduling, clear the pipeline state before emitting.
469    HazardRec->Reset();
470  }
471
472  HazardRec->EmitInstruction(SU);
473
474  if (!isBottomUp && SU->isCall) {
475    HazardRec->Reset();
476  }
477}
478
479/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
480/// count of its predecessors. If a predecessor pending count is zero, add it to
481/// the Available queue.
482void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
483  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
484  DEBUG(SU->dump(this));
485
486#ifndef NDEBUG
487  if (CurCycle < SU->getHeight())
488    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
489#endif
490
491  // FIXME: Do not modify node height. It may interfere with
492  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
493  // node, its ready cycle can aid heuristics, and after scheduling it can
494  // indicate the scheduled cycle.
495  SU->setHeightToAtLeast(CurCycle);
496
497  // Reserve resources for the scheduled instruction.
498  EmitNode(SU);
499
500  Sequence.push_back(SU);
501
502  AvailableQueue->ScheduledNode(SU);
503
504  // Update liveness of predecessors before successors to avoid treating a
505  // two-address node as a live range def.
506  ReleasePredecessors(SU);
507
508  // Release all the implicit physical register defs that are live.
509  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
510       I != E; ++I) {
511    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
512    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
513      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
514      --NumLiveRegs;
515      LiveRegDefs[I->getReg()] = NULL;
516      LiveRegGens[I->getReg()] = NULL;
517    }
518  }
519
520  SU->isScheduled = true;
521
522  // Conditions under which the scheduler should eagerly advance the cycle:
523  // (1) No available instructions
524  // (2) All pipelines full, so available instructions must have hazards.
525  //
526  // If SchedCycles is disabled, count each inst as one cycle.
527  if (!EnableSchedCycles ||
528      AvailableQueue->empty() || HazardRec->atIssueLimit())
529    AdvanceToCycle(CurCycle + 1);
530}
531
532/// CapturePred - This does the opposite of ReleasePred. Since SU is being
533/// unscheduled, increase the succ left count of its predecessors. Remove
534/// them from AvailableQueue if necessary.
535void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
536  SUnit *PredSU = PredEdge->getSUnit();
537  if (PredSU->isAvailable) {
538    PredSU->isAvailable = false;
539    if (!PredSU->isPending)
540      AvailableQueue->remove(PredSU);
541  }
542
543  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
544  ++PredSU->NumSuccsLeft;
545}
546
547/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
548/// state and its predecessors' states to reflect the change.
549void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
550  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
551  DEBUG(SU->dump(this));
552
553  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
554       I != E; ++I) {
555    CapturePred(&*I);
556    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
557      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
558      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
559             "Physical register dependency violated?");
560      --NumLiveRegs;
561      LiveRegDefs[I->getReg()] = NULL;
562      LiveRegGens[I->getReg()] = NULL;
563    }
564  }
565
566  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
567       I != E; ++I) {
568    if (I->isAssignedRegDep()) {
569      // This becomes the nearest def. Note that an earlier def may still be
570      // pending if this is a two-address node.
571      if (!LiveRegDefs[I->getReg()]) {
572        ++NumLiveRegs;
573      }
574      LiveRegDefs[I->getReg()] = SU;
575      if (LiveRegGens[I->getReg()] == NULL ||
576          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
577        LiveRegGens[I->getReg()] = I->getSUnit();
578    }
579  }
580  if (SU->getHeight() < MinAvailableCycle)
581    MinAvailableCycle = SU->getHeight();
582
583  SU->setHeightDirty();
584  SU->isScheduled = false;
585  SU->isAvailable = true;
586  if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
587    // Don't make available until backtracking is complete.
588    SU->isPending = true;
589    PendingQueue.push_back(SU);
590  }
591  else {
592    AvailableQueue->push(SU);
593  }
594  AvailableQueue->UnscheduledNode(SU);
595}
596
597/// After backtracking, the hazard checker needs to be restored to a state
598/// corresponding to the current cycle.
599void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
600  HazardRec->Reset();
601
602  unsigned LookAhead = std::min((unsigned)Sequence.size(),
603                                HazardRec->getMaxLookAhead());
604  if (LookAhead == 0)
605    return;
606
607  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
608  unsigned HazardCycle = (*I)->getHeight();
609  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
610    SUnit *SU = *I;
611    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
612      HazardRec->RecedeCycle();
613    }
614    EmitNode(SU);
615  }
616}
617
618/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes from the end
619/// of the sequence, up to and including BtSU, so that SU can be scheduled.
620void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
621  SUnit *OldSU = Sequence.back();
622  while (true) {
623    Sequence.pop_back();
624    if (SU->isSucc(OldSU))
625      // Don't try to remove SU from AvailableQueue.
626      SU->isAvailable = false;
627    // FIXME: use ready cycle instead of height
628    CurCycle = OldSU->getHeight();
629    UnscheduleNodeBottomUp(OldSU);
630    AvailableQueue->setCurCycle(CurCycle);
631    if (OldSU == BtSU)
632      break;
633    OldSU = Sequence.back();
634  }
635
636  assert(!SU->isSucc(OldSU) && "Something is wrong!");
637
638  RestoreHazardCheckerBottomUp();
639
640  ReleasePending();
641
642  ++NumBacktracks;
643}
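// Illustrative walkthrough of the loop above: if the scheduled Sequence ends with
// [..., BtSU, X, Y] (X and Y are hypothetical nodes), Y is unscheduled first, then
// X, then BtSU, with CurCycle rewound to each node's height; once BtSU has been
// removed, the hazard recognizer state is rebuilt and pending nodes are
// re-released.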
644
645static bool isOperandOf(const SUnit *SU, SDNode *N) {
646  for (const SDNode *SUNode = SU->getNode(); SUNode;
647       SUNode = SUNode->getGluedNode()) {
648    if (SUNode->isOperandOf(N))
649      return true;
650  }
651  return false;
652}
653
654/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
655/// successors to the newly created node.
656SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
657  SDNode *N = SU->getNode();
658  if (!N)
659    return NULL;
660
661  if (SU->getNode()->getGluedNode())
662    return NULL;
663
664  SUnit *NewSU;
665  bool TryUnfold = false;
666  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
667    EVT VT = N->getValueType(i);
668    if (VT == MVT::Glue)
669      return NULL;
670    else if (VT == MVT::Other)
671      TryUnfold = true;
672  }
673  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
674    const SDValue &Op = N->getOperand(i);
675    EVT VT = Op.getNode()->getValueType(Op.getResNo());
676    if (VT == MVT::Glue)
677      return NULL;
678  }
679
680  if (TryUnfold) {
681    SmallVector<SDNode*, 2> NewNodes;
682    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
683      return NULL;
684
685    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
686    assert(NewNodes.size() == 2 && "Expected a load folding node!");
687
688    N = NewNodes[1];
689    SDNode *LoadNode = NewNodes[0];
690    unsigned NumVals = N->getNumValues();
691    unsigned OldNumVals = SU->getNode()->getNumValues();
692    for (unsigned i = 0; i != NumVals; ++i)
693      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
694    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
695                                   SDValue(LoadNode, 1));
696
697    // LoadNode may already exist. This can happen when there is another
698    // load from the same location that produces the same type of value
699    // but has different alignment or volatility.
700    bool isNewLoad = true;
701    SUnit *LoadSU;
702    if (LoadNode->getNodeId() != -1) {
703      LoadSU = &SUnits[LoadNode->getNodeId()];
704      isNewLoad = false;
705    } else {
706      LoadSU = CreateNewSUnit(LoadNode);
707      LoadNode->setNodeId(LoadSU->NodeNum);
708      ComputeLatency(LoadSU);
709    }
710
711    SUnit *NewSU = CreateNewSUnit(N);
712    assert(N->getNodeId() == -1 && "Node already inserted!");
713    N->setNodeId(NewSU->NodeNum);
714
715    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
716    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
717      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
718        NewSU->isTwoAddress = true;
719        break;
720      }
721    }
722    if (TID.isCommutable())
723      NewSU->isCommutable = true;
724    ComputeLatency(NewSU);
725
726    // Record all the edges to and from the old SU, by category.
727    SmallVector<SDep, 4> ChainPreds;
728    SmallVector<SDep, 4> ChainSuccs;
729    SmallVector<SDep, 4> LoadPreds;
730    SmallVector<SDep, 4> NodePreds;
731    SmallVector<SDep, 4> NodeSuccs;
732    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
733         I != E; ++I) {
734      if (I->isCtrl())
735        ChainPreds.push_back(*I);
736      else if (isOperandOf(I->getSUnit(), LoadNode))
737        LoadPreds.push_back(*I);
738      else
739        NodePreds.push_back(*I);
740    }
741    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
742         I != E; ++I) {
743      if (I->isCtrl())
744        ChainSuccs.push_back(*I);
745      else
746        NodeSuccs.push_back(*I);
747    }
748
749    // Now assign edges to the newly-created nodes.
750    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
751      const SDep &Pred = ChainPreds[i];
752      RemovePred(SU, Pred);
753      if (isNewLoad)
754        AddPred(LoadSU, Pred);
755    }
756    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
757      const SDep &Pred = LoadPreds[i];
758      RemovePred(SU, Pred);
759      if (isNewLoad)
760        AddPred(LoadSU, Pred);
761    }
762    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
763      const SDep &Pred = NodePreds[i];
764      RemovePred(SU, Pred);
765      AddPred(NewSU, Pred);
766    }
767    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
768      SDep D = NodeSuccs[i];
769      SUnit *SuccDep = D.getSUnit();
770      D.setSUnit(SU);
771      RemovePred(SuccDep, D);
772      D.setSUnit(NewSU);
773      AddPred(SuccDep, D);
774    }
775    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
776      SDep D = ChainSuccs[i];
777      SUnit *SuccDep = D.getSUnit();
778      D.setSUnit(SU);
779      RemovePred(SuccDep, D);
780      if (isNewLoad) {
781        D.setSUnit(LoadSU);
782        AddPred(SuccDep, D);
783      }
784    }
785
786    // Add a data dependency to reflect that NewSU reads the value defined
787    // by LoadSU.
788    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
789
790    if (isNewLoad)
791      AvailableQueue->addNode(LoadSU);
792    AvailableQueue->addNode(NewSU);
793
794    ++NumUnfolds;
795
796    if (NewSU->NumSuccsLeft == 0) {
797      NewSU->isAvailable = true;
798      return NewSU;
799    }
800    SU = NewSU;
801  }
802
803  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
804  NewSU = CreateClone(SU);
805
806  // New SUnit has the exact same predecessors.
807  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
808       I != E; ++I)
809    if (!I->isArtificial())
810      AddPred(NewSU, *I);
811
812  // Only copy scheduled successors. Cut them from old node's successor
813  // list and move them over.
814  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
815  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
816       I != E; ++I) {
817    if (I->isArtificial())
818      continue;
819    SUnit *SuccSU = I->getSUnit();
820    if (SuccSU->isScheduled) {
821      SDep D = *I;
822      D.setSUnit(NewSU);
823      AddPred(SuccSU, D);
824      D.setSUnit(SU);
825      DelDeps.push_back(std::make_pair(SuccSU, D));
826    }
827  }
828  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
829    RemovePred(DelDeps[i].first, DelDeps[i].second);
830
831  AvailableQueue->updateNode(SU);
832  AvailableQueue->addNode(NewSU);
833
834  ++NumDups;
835  return NewSU;
836}
837
838/// InsertCopiesAndMoveSuccs - Insert register copies and move all
839/// scheduled successors of the given SUnit to the last copy.
840void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
841                                               const TargetRegisterClass *DestRC,
842                                               const TargetRegisterClass *SrcRC,
843                                               SmallVector<SUnit*, 2> &Copies) {
844  SUnit *CopyFromSU = CreateNewSUnit(NULL);
845  CopyFromSU->CopySrcRC = SrcRC;
846  CopyFromSU->CopyDstRC = DestRC;
847
848  SUnit *CopyToSU = CreateNewSUnit(NULL);
849  CopyToSU->CopySrcRC = DestRC;
850  CopyToSU->CopyDstRC = SrcRC;
851
852  // Only copy scheduled successors. Cut them from old node's successor
853  // list and move them over.
854  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
855  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
856       I != E; ++I) {
857    if (I->isArtificial())
858      continue;
859    SUnit *SuccSU = I->getSUnit();
860    if (SuccSU->isScheduled) {
861      SDep D = *I;
862      D.setSUnit(CopyToSU);
863      AddPred(SuccSU, D);
864      DelDeps.push_back(std::make_pair(SuccSU, *I));
865    }
866  }
867  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
868    RemovePred(DelDeps[i].first, DelDeps[i].second);
869
870  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
871  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
872
873  AvailableQueue->updateNode(SU);
874  AvailableQueue->addNode(CopyFromSU);
875  AvailableQueue->addNode(CopyToSU);
876  Copies.push_back(CopyFromSU);
877  Copies.push_back(CopyToSU);
878
879  ++NumPRCopies;
880}
881
882/// getPhysicalRegisterVT - Returns the ValueType of the physical register
883/// definition of the specified node.
884/// FIXME: Move to SelectionDAG?
885static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
886                                 const TargetInstrInfo *TII) {
887  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
888  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
889  unsigned NumRes = TID.getNumDefs();
890  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
891    if (Reg == *ImpDef)
892      break;
893    ++NumRes;
894  }
895  return N->getValueType(NumRes);
896}
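// Worked example (register names hypothetical): for a machine node with two
// explicit defs and implicit defs {R0, R1}, a query for R1 skips past R0, so
// NumRes ends up as 3 and the node's fourth value type is returned.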
897
898/// CheckForLiveRegDef - If the specified register def of the specified SUnit
899/// clobbers any "live" registers, add the interfering registers to LRegs.
900static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
901                               std::vector<SUnit*> &LiveRegDefs,
902                               SmallSet<unsigned, 4> &RegAdded,
903                               SmallVector<unsigned, 4> &LRegs,
904                               const TargetRegisterInfo *TRI) {
905  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
906
907    // Check if Reg, or any register aliased with it, is live.
908    if (!LiveRegDefs[*AliasI]) continue;
909
910    // Allow multiple uses of the same def.
911    if (LiveRegDefs[*AliasI] == SU) continue;
912
913    // Add the live register to the set of interfering regs.
914    if (RegAdded.insert(*AliasI))
915      LRegs.push_back(*AliasI);
916  }
917}
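// Note: getOverlaps is expected to enumerate Reg itself along with every register
// that aliases it, so a live def of a super- or sub-register is also treated as
// interference here.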
918
919/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
920/// scheduling of the given node to satisfy live physical register dependencies.
921/// If the specific node is the last one that's available to schedule, do
922/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
923bool ScheduleDAGRRList::
924DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
925  if (NumLiveRegs == 0)
926    return false;
927
928  SmallSet<unsigned, 4> RegAdded;
929  // If this node would clobber any "live" register, then it's not ready.
930  //
931  // If SU is the currently live definition of the same register that it uses,
932  // then we are free to schedule it.
933  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
934       I != E; ++I) {
935    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
936      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
937                         RegAdded, LRegs, TRI);
938  }
939
940  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
941    if (Node->getOpcode() == ISD::INLINEASM) {
942      // Inline asm can clobber physical defs.
943      unsigned NumOps = Node->getNumOperands();
944      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
945        --NumOps;  // Ignore the glue operand.
946
947      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
948        unsigned Flags =
949          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
950        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
951
952        ++i; // Skip the ID value.
953        if (InlineAsm::isRegDefKind(Flags) ||
954            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
955          // Check for def of register or earlyclobber register.
956          for (; NumVals; --NumVals, ++i) {
957            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
958            if (TargetRegisterInfo::isPhysicalRegister(Reg))
959              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
960          }
961        } else
962          i += NumVals;
963      }
964      continue;
965    }
966
967    if (!Node->isMachineOpcode())
968      continue;
969    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
970    if (!TID.ImplicitDefs)
971      continue;
972    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
973      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
974  }
975
976  return !LRegs.empty();
977}
978
979/// Return a node that can be scheduled in this cycle. Requirements:
980/// (1) Ready: latency has been satisfied
981/// (2) No Hazards: resources are available
982/// (3) No Interferences: may unschedule to break register interferences.
983SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
984  SmallVector<SUnit*, 4> Interferences;
985  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
986
987  SUnit *CurSU = AvailableQueue->pop();
988  while (CurSU) {
989    SmallVector<unsigned, 4> LRegs;
990    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
991      break;
992    LRegsMap.insert(std::make_pair(CurSU, LRegs));
993
994    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
995    Interferences.push_back(CurSU);
996    CurSU = AvailableQueue->pop();
997  }
998  if (CurSU) {
999    // Add the nodes that aren't ready back onto the available list.
1000    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1001      Interferences[i]->isPending = false;
1002      assert(Interferences[i]->isAvailable && "must still be available");
1003      AvailableQueue->push(Interferences[i]);
1004    }
1005    return CurSU;
1006  }
1007
1008  // All candidates are delayed due to live physical reg dependencies.
1009  // Try backtracking, code duplication, or inserting cross class copies
1010  // to resolve it.
1011  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1012    SUnit *TrySU = Interferences[i];
1013    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1014
1015    // Try unscheduling up to the point where it's safe to schedule
1016    // this node.
1017    SUnit *BtSU = NULL;
1018    unsigned LiveCycle = UINT_MAX;
1019    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1020      unsigned Reg = LRegs[j];
1021      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1022        BtSU = LiveRegGens[Reg];
1023        LiveCycle = BtSU->getHeight();
1024      }
1025    }
1026    if (!WillCreateCycle(TrySU, BtSU))  {
1027      BacktrackBottomUp(TrySU, BtSU);
1028
1029      // Force the current node to be scheduled before the node that
1030      // requires the physical reg dep.
1031      if (BtSU->isAvailable) {
1032        BtSU->isAvailable = false;
1033        if (!BtSU->isPending)
1034          AvailableQueue->remove(BtSU);
1035      }
1036      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1037                          /*Reg=*/0, /*isNormalMemory=*/false,
1038                          /*isMustAlias=*/false, /*isArtificial=*/true));
1039
1040      // If one or more successors have been unscheduled, then the current
1041      // node is no longer available. Schedule a successor that's now
1042      // available instead.
1043      if (!TrySU->isAvailable) {
1044        CurSU = AvailableQueue->pop();
1045      }
1046      else {
1047        CurSU = TrySU;
1048        TrySU->isPending = false;
1049        Interferences.erase(Interferences.begin()+i);
1050      }
1051      break;
1052    }
1053  }
1054
1055  if (!CurSU) {
1056    // Can't backtrack. If it's too expensive to copy the value, then try to
1057    // duplicate the nodes that produce these "too expensive to copy"
1058    // values to break the dependency. In case even that doesn't work,
1059    // insert cross class copies.
1060    // If it's not too expensive, i.e. cost != -1, issue copies.
1061    SUnit *TrySU = Interferences[0];
1062    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1063    assert(LRegs.size() == 1 && "Can't handle this yet!");
1064    unsigned Reg = LRegs[0];
1065    SUnit *LRDef = LiveRegDefs[Reg];
1066    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1067    const TargetRegisterClass *RC =
1068      TRI->getMinimalPhysRegClass(Reg, VT);
1069    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1070
1071    // If the cross copy register class is null, then it must be possible to copy
1072    // the value directly. Do not try to duplicate the def.
1073    SUnit *NewDef = 0;
1074    if (DestRC)
1075      NewDef = CopyAndMoveSuccessors(LRDef);
1076    else
1077      DestRC = RC;
1078    if (!NewDef) {
1079      // Issue copies, these can be expensive cross register class copies.
1080      SmallVector<SUnit*, 2> Copies;
1081      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1082      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1083            << " to SU #" << Copies.front()->NodeNum << "\n");
1084      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1085                          /*Reg=*/0, /*isNormalMemory=*/false,
1086                          /*isMustAlias=*/false,
1087                          /*isArtificial=*/true));
1088      NewDef = Copies.back();
1089    }
1090
1091    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1092          << " to SU #" << TrySU->NodeNum << "\n");
1093    LiveRegDefs[Reg] = NewDef;
1094    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1095                         /*Reg=*/0, /*isNormalMemory=*/false,
1096                         /*isMustAlias=*/false,
1097                         /*isArtificial=*/true));
1098    TrySU->isAvailable = false;
1099    CurSU = NewDef;
1100  }
1101
1102  assert(CurSU && "Unable to resolve live physical register dependencies!");
1103
1104  // Add the nodes that aren't ready back onto the available list.
1105  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1106    Interferences[i]->isPending = false;
1107    // May no longer be available due to backtracking.
1108    if (Interferences[i]->isAvailable) {
1109      AvailableQueue->push(Interferences[i]);
1110    }
1111  }
1112  return CurSU;
1113}
1114
1115/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1116/// schedulers.
1117void ScheduleDAGRRList::ListScheduleBottomUp() {
1118  // Release any predecessors of the special Exit node.
1119  ReleasePredecessors(&ExitSU);
1120
1121  // Add root to Available queue.
1122  if (!SUnits.empty()) {
1123    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1124    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1125    RootSU->isAvailable = true;
1126    AvailableQueue->push(RootSU);
1127  }
1128
1129  // While Available queue is not empty, grab the node with the highest
1130  // priority. If it is not ready put it back.  Schedule the node.
1131  Sequence.reserve(SUnits.size());
1132  while (!AvailableQueue->empty()) {
1133    DEBUG(dbgs() << "\n*** Examining Available\n";
1134          AvailableQueue->dump(this));
1135
1136    // Pick the best node to schedule taking all constraints into
1137    // consideration.
1138    SUnit *SU = PickNodeToScheduleBottomUp();
1139
1140    AdvancePastStalls(SU);
1141
1142    ScheduleNodeBottomUp(SU);
1143
1144    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1145      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1146      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1147      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1148    }
1149  }
1150
1151  // Reverse the order since this is a bottom-up schedule.
1152  std::reverse(Sequence.begin(), Sequence.end());
1153
1154#ifndef NDEBUG
1155  VerifySchedule(isBottomUp);
1156#endif
1157}
1158
1159//===----------------------------------------------------------------------===//
1160//  Top-Down Scheduling
1161//===----------------------------------------------------------------------===//
1162
1163/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1164/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1165void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1166  SUnit *SuccSU = SuccEdge->getSUnit();
1167
1168#ifndef NDEBUG
1169  if (SuccSU->NumPredsLeft == 0) {
1170    dbgs() << "*** Scheduling failed! ***\n";
1171    SuccSU->dump(this);
1172    dbgs() << " has been released too many times!\n";
1173    llvm_unreachable(0);
1174  }
1175#endif
1176  --SuccSU->NumPredsLeft;
1177
1178  // If all the node's predecessors are scheduled, this node is ready
1179  // to be scheduled. Ignore the special ExitSU node.
1180  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1181    SuccSU->isAvailable = true;
1182    AvailableQueue->push(SuccSU);
1183  }
1184}
1185
1186void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1187  // Top down: release successors
1188  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1189       I != E; ++I) {
1190    assert(!I->isAssignedRegDep() &&
1191           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1192
1193    ReleaseSucc(SU, &*I);
1194  }
1195}
1196
1197/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1198/// count of its successors. If a successor pending count is zero, add it to
1199/// the Available queue.
1200void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1201  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1202  DEBUG(SU->dump(this));
1203
1204  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1205  SU->setDepthToAtLeast(CurCycle);
1206  Sequence.push_back(SU);
1207
1208  ReleaseSuccessors(SU);
1209  SU->isScheduled = true;
1210  AvailableQueue->ScheduledNode(SU);
1211}
1212
1213/// ListScheduleTopDown - The main loop of list scheduling for top-down
1214/// schedulers.
1215void ScheduleDAGRRList::ListScheduleTopDown() {
1216  AvailableQueue->setCurCycle(CurCycle);
1217
1218  // Release any successors of the special Entry node.
1219  ReleaseSuccessors(&EntrySU);
1220
1221  // Add all leaves to the Available queue.
1222  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1223    // It is available if it has no predecessors.
1224    if (SUnits[i].Preds.empty()) {
1225      AvailableQueue->push(&SUnits[i]);
1226      SUnits[i].isAvailable = true;
1227    }
1228  }
1229
1230  // While Available queue is not empty, grab the node with the highest
1231  // priority. If it is not ready put it back.  Schedule the node.
1232  Sequence.reserve(SUnits.size());
1233  while (!AvailableQueue->empty()) {
1234    SUnit *CurSU = AvailableQueue->pop();
1235
1236    if (CurSU)
1237      ScheduleNodeTopDown(CurSU);
1238    ++CurCycle;
1239    AvailableQueue->setCurCycle(CurCycle);
1240  }
1241
1242#ifndef NDEBUG
1243  VerifySchedule(isBottomUp);
1244#endif
1245}
1246
1247
1248//===----------------------------------------------------------------------===//
1249//                RegReductionPriorityQueue Implementation
1250//===----------------------------------------------------------------------===//
1251//
1252// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1253// to reduce register pressure.
1254//
1255namespace {
1256  template<class SF>
1257  class RegReductionPriorityQueue;
1258
1259  struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1260    bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1261  };
1262
1263  /// bu_ls_rr_sort - Priority function for bottom up register pressure
1264  /// reduction scheduler.
1265  struct bu_ls_rr_sort : public queue_sort {
1266    enum {
1267      IsBottomUp = true,
1268      HasReadyFilter = false
1269    };
1270
1271    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
1272    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
1273    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1274
1275    bool operator()(const SUnit* left, const SUnit* right) const;
1276  };
1277
1278  // td_ls_rr_sort - Priority function for top down register pressure reduction
1279  // scheduler.
1280  struct td_ls_rr_sort : public queue_sort {
1281    enum {
1282      IsBottomUp = false,
1283      HasReadyFilter = false
1284    };
1285
1286    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
1287    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
1288    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1289
1290    bool operator()(const SUnit* left, const SUnit* right) const;
1291  };
1292
1293  // src_ls_rr_sort - Priority function for source order scheduler.
1294  struct src_ls_rr_sort : public queue_sort {
1295    enum {
1296      IsBottomUp = true,
1297      HasReadyFilter = false
1298    };
1299
1300    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
1301    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
1302      : SPQ(spq) {}
1303    src_ls_rr_sort(const src_ls_rr_sort &RHS)
1304      : SPQ(RHS.SPQ) {}
1305
1306    bool operator()(const SUnit* left, const SUnit* right) const;
1307  };
1308
1309  // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1310  struct hybrid_ls_rr_sort : public queue_sort {
1311    enum {
1312      IsBottomUp = true,
1313      HasReadyFilter = false
1314    };
1315
1316    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
1317    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
1318      : SPQ(spq) {}
1319    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1320      : SPQ(RHS.SPQ) {}
1321
1322    bool operator()(const SUnit* left, const SUnit* right) const;
1323  };
1324
1325  // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1326  // scheduler.
1327  struct ilp_ls_rr_sort : public queue_sort {
1328    enum {
1329      IsBottomUp = true,
1330      HasReadyFilter = true
1331    };
1332
1333    RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
1334    ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
1335      : SPQ(spq) {}
1336    ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1337      : SPQ(RHS.SPQ) {}
1338
1339    bool isReady(SUnit *SU, unsigned CurCycle) const;
1340
1341    bool operator()(const SUnit* left, const SUnit* right) const;
1342  };
1343}  // end anonymous namespace
1344
1345/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1346/// Smaller number is the higher priority.
1347static unsigned
1348CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1349  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1350  if (SethiUllmanNumber != 0)
1351    return SethiUllmanNumber;
1352
1353  unsigned Extra = 0;
1354  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1355       I != E; ++I) {
1356    if (I->isCtrl()) continue;  // ignore chain preds
1357    SUnit *PredSU = I->getSUnit();
1358    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1359    if (PredSethiUllman > SethiUllmanNumber) {
1360      SethiUllmanNumber = PredSethiUllman;
1361      Extra = 0;
1362    } else if (PredSethiUllman == SethiUllmanNumber)
1363      ++Extra;
1364  }
1365
1366  SethiUllmanNumber += Extra;
1367
1368  if (SethiUllmanNumber == 0)
1369    SethiUllmanNumber = 1;
1370
1371  return SethiUllmanNumber;
1372}
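// Illustrative example of the computation above: a node with no data predecessors
// gets number 1; a node whose data predecessors carry numbers {2, 2, 1} takes the
// maximum (2) plus one extra for the additional predecessor that ties it, giving 3.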
1373
1374namespace {
1375  template<class SF>
1376  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1377    static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1378      std::vector<SUnit *>::iterator Best = Q.begin();
1379      for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1380             E = Q.end(); I != E; ++I)
1381        if (Picker(*Best, *I))
1382          Best = I;
1383      SUnit *V = *Best;
1384      if (Best != prior(Q.end()))
1385        std::swap(*Best, Q.back());
1386      Q.pop_back();
1387      return V;
1388    }
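    // Note: popFromQueue scans the whole vector on each pop rather than keeping a
    // heap, presumably because node priorities can change while nodes sit in the
    // queue (e.g. after ScheduledNode/UnscheduledNode), which would invalidate a
    // cached heap ordering.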
1389
1390    std::vector<SUnit*> Queue;
1391    SF Picker;
1392    unsigned CurQueueId;
1393    bool TracksRegPressure;
1394
1395  protected:
1396    // SUnits - The SUnits for the current graph.
1397    std::vector<SUnit> *SUnits;
1398
1399    MachineFunction &MF;
1400    const TargetInstrInfo *TII;
1401    const TargetRegisterInfo *TRI;
1402    const TargetLowering *TLI;
1403    ScheduleDAGRRList *scheduleDAG;
1404
1405    // SethiUllmanNumbers - The SethiUllman number for each node.
1406    std::vector<unsigned> SethiUllmanNumbers;
1407
1408    /// RegPressure - Tracking current reg pressure per register class.
1409    ///
1410    std::vector<unsigned> RegPressure;
1411
1412    /// RegLimit - Tracking the number of allocatable registers per register
1413    /// class.
1414    std::vector<unsigned> RegLimit;
1415
1416  public:
1417    RegReductionPriorityQueue(MachineFunction &mf,
1418                              bool tracksrp,
1419                              const TargetInstrInfo *tii,
1420                              const TargetRegisterInfo *tri,
1421                              const TargetLowering *tli)
1422      : SchedulingPriorityQueue(SF::HasReadyFilter), Picker(this),
1423        CurQueueId(0), TracksRegPressure(tracksrp),
1424        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1425      if (TracksRegPressure) {
1426        unsigned NumRC = TRI->getNumRegClasses();
1427        RegLimit.resize(NumRC);
1428        RegPressure.resize(NumRC);
1429        std::fill(RegLimit.begin(), RegLimit.end(), 0);
1430        std::fill(RegPressure.begin(), RegPressure.end(), 0);
1431        for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1432               E = TRI->regclass_end(); I != E; ++I)
1433          RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1434      }
1435    }
1436
1437    bool isBottomUp() const { return SF::IsBottomUp; }
1438
1439    void initNodes(std::vector<SUnit> &sunits) {
1440      SUnits = &sunits;
1441      // Add pseudo dependency edges for two-address nodes.
1442      AddPseudoTwoAddrDeps();
1443      // Reroute edges to nodes with multiple uses.
1444      PrescheduleNodesWithMultipleUses();
1445      // Calculate node priorities.
1446      CalculateSethiUllmanNumbers();
1447    }
1448
1449    void addNode(const SUnit *SU) {
1450      unsigned SUSize = SethiUllmanNumbers.size();
1451      if (SUnits->size() > SUSize)
1452        SethiUllmanNumbers.resize(SUSize*2, 0);
1453      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1454    }
1455
1456    void updateNode(const SUnit *SU) {
1457      SethiUllmanNumbers[SU->NodeNum] = 0;
1458      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1459    }
1460
1461    void releaseState() {
1462      SUnits = 0;
1463      SethiUllmanNumbers.clear();
1464      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1465    }
1466
1467    unsigned getNodePriority(const SUnit *SU) const {
1468      assert(SU->NodeNum < SethiUllmanNumbers.size());
1469      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1470      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1471        // CopyToReg should be close to its uses to facilitate coalescing and
1472        // avoid spilling.
1473        return 0;
1474      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1475          Opc == TargetOpcode::SUBREG_TO_REG ||
1476          Opc == TargetOpcode::INSERT_SUBREG)
1477        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1478        // close to their uses to facilitate coalescing.
1479        return 0;
1480      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1481        // If SU does not have a register use, i.e. it doesn't produce a value
1482        // that would be consumed (e.g. store), then it terminates a chain of
1483        // computation.  Give it a large SethiUllman number so it will be
1484        // scheduled right before its predecessors, so that it doesn't lengthen
1485        // their live ranges.
1486        return 0xffff;
1487      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1488        // If SU does not have a register def, schedule it close to its uses
1489        // because it does not lengthen any live ranges.
1490        return 0;
1491      return SethiUllmanNumbers[SU->NodeNum];
1492    }
1493
1494    unsigned getNodeOrdering(const SUnit *SU) const {
1495      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1496    }
1497
1498    bool empty() const { return Queue.empty(); }
1499
1500    bool isReady(SUnit *U) const {
1501      return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1502    }
1503
1504    void push(SUnit *U) {
1505      assert(!U->NodeQueueId && "Node in the queue already");
1506      U->NodeQueueId = ++CurQueueId;
1507      Queue.push_back(U);
1508    }
1509
1510    SUnit *pop() {
1511      if (Queue.empty()) return NULL;
1512
1513      SUnit *V = popFromQueue(Queue, Picker);
1514      V->NodeQueueId = 0;
1515      return V;
1516    }
1517
1518    void remove(SUnit *SU) {
1519      assert(!Queue.empty() && "Queue is empty!");
1520      assert(SU->NodeQueueId != 0 && "Not in queue!");
1521      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1522                                                   SU);
1523      if (I != prior(Queue.end()))
1524        std::swap(*I, Queue.back());
1525      Queue.pop_back();
1526      SU->NodeQueueId = 0;
1527    }
1528
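    /// HighRegPressure - Return true if scheduling SU would raise the pressure
    /// of some register class, as estimated from the defs of SU's data
    /// predecessors, to or beyond the limit recorded in RegLimit.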
1529    bool HighRegPressure(const SUnit *SU) const {
1530      if (!TLI)
1531        return false;
1532
1533      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1534           I != E; ++I) {
1535        if (I->isCtrl())
1536          continue;
1537        SUnit *PredSU = I->getSUnit();
1538        const SDNode *PN = PredSU->getNode();
1539        if (!PN->isMachineOpcode()) {
1540          if (PN->getOpcode() == ISD::CopyFromReg) {
1541            EVT VT = PN->getValueType(0);
1542            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1543            unsigned Cost = TLI->getRepRegClassCostFor(VT);
1544            if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1545              return true;
1546          }
1547          continue;
1548        }
1549        unsigned POpc = PN->getMachineOpcode();
1550        if (POpc == TargetOpcode::IMPLICIT_DEF)
1551          continue;
1552        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1553          EVT VT = PN->getOperand(0).getValueType();
1554          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1555          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1556          // Check if this increases register pressure of the specific register
1557          // class to the point where it would cause spills.
1558          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1559            return true;
1560          continue;
1561        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1562                   POpc == TargetOpcode::SUBREG_TO_REG) {
1563          EVT VT = PN->getValueType(0);
1564          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1565          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1566          // Check if this increases register pressure of the specific register
1567          // class to the point where it would cause spills.
1568          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1569            return true;
1570          continue;
1571        }
1572        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1573        for (unsigned i = 0; i != NumDefs; ++i) {
1574          EVT VT = PN->getValueType(i);
1575          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1576          if (RegPressure[RCId] >= RegLimit[RCId])
1577            return true; // Reg pressure already high.
1578          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1579          if (!PN->hasAnyUseOfValue(i))
1580            continue;
1581          // Check if this increases register pressure of the specific register
1582          // class to the point where it would cause spills.
1583          if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1584            return true;
1585        }
1586      }
1587
1588      return false;
1589    }
1590
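    /// ScheduledNode - Update the tracked register pressure as SU is scheduled
    /// bottom-up: the defs of SU's data predecessors become live the first time
    /// one of their successors is scheduled, and SU's own defs are freed.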
1591    void ScheduledNode(SUnit *SU) {
1592      if (!TracksRegPressure)
1593        return;
1594
1595      const SDNode *N = SU->getNode();
1596      if (!N->isMachineOpcode()) {
1597        if (N->getOpcode() != ISD::CopyToReg)
1598          return;
1599      } else {
1600        unsigned Opc = N->getMachineOpcode();
1601        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1602            Opc == TargetOpcode::INSERT_SUBREG ||
1603            Opc == TargetOpcode::SUBREG_TO_REG ||
1604            Opc == TargetOpcode::REG_SEQUENCE ||
1605            Opc == TargetOpcode::IMPLICIT_DEF)
1606          return;
1607      }
1608
1609      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1610           I != E; ++I) {
1611        if (I->isCtrl())
1612          continue;
1613        SUnit *PredSU = I->getSUnit();
1614        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1615          continue;
1616        const SDNode *PN = PredSU->getNode();
1617        if (!PN->isMachineOpcode()) {
1618          if (PN->getOpcode() == ISD::CopyFromReg) {
1619            EVT VT = PN->getValueType(0);
1620            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1621            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1622          }
1623          continue;
1624        }
1625        unsigned POpc = PN->getMachineOpcode();
1626        if (POpc == TargetOpcode::IMPLICIT_DEF)
1627          continue;
1628        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1629          EVT VT = PN->getOperand(0).getValueType();
1630          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1631          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1632          continue;
1633        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1634                   POpc == TargetOpcode::SUBREG_TO_REG) {
1635          EVT VT = PN->getValueType(0);
1636          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1637          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1638          continue;
1639        }
1640        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1641        for (unsigned i = 0; i != NumDefs; ++i) {
1642          EVT VT = PN->getValueType(i);
1643          if (!PN->hasAnyUseOfValue(i))
1644            continue;
1645          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1646          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1647        }
1648      }
1649
1650      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1651      // may transfer data dependencies to CopyToReg.
1652      if (SU->NumSuccs && N->isMachineOpcode()) {
1653        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1654        for (unsigned i = 0; i != NumDefs; ++i) {
1655          EVT VT = N->getValueType(i);
1656          if (!N->hasAnyUseOfValue(i))
1657            continue;
1658          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1659          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1660            // Register pressure tracking is imprecise. This can happen.
1661            RegPressure[RCId] = 0;
1662          else
1663            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1664        }
1665      }
1666
1667      dumpRegPressure();
1668    }
1669
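    /// UnscheduledNode - Approximately undo the pressure update performed by
    /// ScheduledNode when SU is taken back off the schedule while backtracking.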
1670    void UnscheduledNode(SUnit *SU) {
1671      if (!TracksRegPressure)
1672        return;
1673
1674      const SDNode *N = SU->getNode();
1675      if (!N->isMachineOpcode()) {
1676        if (N->getOpcode() != ISD::CopyToReg)
1677          return;
1678      } else {
1679        unsigned Opc = N->getMachineOpcode();
1680        if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1681            Opc == TargetOpcode::INSERT_SUBREG ||
1682            Opc == TargetOpcode::SUBREG_TO_REG ||
1683            Opc == TargetOpcode::REG_SEQUENCE ||
1684            Opc == TargetOpcode::IMPLICIT_DEF)
1685          return;
1686      }
1687
1688      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1689           I != E; ++I) {
1690        if (I->isCtrl())
1691          continue;
1692        SUnit *PredSU = I->getSUnit();
1693        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1694          continue;
1695        const SDNode *PN = PredSU->getNode();
1696        if (!PN->isMachineOpcode()) {
1697          if (PN->getOpcode() == ISD::CopyFromReg) {
1698            EVT VT = PN->getValueType(0);
1699            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1700            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1701          }
1702          continue;
1703        }
1704        unsigned POpc = PN->getMachineOpcode();
1705        if (POpc == TargetOpcode::IMPLICIT_DEF)
1706          continue;
1707        if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1708          EVT VT = PN->getOperand(0).getValueType();
1709          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1710          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1711          continue;
1712        } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1713                   POpc == TargetOpcode::SUBREG_TO_REG) {
1714          EVT VT = PN->getValueType(0);
1715          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1716          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1717          continue;
1718        }
1719        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1720        for (unsigned i = 0; i != NumDefs; ++i) {
1721          EVT VT = PN->getValueType(i);
1722          if (!PN->hasAnyUseOfValue(i))
1723            continue;
1724          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1725          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1726            // Register pressure tracking is imprecise. This can happen.
1727            RegPressure[RCId] = 0;
1728          else
1729            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1730        }
1731      }
1732
1733      // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1734      // may transfer data dependencies to CopyToReg.
1735      if (SU->NumSuccs && N->isMachineOpcode()) {
1736        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1737        for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1738          EVT VT = N->getValueType(i);
1739          if (VT == MVT::Glue || VT == MVT::Other)
1740            continue;
1741          if (!N->hasAnyUseOfValue(i))
1742            continue;
1743          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1744          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1745        }
1746      }
1747
1748      dumpRegPressure();
1749    }
1750
1751    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1752      scheduleDAG = scheduleDag;
1753    }
1754
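    /// dumpRegPressure - Emit a debug dump of the current pressure and limit
    /// for every register class that has non-zero pressure.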
1755    void dumpRegPressure() const {
1756      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1757             E = TRI->regclass_end(); I != E; ++I) {
1758        const TargetRegisterClass *RC = *I;
1759        unsigned Id = RC->getID();
1760        unsigned RP = RegPressure[Id];
1761        if (!RP) continue;
1762        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1763              << '\n');
1764      }
1765    }
1766
1767    void dump(ScheduleDAG *DAG) const {
1768      // Emulate pop() without clobbering NodeQueueIds.
1769      std::vector<SUnit*> DumpQueue = Queue;
1770      SF DumpPicker = Picker;
1771      while (!DumpQueue.empty()) {
1772        SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1773        if (isBottomUp())
1774          dbgs() << "Height " << SU->getHeight() << ": ";
1775        else
1776          dbgs() << "Depth " << SU->getDepth() << ": ";
1777        SU->dump(DAG);
1778      }
1779    }
1780
1781  protected:
1782    bool canClobber(const SUnit *SU, const SUnit *Op);
1783    void AddPseudoTwoAddrDeps();
1784    void PrescheduleNodesWithMultipleUses();
1785    void CalculateSethiUllmanNumbers();
1786  };
1787
1788  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1789    BURegReductionPriorityQueue;
1790
1791  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1792    TDRegReductionPriorityQueue;
1793
1794  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1795    SrcRegReductionPriorityQueue;
1796
1797  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1798    HybridBURRPriorityQueue;
1799
1800  typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1801    ILPBURRPriorityQueue;
1802}
1803
1804/// closestSucc - Returns the scheduled cycle of the successor which is
1805/// closest to the current cycle.
1806static unsigned closestSucc(const SUnit *SU) {
1807  unsigned MaxHeight = 0;
1808  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1809       I != E; ++I) {
1810    if (I->isCtrl()) continue;  // ignore chain succs
1811    unsigned Height = I->getSUnit()->getHeight();
1812    // If there are bunch of CopyToRegs stacked up, they should be considered
1813    // to be at the same position.
1814    if (I->getSUnit()->getNode() &&
1815        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1816      Height = closestSucc(I->getSUnit())+1;
1817    if (Height > MaxHeight)
1818      MaxHeight = Height;
1819  }
1820  return MaxHeight;
1821}
1822
1823/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1824/// for scratch registers, i.e. the number of data dependencies.
1825static unsigned calcMaxScratches(const SUnit *SU) {
1826  unsigned Scratches = 0;
1827  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1828       I != E; ++I) {
1829    if (I->isCtrl()) continue;  // ignore chain preds
1830    Scratches++;
1831  }
1832  return Scratches;
1833}
1834
1835/// hasOnlyLiveOutUses - Return true if SU has data successors, all of which
1836/// are CopyToReg nodes copying to virtual registers. Such a def is probably a
1837/// liveout with no other use; it should be scheduled closer to the terminator.
1838static bool hasOnlyLiveOutUses(const SUnit *SU) {
1839  bool RetVal = false;
1840  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1841       I != E; ++I) {
1842    if (I->isCtrl()) continue;
1843    const SUnit *SuccSU = I->getSUnit();
1844    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1845      unsigned Reg =
1846        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1847      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1848        RetVal = true;
1849        continue;
1850      }
1851    }
1852    return false;
1853  }
1854  return RetVal;
1855}
1856
1857/// UnitsSharePred - Return true if the two scheduling units share a common
1858/// data predecessor.
1859static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1860  SmallSet<const SUnit*, 4> Preds;
1861  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1862       I != E; ++I) {
1863    if (I->isCtrl()) continue;  // ignore chain preds
1864    Preds.insert(I->getSUnit());
1865  }
1866  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1867       I != E; ++I) {
1868    if (I->isCtrl()) continue;  // ignore chain preds
1869    if (Preds.count(I->getSUnit()))
1870      return true;
1871  }
1872  return false;
1873}
1874
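/// BURRSort - Tie-breaking comparison shared by the bottom-up comparators below:
/// Sethi-Ullman priority first, then distance to the closest successor, the
/// scratch register estimate, height, depth, and finally queue order.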
1875template <typename RRSort>
1876static bool BURRSort(const SUnit *left, const SUnit *right,
1877                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1878  unsigned LPriority = SPQ->getNodePriority(left);
1879  unsigned RPriority = SPQ->getNodePriority(right);
1880  if (LPriority != RPriority)
1881    return LPriority > RPriority;
1882
1883  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
1884  // e.g.
1885  // t1 = op t2, c1
1886  // t3 = op t4, c2
1887  //
1888  // and the following instructions are both ready.
1889  // t2 = op c3
1890  // t4 = op c4
1891  //
1892  // Then schedule t2 = op first.
1893  // i.e.
1894  // t4 = op c4
1895  // t2 = op c3
1896  // t1 = op t2, c1
1897  // t3 = op t4, c2
1898  //
1899  // This creates more short live intervals.
1900  unsigned LDist = closestSucc(left);
1901  unsigned RDist = closestSucc(right);
1902  if (LDist != RDist)
1903    return LDist < RDist;
1904
1905  // How many registers become live when the node is scheduled.
1906  unsigned LScratch = calcMaxScratches(left);
1907  unsigned RScratch = calcMaxScratches(right);
1908  if (LScratch != RScratch)
1909    return LScratch > RScratch;
1910
1911  // Note: with a bottom-up ready filter, the height check may be redundant.
1912  if (left->getHeight() != right->getHeight())
1913    return left->getHeight() > right->getHeight();
1914
1915  if (left->getDepth() != right->getDepth())
1916    return left->getDepth() < right->getDepth();
1917
1918  assert(left->NodeQueueId && right->NodeQueueId &&
1919         "NodeQueueId cannot be zero");
1920  return (left->NodeQueueId > right->NodeQueueId);
1921}
1922
1923// Bottom up
1924bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1925  return BURRSort(left, right, SPQ);
1926}
1927
1928// Source order, otherwise bottom up.
1929bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1930  unsigned LOrder = SPQ->getNodeOrdering(left);
1931  unsigned ROrder = SPQ->getNodeOrdering(right);
1932
1933  // Prefer an ordering where the lower the non-zero order number, the higher
1934  // the preference.
1935  if ((LOrder || ROrder) && LOrder != ROrder)
1936    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1937
1938  return BURRSort(left, right, SPQ);
1939}
1940
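// Bottom up, balancing register pressure against latency.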
1941bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1942  if (left->isCall || right->isCall)
1943    // No way to compute latency of calls.
1944    return BURRSort(left, right, SPQ);
1945
1946  bool LHigh = SPQ->HighRegPressure(left);
1947  bool RHigh = SPQ->HighRegPressure(right);
1948  // Avoid causing spills. If register pressure is high, schedule for
1949  // register pressure reduction.
1950  if (LHigh && !RHigh)
1951    return true;
1952  else if (!LHigh && RHigh)
1953    return false;
1954  else if (!LHigh && !RHigh) {
1955    // If the two nodes share an operand and one of them has a single
1956    // use that is a live out copy, favor the one that is live out. Otherwise
1957    // it will be difficult to eliminate the copy if the instruction is a
1958    // loop induction variable update. e.g.
1959    // BB:
1960    // sub r1, r3, #1
1961    // str r0, [r2, r3]
1962    // mov r3, r1
1963    // cmp
1964    // bne BB
1965    bool SharePred = UnitsSharePred(left, right);
1966    // FIXME: Only adjust if BB is a loop back edge.
1967    // FIXME: What's the cost of a copy?
1968    int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1969    int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1970    int LHeight = (int)left->getHeight() - LBonus;
1971    int RHeight = (int)right->getHeight() - RBonus;
1972
1973    // Low register pressure situation, schedule for latency if possible.
1974    bool LStall = left->SchedulingPref == Sched::Latency &&
1975      (int)SPQ->getCurCycle() < LHeight;
1976    bool RStall = right->SchedulingPref == Sched::Latency &&
1977      (int)SPQ->getCurCycle() < RHeight;
1978    // If scheduling one of the nodes will cause a pipeline stall, delay it.
1979    // If scheduling both of the nodes will cause a pipeline stall, sort them
1980    // according to their height.
1981    if (LStall) {
1982      if (!RStall)
1983        return true;
1984      if (LHeight != RHeight)
1985        return LHeight > RHeight;
1986    } else if (RStall)
1987      return false;
1988
1989    // If either node is scheduling for latency, sort them by height
1990    // and latency.
1991    if (left->SchedulingPref == Sched::Latency ||
1992        right->SchedulingPref == Sched::Latency) {
1993      if (LHeight != RHeight)
1994        return LHeight > RHeight;
1995      if (left->Latency != right->Latency)
1996        return left->Latency > right->Latency;
1997    }
1998  }
1999
2000  return BURRSort(left, right, SPQ);
2001}
2002
2003// Schedule as many instructions in each cycle as possible, so don't make an
2004// instruction available unless it is ready in the current cycle.
2005bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2006  return SU->getHeight() <= CurCycle;
2007}
2008
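// Bottom up, balancing instruction level parallelism against register pressure.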
2009bool ilp_ls_rr_sort::operator()(const SUnit *left,
2010                                const SUnit *right) const {
2011  if (left->isCall || right->isCall)
2012    // No way to compute latency of calls.
2013    return BURRSort(left, right, SPQ);
2014
2015  bool LHigh = SPQ->HighRegPressure(left);
2016  bool RHigh = SPQ->HighRegPressure(right);
2017  // Avoid causing spills. If register pressure is high, schedule for
2018  // register pressure reduction.
2019  if (LHigh && !RHigh)
2020    return true;
2021  else if (!LHigh && RHigh)
2022    return false;
2023  else if (!LHigh && !RHigh) {
2024    // Low register pressure situation, schedule to maximize instruction level
2025    // parallelism.
2026    if (left->NumPreds > right->NumPreds)
2027      return false;
2028    else if (left->NumPreds < right->NumPreds)
2029      return true;
2030  }
2031
2032  return BURRSort(left, right, SPQ);
2033}
2034
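/// canClobber - Return true if SU is a two-address instruction with a tied
/// (def&use) operand whose value is defined by Op's original node.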
2035template<class SF>
2036bool
2037RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
2038  if (SU->isTwoAddress) {
2039    unsigned Opc = SU->getNode()->getMachineOpcode();
2040    const TargetInstrDesc &TID = TII->get(Opc);
2041    unsigned NumRes = TID.getNumDefs();
2042    unsigned NumOps = TID.getNumOperands() - NumRes;
2043    for (unsigned i = 0; i != NumOps; ++i) {
2044      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2045        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2046        if (DU->getNodeId() != -1 &&
2047            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2048          return true;
2049      }
2050    }
2051  }
2052  return false;
2053}
2054
2055/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2056/// physical register defs.
2057static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2058                                  const TargetInstrInfo *TII,
2059                                  const TargetRegisterInfo *TRI) {
2060  SDNode *N = SuccSU->getNode();
2061  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2062  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2063  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2064  for (const SDNode *SUNode = SU->getNode(); SUNode;
2065       SUNode = SUNode->getGluedNode()) {
2066    if (!SUNode->isMachineOpcode())
2067      continue;
2068    const unsigned *SUImpDefs =
2069      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2070    if (!SUImpDefs)
2071      return false;
2072    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2073      EVT VT = N->getValueType(i);
2074      if (VT == MVT::Glue || VT == MVT::Other)
2075        continue;
2076      if (!N->hasAnyUseOfValue(i))
2077        continue;
2078      unsigned Reg = ImpDefs[i - NumDefs];
2079      for (;*SUImpDefs; ++SUImpDefs) {
2080        unsigned SUReg = *SUImpDefs;
2081        if (TRI->regsOverlap(Reg, SUReg))
2082          return true;
2083      }
2084    }
2085  }
2086  return false;
2087}
2088
2089/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2090/// are not handled well by the general register pressure reduction
2091/// heuristics. When presented with code like this:
2092///
2093///      N
2094///    / |
2095///   /  |
2096///  U  store
2097///  |
2098/// ...
2099///
2100/// the heuristics tend to push the store up, but since the
2101/// operand of the store has another use (U), this would increase
2102/// the length of that other use (the U->N edge).
2103///
2104/// This function transforms code like the above to route U's
2105/// dependence through the store when possible, like this:
2106///
2107///      N
2108///      ||
2109///      ||
2110///     store
2111///       |
2112///       U
2113///       |
2114///      ...
2115///
2116/// This results in the store being scheduled immediately
2117/// after N, which shortens the U->N live range, reducing
2118/// register pressure.
2119///
2120template<class SF>
2121void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
2122  // Visit all the nodes in topological order, working top-down.
2123  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2124    SUnit *SU = &(*SUnits)[i];
2125    // For now, only look at nodes with no data successors, such as stores.
2126    // These are especially important, due to the heuristics in
2127    // getNodePriority for nodes with no data successors.
2128    if (SU->NumSuccs != 0)
2129      continue;
2130    // For now, only look at nodes with exactly one data predecessor.
2131    if (SU->NumPreds != 1)
2132      continue;
2133    // Avoid prescheduling copies to virtual registers, which don't behave
2134    // like other nodes from the perspective of scheduling heuristics.
2135    if (SDNode *N = SU->getNode())
2136      if (N->getOpcode() == ISD::CopyToReg &&
2137          TargetRegisterInfo::isVirtualRegister
2138            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2139        continue;
2140
2141    // Locate the single data predecessor.
2142    SUnit *PredSU = 0;
2143    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2144         EE = SU->Preds.end(); II != EE; ++II)
2145      if (!II->isCtrl()) {
2146        PredSU = II->getSUnit();
2147        break;
2148      }
2149    assert(PredSU);
2150
2151    // Don't rewrite edges that carry physregs, because that requires additional
2152    // support infrastructure.
2153    if (PredSU->hasPhysRegDefs)
2154      continue;
2155    // Short-circuit the case where SU is PredSU's only data successor.
2156    if (PredSU->NumSuccs == 1)
2157      continue;
2158    // Avoid prescheduling copies from virtual registers, which don't behave
2159    // like other nodes from the perspective of scheduling heuristics.
2160    if (SDNode *N = SU->getNode())
2161      if (N->getOpcode() == ISD::CopyFromReg &&
2162          TargetRegisterInfo::isVirtualRegister
2163            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2164        continue;
2165
2166    // Perform checks on the successors of PredSU.
2167    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2168         EE = PredSU->Succs.end(); II != EE; ++II) {
2169      SUnit *PredSuccSU = II->getSUnit();
2170      if (PredSuccSU == SU) continue;
2171      // If PredSU has another successor with no data successors, for
2172      // now don't attempt to choose either over the other.
2173      if (PredSuccSU->NumSuccs == 0)
2174        goto outer_loop_continue;
2175      // Don't break physical register dependencies.
2176      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2177        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2178          goto outer_loop_continue;
2179      // Don't introduce graph cycles.
2180      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2181        goto outer_loop_continue;
2182    }
2183
2184    // Ok, the transformation is safe and the heuristics suggest it is
2185    // profitable. Update the graph.
2186    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2187                 << " next to PredSU #" << PredSU->NodeNum
2188                 << " to guide scheduling in the presence of multiple uses\n");
2189    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2190      SDep Edge = PredSU->Succs[i];
2191      assert(!Edge.isAssignedRegDep());
2192      SUnit *SuccSU = Edge.getSUnit();
2193      if (SuccSU != SU) {
2194        Edge.setSUnit(PredSU);
2195        scheduleDAG->RemovePred(SuccSU, Edge);
2196        scheduleDAG->AddPred(SU, Edge);
2197        Edge.setSUnit(SU);
2198        scheduleDAG->AddPred(SuccSU, Edge);
2199        --i;
2200      }
2201    }
2202  outer_loop_continue:;
2203  }
2204}
2205
2206/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2207/// it as a def&use operand, add a pseudo control edge from it to the other
2208/// node (if it won't create a cycle) so the two-address one will be scheduled
2209/// first (lower in the schedule). If both nodes are two-address, favor the
2210/// one that has a CopyToReg use (more likely to be a loop induction update).
2211/// If both are two-address, but one is commutable while the other is not
2212/// commutable, favor the one that's not commutable.
2213template<class SF>
2214void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
2215  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2216    SUnit *SU = &(*SUnits)[i];
2217    if (!SU->isTwoAddress)
2218      continue;
2219
2220    SDNode *Node = SU->getNode();
2221    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2222      continue;
2223
2224    bool isLiveOut = hasOnlyLiveOutUses(SU);
2225    unsigned Opc = Node->getMachineOpcode();
2226    const TargetInstrDesc &TID = TII->get(Opc);
2227    unsigned NumRes = TID.getNumDefs();
2228    unsigned NumOps = TID.getNumOperands() - NumRes;
2229    for (unsigned j = 0; j != NumOps; ++j) {
2230      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2231        continue;
2232      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2233      if (DU->getNodeId() == -1)
2234        continue;
2235      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2236      if (!DUSU) continue;
2237      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2238           E = DUSU->Succs.end(); I != E; ++I) {
2239        if (I->isCtrl()) continue;
2240        SUnit *SuccSU = I->getSUnit();
2241        if (SuccSU == SU)
2242          continue;
2243        // Be conservative. Ignore if nodes aren't at roughly the same
2244        // depth and height.
2245        if (SuccSU->getHeight() < SU->getHeight() &&
2246            (SU->getHeight() - SuccSU->getHeight()) > 1)
2247          continue;
2248        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2249        // constrains whatever is using the copy, instead of the copy
2250        // itself. In the case that the copy is coalesced, this
2251        // preserves the intent of the pseudo two-address heuristics.
2252        while (SuccSU->Succs.size() == 1 &&
2253               SuccSU->getNode()->isMachineOpcode() &&
2254               SuccSU->getNode()->getMachineOpcode() ==
2255                 TargetOpcode::COPY_TO_REGCLASS)
2256          SuccSU = SuccSU->Succs.front().getSUnit();
2257        // Don't constrain non-instruction nodes.
2258        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2259          continue;
2260        // Don't constrain nodes with physical register defs if the
2261        // predecessor can clobber them.
2262        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2263          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2264            continue;
2265        }
2266        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2267        // these may be coalesced away. We want them close to their uses.
2268        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2269        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2270            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2271            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2272          continue;
2273        if ((!canClobber(SuccSU, DUSU) ||
2274             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2275             (!SU->isCommutable && SuccSU->isCommutable)) &&
2276            !scheduleDAG->IsReachable(SuccSU, SU)) {
2277          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2278                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2279          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2280                                        /*Reg=*/0, /*isNormalMemory=*/false,
2281                                        /*isMustAlias=*/false,
2282                                        /*isArtificial=*/true));
2283        }
2284      }
2285    }
2286  }
2287}
2288
2289/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2290/// scheduling units.
2291template<class SF>
2292void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
2293  SethiUllmanNumbers.assign(SUnits->size(), 0);
2294
2295  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
2296    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
2297}
2298
2299/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the number of unscheduled
2300/// predecessors of the successors of the SUnit SU. Stop counting when the
2301/// provided limit is exceeded.
2302static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2303                                                    unsigned Limit) {
2304  unsigned Sum = 0;
2305  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2306       I != E; ++I) {
2307    const SUnit *SuccSU = I->getSUnit();
2308    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2309         EE = SuccSU->Preds.end(); II != EE; ++II) {
2310      SUnit *PredSU = II->getSUnit();
2311      if (!PredSU->isScheduled)
2312        if (++Sum > Limit)
2313          return Sum;
2314    }
2315  }
2316  return Sum;
2317}
2318
2319
2320// Top down
2321bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2322  unsigned LPriority = SPQ->getNodePriority(left);
2323  unsigned RPriority = SPQ->getNodePriority(right);
2324  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2325  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2326  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2327  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2328  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2329  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2330
2331  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2332    return false;
2333  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2334    return true;
2335
2336  if (LIsFloater)
2337    LBonus -= 2;
2338  if (RIsFloater)
2339    RBonus -= 2;
2340  if (left->NumSuccs == 1)
2341    LBonus += 2;
2342  if (right->NumSuccs == 1)
2343    RBonus += 2;
2344
2345  if (LPriority+LBonus != RPriority+RBonus)
2346    return LPriority+LBonus < RPriority+RBonus;
2347
2348  if (left->getDepth() != right->getDepth())
2349    return left->getDepth() < right->getDepth();
2350
2351  if (left->NumSuccsLeft != right->NumSuccsLeft)
2352    return left->NumSuccsLeft > right->NumSuccsLeft;
2353
2354  assert(left->NodeQueueId && right->NodeQueueId &&
2355         "NodeQueueId cannot be zero");
2356  return (left->NodeQueueId > right->NodeQueueId);
2357}
2358
2359//===----------------------------------------------------------------------===//
2360//                         Public Constructor Functions
2361//===----------------------------------------------------------------------===//
2362
2363llvm::ScheduleDAGSDNodes *
2364llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2365                                 CodeGenOpt::Level OptLevel) {
2366  const TargetMachine &TM = IS->TM;
2367  const TargetInstrInfo *TII = TM.getInstrInfo();
2368  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2369
2370  BURegReductionPriorityQueue *PQ =
2371    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2372  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2373  PQ->setScheduleDAG(SD);
2374  return SD;
2375}
2376
2377llvm::ScheduleDAGSDNodes *
2378llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2379                                 CodeGenOpt::Level OptLevel) {
2380  const TargetMachine &TM = IS->TM;
2381  const TargetInstrInfo *TII = TM.getInstrInfo();
2382  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2383
2384  TDRegReductionPriorityQueue *PQ =
2385    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2386  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2387  PQ->setScheduleDAG(SD);
2388  return SD;
2389}
2390
2391llvm::ScheduleDAGSDNodes *
2392llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2393                                   CodeGenOpt::Level OptLevel) {
2394  const TargetMachine &TM = IS->TM;
2395  const TargetInstrInfo *TII = TM.getInstrInfo();
2396  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2397
2398  SrcRegReductionPriorityQueue *PQ =
2399    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2400  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2401  PQ->setScheduleDAG(SD);
2402  return SD;
2403}
2404
2405llvm::ScheduleDAGSDNodes *
2406llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2407                                   CodeGenOpt::Level OptLevel) {
2408  const TargetMachine &TM = IS->TM;
2409  const TargetInstrInfo *TII = TM.getInstrInfo();
2410  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2411  const TargetLowering *TLI = &IS->getTargetLowering();
2412
2413  HybridBURRPriorityQueue *PQ =
2414    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2415
2416  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2417  PQ->setScheduleDAG(SD);
2418  return SD;
2419}
2420
2421llvm::ScheduleDAGSDNodes *
2422llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2423                                CodeGenOpt::Level OptLevel) {
2424  const TargetMachine &TM = IS->TM;
2425  const TargetInstrInfo *TII = TM.getInstrInfo();
2426  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2427  const TargetLowering *TLI = &IS->getTargetLowering();
2428
2429  ILPBURRPriorityQueue *PQ =
2430    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2431  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2432  PQ->setScheduleDAG(SD);
2433  return SD;
2434}
2435