ScheduleDAGRRList.cpp revision 8ae3edacfaebfeed3e65bbbcf18a6fb21b09feaf
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> DisableSchedCycles(
70  "disable-sched-cycles", cl::Hidden, cl::init(false),
71  cl::desc("Disable cycle-level precision during preRA scheduling"));
72
73// Temporary sched=list-ilp flags until the heuristics are robust.
74// Some options are also available under sched=list-hybrid.
75static cl::opt<bool> DisableSchedRegPressure(
76  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
77  cl::desc("Disable regpressure priority in sched=list-ilp"));
78static cl::opt<bool> DisableSchedLiveUses(
79  "disable-sched-live-uses", cl::Hidden, cl::init(true),
80  cl::desc("Disable live use priority in sched=list-ilp"));
81static cl::opt<bool> DisableSchedVRegCycle(
82  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
83  cl::desc("Disable virtual register cycle interference checks"));
84static cl::opt<bool> DisableSchedPhysRegJoin(
85  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
86  cl::desc("Disable physreg def-use affinity"));
87static cl::opt<bool> DisableSchedStalls(
88  "disable-sched-stalls", cl::Hidden, cl::init(true),
89  cl::desc("Disable no-stall priority in sched=list-ilp"));
90static cl::opt<bool> DisableSchedCriticalPath(
91  "disable-sched-critical-path", cl::Hidden, cl::init(false),
92  cl::desc("Disable critical path priority in sched=list-ilp"));
93static cl::opt<bool> DisableSchedHeight(
94  "disable-sched-height", cl::Hidden, cl::init(false),
95  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
96
97static cl::opt<int> MaxReorderWindow(
98  "max-sched-reorder", cl::Hidden, cl::init(6),
99  cl::desc("Number of instructions to allow ahead of the critical path "
100           "in sched=list-ilp"));
101
102static cl::opt<unsigned> AvgIPC(
103  "sched-avg-ipc", cl::Hidden, cl::init(1),
104  cl::desc("Average inst/cycle when no target itinerary exists."));
105
106#ifndef NDEBUG
107namespace {
108  // For sched=list-ilp, count the number of times each factor comes into play.
109  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
110         FactStatic, FactOther, NumFactors };
111}
112static const char *FactorName[NumFactors] =
113{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
114static int FactorCount[NumFactors];
115#endif //!NDEBUG
116
117namespace {
118//===----------------------------------------------------------------------===//
119/// ScheduleDAGRRList - The actual register reduction list scheduler
120/// implementation.  This supports both top-down and bottom-up scheduling.
121///
122class ScheduleDAGRRList : public ScheduleDAGSDNodes {
123private:
124  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
125  /// it is top-down.
126  bool isBottomUp;
127
128  /// NeedLatency - True if the scheduler will make use of latency information.
129  ///
130  bool NeedLatency;
131
132  /// AvailableQueue - The priority queue to use for the available SUnits.
133  SchedulingPriorityQueue *AvailableQueue;
134
135  /// PendingQueue - This contains all of the instructions whose operands have
136  /// been issued, but their results are not ready yet (due to the latency of
137  /// the operation).  Once the operands become available, the instruction is
138  /// added to the AvailableQueue.
139  std::vector<SUnit*> PendingQueue;
140
141  /// HazardRec - The hazard recognizer to use.
142  ScheduleHazardRecognizer *HazardRec;
143
144  /// CurCycle - The current scheduler state corresponds to this cycle.
145  unsigned CurCycle;
146
147  /// MinAvailableCycle - Cycle of the soonest available instruction.
148  unsigned MinAvailableCycle;
149
150  /// IssueCount - Count of instructions issued in this cycle.
151  /// Currently valid only for bottom-up scheduling.
152  unsigned IssueCount;
153
154  /// LiveRegDefs - A set of physical registers and their definitions
155  /// that are "live". These nodes must be scheduled before any other nodes that
156  /// modify the registers can be scheduled.
157  unsigned NumLiveRegs;
158  std::vector<SUnit*> LiveRegDefs;
159  std::vector<SUnit*> LiveRegGens;
160
161  /// Topo - A topological ordering for SUnits which permits fast IsReachable
162  /// and similar queries.
163  ScheduleDAGTopologicalSort Topo;
164
165public:
166  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
167                    SchedulingPriorityQueue *availqueue,
168                    CodeGenOpt::Level OptLevel)
169    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
170      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
171      Topo(SUnits) {
172
173    const TargetMachine &tm = mf.getTarget();
174    if (DisableSchedCycles || !NeedLatency)
175      HazardRec = new ScheduleHazardRecognizer();
176    else
177      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
178  }
179
180  ~ScheduleDAGRRList() {
181    delete HazardRec;
182    delete AvailableQueue;
183  }
184
185  void Schedule();
186
187  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
188
189  /// IsReachable - Checks if SU is reachable from TargetSU.
190  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
191    return Topo.IsReachable(SU, TargetSU);
192  }
193
194  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
195  /// create a cycle.
196  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
197    return Topo.WillCreateCycle(SU, TargetSU);
198  }
199
200  /// AddPred - adds a predecessor edge to SUnit SU.
201  /// The edge is also registered with the topological ordering so that
202  /// reachability queries remain valid.
203  void AddPred(SUnit *SU, const SDep &D) {
204    Topo.AddPred(SU, D.getSUnit());
205    SU->addPred(D);
206  }
207
208  /// RemovePred - removes a predecessor edge from SUnit SU.
209  /// The edge is also removed from the topological ordering so that
210  /// reachability queries remain valid.
211  void RemovePred(SUnit *SU, const SDep &D) {
212    Topo.RemovePred(SU, D.getSUnit());
213    SU->removePred(D);
214  }
215
216private:
217  bool isReady(SUnit *SU) {
218    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
219      AvailableQueue->isReady(SU);
220  }
221
222  void ReleasePred(SUnit *SU, const SDep *PredEdge);
223  void ReleasePredecessors(SUnit *SU);
224  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
225  void ReleaseSuccessors(SUnit *SU);
226  void ReleasePending();
227  void AdvanceToCycle(unsigned NextCycle);
228  void AdvancePastStalls(SUnit *SU);
229  void EmitNode(SUnit *SU);
230  void ScheduleNodeBottomUp(SUnit*);
231  void CapturePred(SDep *PredEdge);
232  void UnscheduleNodeBottomUp(SUnit*);
233  void RestoreHazardCheckerBottomUp();
234  void BacktrackBottomUp(SUnit*, SUnit*);
235  SUnit *CopyAndMoveSuccessors(SUnit*);
236  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
237                                const TargetRegisterClass*,
238                                const TargetRegisterClass*,
239                                SmallVector<SUnit*, 2>&);
240  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
241
242  SUnit *PickNodeToScheduleBottomUp();
243  void ListScheduleBottomUp();
244
245  void ScheduleNodeTopDown(SUnit*);
246  void ListScheduleTopDown();
247
248
249  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
250  /// Updates the topological ordering if required.
251  SUnit *CreateNewSUnit(SDNode *N) {
252    unsigned NumSUnits = SUnits.size();
253    SUnit *NewNode = NewSUnit(N);
254    // Update the topological ordering.
255    if (NewNode->NodeNum >= NumSUnits)
256      Topo.InitDAGTopologicalSorting();
257    return NewNode;
258  }
259
260  /// CreateClone - Creates a new SUnit from an existing one.
261  /// Updates the topological ordering if required.
262  SUnit *CreateClone(SUnit *N) {
263    unsigned NumSUnits = SUnits.size();
264    SUnit *NewNode = Clone(N);
265    // Update the topological ordering.
266    if (NewNode->NodeNum >= NumSUnits)
267      Topo.InitDAGTopologicalSorting();
268    return NewNode;
269  }
270
271  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
272  /// need actual latency information but the hybrid scheduler does.
273  bool ForceUnitLatencies() const {
274    return !NeedLatency;
275  }
276};
277}  // end anonymous namespace
278
279
280/// Schedule - Schedule the DAG using list scheduling.
281void ScheduleDAGRRList::Schedule() {
282  DEBUG(dbgs()
283        << "********** List Scheduling BB#" << BB->getNumber()
284        << " '" << BB->getName() << "' **********\n");
285#ifndef NDEBUG
286  for (int i = 0; i < NumFactors; ++i) {
287    FactorCount[i] = 0;
288  }
289#endif //!NDEBUG
290
291  CurCycle = 0;
292  IssueCount = 0;
293  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
294  NumLiveRegs = 0;
295  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
296  LiveRegGens.resize(TRI->getNumRegs(), NULL);
297
298  // Build the scheduling graph.
299  BuildSchedGraph(NULL);
300
301  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
302          SUnits[su].dumpAll(this));
303  Topo.InitDAGTopologicalSorting();
304
305  AvailableQueue->initNodes(SUnits);
306
307  HazardRec->Reset();
308
309  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
310  if (isBottomUp)
311    ListScheduleBottomUp();
312  else
313    ListScheduleTopDown();
314
315#ifndef NDEBUG
316  for (int i = 0; i < NumFactors; ++i) {
317    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
318  }
319#endif // !NDEBUG
320  AvailableQueue->releaseState();
321}
322
323//===----------------------------------------------------------------------===//
324//  Bottom-Up Scheduling
325//===----------------------------------------------------------------------===//
326
327/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
328/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
329void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
330  SUnit *PredSU = PredEdge->getSUnit();
331
332#ifndef NDEBUG
333  if (PredSU->NumSuccsLeft == 0) {
334    dbgs() << "*** Scheduling failed! ***\n";
335    PredSU->dump(this);
336    dbgs() << " has been released too many times!\n";
337    llvm_unreachable(0);
338  }
339#endif
340  --PredSU->NumSuccsLeft;
341
342  if (!ForceUnitLatencies()) {
343    // Update the predecessor's height. This is now the cycle when the
344    // predecessor can be scheduled without causing a pipeline stall.
345    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
346  }
347
348  // If all the node's successors are scheduled, this node is ready
349  // to be scheduled. Ignore the special EntrySU node.
350  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
351    PredSU->isAvailable = true;
352
353    unsigned Height = PredSU->getHeight();
354    if (Height < MinAvailableCycle)
355      MinAvailableCycle = Height;
356
357    if (isReady(PredSU)) {
358      AvailableQueue->push(PredSU);
359    }
360    // CapturePred and others may have left the node in the pending queue;
361    // avoid adding it twice.
362    else if (!PredSU->isPending) {
363      PredSU->isPending = true;
364      PendingQueue.push_back(PredSU);
365    }
366  }
367}
368
369/// Call ReleasePred for each predecessor, then update register live def/gen.
370/// Always update LiveRegDefs for a register dependence even if the current SU
371/// also defines the register. This effectively creates one large live range
372/// across a sequence of two-address nodes. This is important because the
373/// entire chain must be scheduled together. Example:
374///
375/// flags = (3) add
376/// flags = (2) addc flags
377/// flags = (1) addc flags
378///
379/// results in
380///
381/// LiveRegDefs[flags] = 3
382/// LiveRegGens[flags] = 1
383///
384/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
385/// interference on flags.
386void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
387  // Bottom up: release predecessors
388  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
389       I != E; ++I) {
390    ReleasePred(SU, &*I);
391    if (I->isAssignedRegDep()) {
392      // This is a physical register dependency and it's impossible or
393      // expensive to copy the register. Make sure nothing that can
394      // clobber the register is scheduled between the predecessor and
395      // this node.
396      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
397      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
398             "interference on register dependence");
399      LiveRegDefs[I->getReg()] = I->getSUnit();
400      if (!LiveRegGens[I->getReg()]) {
401        ++NumLiveRegs;
402        LiveRegGens[I->getReg()] = SU;
403      }
404    }
405  }
406}
407
408/// Check to see if any of the pending instructions are ready to issue.  If
409/// so, add them to the available queue.
410void ScheduleDAGRRList::ReleasePending() {
411  if (DisableSchedCycles) {
412    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
413    return;
414  }
415
416  // If the available queue is empty, it is safe to reset MinAvailableCycle.
417  if (AvailableQueue->empty())
418    MinAvailableCycle = UINT_MAX;
419
420  // Check to see if any of the pending instructions are ready to issue.  If
421  // so, add them to the available queue.
422  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
423    unsigned ReadyCycle =
424      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
425    if (ReadyCycle < MinAvailableCycle)
426      MinAvailableCycle = ReadyCycle;
427
428    if (PendingQueue[i]->isAvailable) {
429      if (!isReady(PendingQueue[i]))
430          continue;
431      AvailableQueue->push(PendingQueue[i]);
432    }
433    PendingQueue[i]->isPending = false;
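    // Remove the entry by overwriting it with the last element and shrinking
    // the queue, then re-examine the same index.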
434    PendingQueue[i] = PendingQueue.back();
435    PendingQueue.pop_back();
436    --i; --e;
437  }
438}
439
440/// Move the scheduler state forward by the specified number of Cycles.
441void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
442  if (NextCycle <= CurCycle)
443    return;
444
445  IssueCount = 0;
446  AvailableQueue->setCurCycle(NextCycle);
447  if (!HazardRec->isEnabled()) {
448    // Bypass lots of virtual calls in case of long latency.
449    CurCycle = NextCycle;
450  }
451  else {
452    for (; CurCycle != NextCycle; ++CurCycle) {
453      if (isBottomUp)
454        HazardRec->RecedeCycle();
455      else
456        HazardRec->AdvanceCycle();
457    }
458  }
459  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
460  // available Q to release pending nodes at least once before popping.
461  ReleasePending();
462}
463
464/// Move the scheduler state forward until the specified node's dependents are
465/// ready and can be scheduled with no resource conflicts.
466void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
467  if (DisableSchedCycles)
468    return;
469
470  // FIXME: Nodes such as CopyFromReg probably should not advance the current
471  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
472  // has predecessors the cycle will be advanced when they are scheduled.
473  // But given the crude nature of modeling latency through such nodes, we
474  // currently need to treat these nodes like real instructions.
475  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
476
477  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
478
479  // Bump CurCycle to account for latency. We assume the latency of other
480  // available instructions may be hidden by the stall (not a full pipe stall).
481  // This updates the hazard recognizer's cycle before reserving resources for
482  // this instruction.
483  AdvanceToCycle(ReadyCycle);
484
485  // Calls are scheduled in their preceding cycle, so don't conflict with
486  // hazards from instructions after the call. EmitNode will reset the
487  // scoreboard state before emitting the call.
488  if (isBottomUp && SU->isCall)
489    return;
490
491  // FIXME: For resource conflicts in very long non-pipelined stages, we
492  // should probably skip ahead here to avoid useless scoreboard checks.
493  int Stalls = 0;
494  while (true) {
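    // Query the hazard recognizer at an offset of Stalls cycles from the
    // current cycle; the offset is negated for bottom-up scheduling since
    // cycles recede rather than advance.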
495    ScheduleHazardRecognizer::HazardType HT =
496      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
497
498    if (HT == ScheduleHazardRecognizer::NoHazard)
499      break;
500
501    ++Stalls;
502  }
503  AdvanceToCycle(CurCycle + Stalls);
504}
505
506/// Record this SUnit in the HazardRecognizer.
507/// Does not update CurCycle.
508void ScheduleDAGRRList::EmitNode(SUnit *SU) {
509  if (!HazardRec->isEnabled())
510    return;
511
512  // Check for phys reg copy.
513  if (!SU->getNode())
514    return;
515
516  switch (SU->getNode()->getOpcode()) {
517  default:
518    assert(SU->getNode()->isMachineOpcode() &&
519           "This target-independent node should not be scheduled.");
520    break;
521  case ISD::MERGE_VALUES:
522  case ISD::TokenFactor:
523  case ISD::CopyToReg:
524  case ISD::CopyFromReg:
525  case ISD::EH_LABEL:
526    // Noops don't affect the scoreboard state. Copies are likely to be
527    // removed.
528    return;
529  case ISD::INLINEASM:
530    // For inline asm, clear the pipeline state.
531    HazardRec->Reset();
532    return;
533  }
534  if (isBottomUp && SU->isCall) {
535    // Calls are scheduled with their preceding instructions. For bottom-up
536    // scheduling, clear the pipeline state before emitting.
537    HazardRec->Reset();
538  }
539
540  HazardRec->EmitInstruction(SU);
541
542  if (!isBottomUp && SU->isCall) {
543    HazardRec->Reset();
544  }
545}
546
547static void resetVRegCycle(SUnit *SU);
548
549/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
550/// count of its predecessors. If a predecessor pending count is zero, add it to
551/// the Available queue.
552void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
553  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
554  DEBUG(SU->dump(this));
555
556#ifndef NDEBUG
557  if (CurCycle < SU->getHeight())
558    DEBUG(dbgs() << "   Height [" << SU->getHeight()
559          << "] pipeline stall!\n");
560#endif
561
562  // FIXME: Do not modify node height. It may interfere with
563  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
564  // node its ready cycle can aid heuristics, and after scheduling it can
565  // indicate the scheduled cycle.
566  SU->setHeightToAtLeast(CurCycle);
567
568  // Reserve resources for the scheduled instruction.
569  EmitNode(SU);
570
571  Sequence.push_back(SU);
572
573  AvailableQueue->ScheduledNode(SU);
574
575  // If HazardRec is disabled, and each inst counts as one cycle, then
576  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
577  // PendingQueue for schedulers that implement HasReadyFilter.
578  if (!HazardRec->isEnabled() && AvgIPC < 2)
579    AdvanceToCycle(CurCycle + 1);
580
581  // Update liveness of predecessors before successors to avoid treating a
582  // two-address node as a live range def.
583  ReleasePredecessors(SU);
584
585  // Release all the implicit physical register defs that are live.
586  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
587       I != E; ++I) {
588    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
589    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
590      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
591      --NumLiveRegs;
592      LiveRegDefs[I->getReg()] = NULL;
593      LiveRegGens[I->getReg()] = NULL;
594    }
595  }
596
597  resetVRegCycle(SU);
598
599  SU->isScheduled = true;
600
601  // Conditions under which the scheduler should eagerly advance the cycle:
602  // (1) No available instructions
603  // (2) All pipelines full, so available instructions must have hazards.
604  //
605  // If HazardRec is disabled, the cycle was pre-advanced before calling
606  // ReleasePredecessors. In that case, IssueCount should remain 0.
607  //
608  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
609  if (HazardRec->isEnabled() || AvgIPC > 1) {
610    if (SU->getNode() && SU->getNode()->isMachineOpcode())
611      ++IssueCount;
612    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
613        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
614      AdvanceToCycle(CurCycle + 1);
615  }
616}
617
618/// CapturePred - This does the opposite of ReleasePred. Since SU is being
619/// unscheduled, increase the succ left count of its predecessors. Remove
620/// them from AvailableQueue if necessary.
621void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
622  SUnit *PredSU = PredEdge->getSUnit();
623  if (PredSU->isAvailable) {
624    PredSU->isAvailable = false;
625    if (!PredSU->isPending)
626      AvailableQueue->remove(PredSU);
627  }
628
629  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
630  ++PredSU->NumSuccsLeft;
631}
632
633/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
634/// state and its predecessors' states to reflect the change.
635void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
636  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
637  DEBUG(SU->dump(this));
638
639  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
640       I != E; ++I) {
641    CapturePred(&*I);
642    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
643      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
644      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
645             "Physical register dependency violated?");
646      --NumLiveRegs;
647      LiveRegDefs[I->getReg()] = NULL;
648      LiveRegGens[I->getReg()] = NULL;
649    }
650  }
651
652  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
653       I != E; ++I) {
654    if (I->isAssignedRegDep()) {
655      if (!LiveRegDefs[I->getReg()]) {
656        ++NumLiveRegs;
657      }
658      // This becomes the nearest def. Note that an earlier def may still be
659      // pending if this is a two-address node.
660      LiveRegDefs[I->getReg()] = SU;
661      if (LiveRegGens[I->getReg()] == NULL ||
662          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
663        LiveRegGens[I->getReg()] = I->getSUnit();
664    }
665  }
666  if (SU->getHeight() < MinAvailableCycle)
667    MinAvailableCycle = SU->getHeight();
668
669  SU->setHeightDirty();
670  SU->isScheduled = false;
671  SU->isAvailable = true;
672  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
673    // Don't make available until backtracking is complete.
674    SU->isPending = true;
675    PendingQueue.push_back(SU);
676  }
677  else {
678    AvailableQueue->push(SU);
679  }
680  AvailableQueue->UnscheduledNode(SU);
681}
682
683/// After backtracking, the hazard checker needs to be restored to a state
684/// corresponding to the current cycle.
685void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
686  HazardRec->Reset();
687
688  unsigned LookAhead = std::min((unsigned)Sequence.size(),
689                                HazardRec->getMaxLookAhead());
690  if (LookAhead == 0)
691    return;
692
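  // Replay the last LookAhead nodes of the current schedule into the reset
  // hazard recognizer, receding one cycle at a time until each node's height
  // is reached.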
693  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
694  unsigned HazardCycle = (*I)->getHeight();
695  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
696    SUnit *SU = *I;
697    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
698      HazardRec->RecedeCycle();
699    }
700    EmitNode(SU);
701  }
702}
703
704/// BacktrackBottomUp - Unschedule nodes back to and including BtSU so that
705/// SU can be scheduled without interference from live physical registers.
706void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
707  SUnit *OldSU = Sequence.back();
708  while (true) {
709    Sequence.pop_back();
710    if (SU->isSucc(OldSU))
711      // Don't try to remove SU from AvailableQueue.
712      SU->isAvailable = false;
713    // FIXME: use ready cycle instead of height
714    CurCycle = OldSU->getHeight();
715    UnscheduleNodeBottomUp(OldSU);
716    AvailableQueue->setCurCycle(CurCycle);
717    if (OldSU == BtSU)
718      break;
719    OldSU = Sequence.back();
720  }
721
722  assert(!SU->isSucc(OldSU) && "Something is wrong!");
723
724  RestoreHazardCheckerBottomUp();
725
726  ReleasePending();
727
728  ++NumBacktracks;
729}
730
731static bool isOperandOf(const SUnit *SU, SDNode *N) {
732  for (const SDNode *SUNode = SU->getNode(); SUNode;
733       SUNode = SUNode->getGluedNode()) {
734    if (SUNode->isOperandOf(N))
735      return true;
736  }
737  return false;
738}
739
740/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
741/// successors to the newly created node.
742SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
743  SDNode *N = SU->getNode();
744  if (!N)
745    return NULL;
746
747  if (SU->getNode()->getGluedNode())
748    return NULL;
749
750  SUnit *NewSU;
751  bool TryUnfold = false;
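  // Glue results or operands tie this node to its neighbors, so it cannot be
  // duplicated; a chain (MVT::Other) result indicates a possible folded load
  // that unfolding may be able to separate out.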
752  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
753    EVT VT = N->getValueType(i);
754    if (VT == MVT::Glue)
755      return NULL;
756    else if (VT == MVT::Other)
757      TryUnfold = true;
758  }
759  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
760    const SDValue &Op = N->getOperand(i);
761    EVT VT = Op.getNode()->getValueType(Op.getResNo());
762    if (VT == MVT::Glue)
763      return NULL;
764  }
765
766  if (TryUnfold) {
767    SmallVector<SDNode*, 2> NewNodes;
768    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
769      return NULL;
770
771    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
772    assert(NewNodes.size() == 2 && "Expected a load folding node!");
773
774    N = NewNodes[1];
775    SDNode *LoadNode = NewNodes[0];
776    unsigned NumVals = N->getNumValues();
777    unsigned OldNumVals = SU->getNode()->getNumValues();
778    for (unsigned i = 0; i != NumVals; ++i)
779      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
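    // The original node's last result is its chain; route chain users to the
    // unfolded load's chain output (result 1).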
780    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
781                                   SDValue(LoadNode, 1));
782
783    // LoadNode may already exist. This can happen when there is another
784    // load from the same location that produces the same type of value
785    // but has different alignment or volatility.
786    bool isNewLoad = true;
787    SUnit *LoadSU;
788    if (LoadNode->getNodeId() != -1) {
789      LoadSU = &SUnits[LoadNode->getNodeId()];
790      isNewLoad = false;
791    } else {
792      LoadSU = CreateNewSUnit(LoadNode);
793      LoadNode->setNodeId(LoadSU->NodeNum);
794
795      InitNumRegDefsLeft(LoadSU);
796      ComputeLatency(LoadSU);
797    }
798
799    SUnit *NewSU = CreateNewSUnit(N);
800    assert(N->getNodeId() == -1 && "Node already inserted!");
801    N->setNodeId(NewSU->NodeNum);
802
803    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
804    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
805      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
806        NewSU->isTwoAddress = true;
807        break;
808      }
809    }
810    if (TID.isCommutable())
811      NewSU->isCommutable = true;
812
813    InitNumRegDefsLeft(NewSU);
814    ComputeLatency(NewSU);
815
816    // Record all the edges to and from the old SU, by category.
817    SmallVector<SDep, 4> ChainPreds;
818    SmallVector<SDep, 4> ChainSuccs;
819    SmallVector<SDep, 4> LoadPreds;
820    SmallVector<SDep, 4> NodePreds;
821    SmallVector<SDep, 4> NodeSuccs;
822    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
823         I != E; ++I) {
824      if (I->isCtrl())
825        ChainPreds.push_back(*I);
826      else if (isOperandOf(I->getSUnit(), LoadNode))
827        LoadPreds.push_back(*I);
828      else
829        NodePreds.push_back(*I);
830    }
831    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
832         I != E; ++I) {
833      if (I->isCtrl())
834        ChainSuccs.push_back(*I);
835      else
836        NodeSuccs.push_back(*I);
837    }
838
839    // Now assign edges to the newly-created nodes.
840    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
841      const SDep &Pred = ChainPreds[i];
842      RemovePred(SU, Pred);
843      if (isNewLoad)
844        AddPred(LoadSU, Pred);
845    }
846    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
847      const SDep &Pred = LoadPreds[i];
848      RemovePred(SU, Pred);
849      if (isNewLoad)
850        AddPred(LoadSU, Pred);
851    }
852    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
853      const SDep &Pred = NodePreds[i];
854      RemovePred(SU, Pred);
855      AddPred(NewSU, Pred);
856    }
857    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
858      SDep D = NodeSuccs[i];
859      SUnit *SuccDep = D.getSUnit();
860      D.setSUnit(SU);
861      RemovePred(SuccDep, D);
862      D.setSUnit(NewSU);
863      AddPred(SuccDep, D);
864      // Balance register pressure.
865      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
866          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
867        --NewSU->NumRegDefsLeft;
868    }
869    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
870      SDep D = ChainSuccs[i];
871      SUnit *SuccDep = D.getSUnit();
872      D.setSUnit(SU);
873      RemovePred(SuccDep, D);
874      if (isNewLoad) {
875        D.setSUnit(LoadSU);
876        AddPred(SuccDep, D);
877      }
878    }
879
880    // Add a data dependency to reflect that NewSU reads the value defined
881    // by LoadSU.
882    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
883
884    if (isNewLoad)
885      AvailableQueue->addNode(LoadSU);
886    AvailableQueue->addNode(NewSU);
887
888    ++NumUnfolds;
889
890    if (NewSU->NumSuccsLeft == 0) {
891      NewSU->isAvailable = true;
892      return NewSU;
893    }
894    SU = NewSU;
895  }
896
897  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
898  NewSU = CreateClone(SU);
899
900  // New SUnit has the exact same predecessors.
901  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
902       I != E; ++I)
903    if (!I->isArtificial())
904      AddPred(NewSU, *I);
905
906  // Only copy scheduled successors. Cut them from old node's successor
907  // list and move them over.
908  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
909  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
910       I != E; ++I) {
911    if (I->isArtificial())
912      continue;
913    SUnit *SuccSU = I->getSUnit();
914    if (SuccSU->isScheduled) {
915      SDep D = *I;
916      D.setSUnit(NewSU);
917      AddPred(SuccSU, D);
918      D.setSUnit(SU);
919      DelDeps.push_back(std::make_pair(SuccSU, D));
920    }
921  }
922  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
923    RemovePred(DelDeps[i].first, DelDeps[i].second);
924
925  AvailableQueue->updateNode(SU);
926  AvailableQueue->addNode(NewSU);
927
928  ++NumDups;
929  return NewSU;
930}
931
932/// InsertCopiesAndMoveSuccs - Insert register copies and move all
933/// scheduled successors of the given SUnit to the last copy.
934void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
935                                               const TargetRegisterClass *DestRC,
936                                               const TargetRegisterClass *SrcRC,
937                                               SmallVector<SUnit*, 2> &Copies) {
938  SUnit *CopyFromSU = CreateNewSUnit(NULL);
939  CopyFromSU->CopySrcRC = SrcRC;
940  CopyFromSU->CopyDstRC = DestRC;
941
942  SUnit *CopyToSU = CreateNewSUnit(NULL);
943  CopyToSU->CopySrcRC = DestRC;
944  CopyToSU->CopyDstRC = SrcRC;
945
946  // Only copy scheduled successors. Cut them from old node's successor
947  // list and move them over.
948  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
949  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
950       I != E; ++I) {
951    if (I->isArtificial())
952      continue;
953    SUnit *SuccSU = I->getSUnit();
954    if (SuccSU->isScheduled) {
955      SDep D = *I;
956      D.setSUnit(CopyToSU);
957      AddPred(SuccSU, D);
958      DelDeps.push_back(std::make_pair(SuccSU, *I));
959    }
960    else {
961      // Avoid scheduling the def-side copy before other successors. Otherwise
962      // we could introduce another physreg interference on the copy and
963      // continue inserting copies indefinitely.
964      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
965             /*Reg=*/0, /*isNormalMemory=*/false,
966             /*isMustAlias=*/false, /*isArtificial=*/true);
967      AddPred(SuccSU, D);
968    }
969  }
970  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
971    RemovePred(DelDeps[i].first, DelDeps[i].second);
972
973  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
974  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
975
976  AvailableQueue->updateNode(SU);
977  AvailableQueue->addNode(CopyFromSU);
978  AvailableQueue->addNode(CopyToSU);
979  Copies.push_back(CopyFromSU);
980  Copies.push_back(CopyToSU);
981
982  ++NumPRCopies;
983}
984
985/// getPhysicalRegisterVT - Returns the ValueType of the physical register
986/// definition of the specified node.
987/// FIXME: Move to SelectionDAG?
988static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
989                                 const TargetInstrInfo *TII) {
990  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
991  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
992  unsigned NumRes = TID.getNumDefs();
993  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
994    if (Reg == *ImpDef)
995      break;
996    ++NumRes;
997  }
998  return N->getValueType(NumRes);
999}
1000
1001/// CheckForLiveRegDef - Append to LRegs any "live" registers that the
1002/// specified register def of the specified SUnit would clobber.
1003static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1004                               std::vector<SUnit*> &LiveRegDefs,
1005                               SmallSet<unsigned, 4> &RegAdded,
1006                               SmallVector<unsigned, 4> &LRegs,
1007                               const TargetRegisterInfo *TRI) {
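  // getOverlaps returns a zero-terminated list containing Reg itself and all
  // registers that alias it.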
1008  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
1009
1010    // Check if Reg is live.
1011    if (!LiveRegDefs[*AliasI]) continue;
1012
1013    // Allow multiple uses of the same def.
1014    if (LiveRegDefs[*AliasI] == SU) continue;
1015
1016    // Add Reg to the set of interfering live regs.
1017    if (RegAdded.insert(*AliasI)) {
1018      LRegs.push_back(*AliasI);
1019    }
1020  }
1021}
1022
1023/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1024/// scheduling of the given node to satisfy live physical register dependencies.
1025/// If the specific node is the last one that's available to schedule, do
1026/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1027bool ScheduleDAGRRList::
1028DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1029  if (NumLiveRegs == 0)
1030    return false;
1031
1032  SmallSet<unsigned, 4> RegAdded;
1033  // If this node would clobber any "live" register, then it's not ready.
1034  //
1035  // If SU is the currently live definition of the same register that it uses,
1036  // then we are free to schedule it.
1037  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1038       I != E; ++I) {
1039    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1040      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1041                         RegAdded, LRegs, TRI);
1042  }
1043
1044  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1045    if (Node->getOpcode() == ISD::INLINEASM) {
1046      // Inline asm can clobber physical defs.
1047      unsigned NumOps = Node->getNumOperands();
1048      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1049        --NumOps;  // Ignore the glue operand.
1050
1051      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
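        // Each operand group starts with a flag word encoding the kind of the
        // group and how many register operands follow it.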
1052        unsigned Flags =
1053          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1054        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1055
1056        ++i; // Skip the ID value.
1057        if (InlineAsm::isRegDefKind(Flags) ||
1058            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
1059          // Check for def of register or earlyclobber register.
1060          for (; NumVals; --NumVals, ++i) {
1061            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1062            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1063              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1064          }
1065        } else
1066          i += NumVals;
1067      }
1068      continue;
1069    }
1070
1071    if (!Node->isMachineOpcode())
1072      continue;
1073    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
1074    if (!TID.ImplicitDefs)
1075      continue;
1076    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
1077      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1078  }
1079
1080  return !LRegs.empty();
1081}
1082
1083/// Return a node that can be scheduled in this cycle. Requirements:
1084/// (1) Ready: latency has been satisfied
1085/// (2) No Hazards: resources are available
1086/// (3) No Interferences: may unschedule to break register interferences.
1087SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1088  SmallVector<SUnit*, 4> Interferences;
1089  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1090
1091  SUnit *CurSU = AvailableQueue->pop();
1092  while (CurSU) {
1093    SmallVector<unsigned, 4> LRegs;
1094    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1095      break;
1096    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1097
1098    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1099    Interferences.push_back(CurSU);
1100    CurSU = AvailableQueue->pop();
1101  }
1102  if (CurSU) {
1103    // Add the nodes that aren't ready back onto the available list.
1104    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1105      Interferences[i]->isPending = false;
1106      assert(Interferences[i]->isAvailable && "must still be available");
1107      AvailableQueue->push(Interferences[i]);
1108    }
1109    return CurSU;
1110  }
1111
1112  // All candidates are delayed due to live physical reg dependencies.
1113  // Try backtracking, code duplication, or inserting cross class copies
1114  // to resolve it.
1115  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1116    SUnit *TrySU = Interferences[i];
1117    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1118
1119    // Try unscheduling up to the point where it's safe to schedule
1120    // this node.
1121    SUnit *BtSU = NULL;
1122    unsigned LiveCycle = UINT_MAX;
1123    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1124      unsigned Reg = LRegs[j];
1125      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1126        BtSU = LiveRegGens[Reg];
1127        LiveCycle = BtSU->getHeight();
1128      }
1129    }
1130    if (!WillCreateCycle(TrySU, BtSU))  {
1131      BacktrackBottomUp(TrySU, BtSU);
1132
1133      // Force the current node to be scheduled before the node that
1134      // requires the physical reg dep.
1135      if (BtSU->isAvailable) {
1136        BtSU->isAvailable = false;
1137        if (!BtSU->isPending)
1138          AvailableQueue->remove(BtSU);
1139      }
1140      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1141                          /*Reg=*/0, /*isNormalMemory=*/false,
1142                          /*isMustAlias=*/false, /*isArtificial=*/true));
1143
1144      // If one or more successors have been unscheduled, then the current
1145      // node is no longer available. Schedule a successor that's now
1146      // available instead.
1147      if (!TrySU->isAvailable) {
1148        CurSU = AvailableQueue->pop();
1149      }
1150      else {
1151        CurSU = TrySU;
1152        TrySU->isPending = false;
1153        Interferences.erase(Interferences.begin()+i);
1154      }
1155      break;
1156    }
1157  }
1158
1159  if (!CurSU) {
1160    // Can't backtrack. If it's too expensive to copy the value, then try
1161    // duplicate the nodes that produces these "too expensive to copy"
1162    // values to break the dependency. In case even that doesn't work,
1163    // insert cross class copies.
1164    // If it's not too expensive, i.e. cost != -1, issue copies.
1165    SUnit *TrySU = Interferences[0];
1166    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1167    assert(LRegs.size() == 1 && "Can't handle this yet!");
1168    unsigned Reg = LRegs[0];
1169    SUnit *LRDef = LiveRegDefs[Reg];
1170    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1171    const TargetRegisterClass *RC =
1172      TRI->getMinimalPhysRegClass(Reg, VT);
1173    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1174
1175    // If the cross copy register class is the same as RC, then it must be
1176    // possible to copy the value directly. Do not try to duplicate the def.
1177    // If the cross copy register class is not the same as RC, then it's
1178    // possible to copy the value, but it requires cross register class copies
1179    // and it is expensive.
1180    // If cross copy register class is null, then it's not possible to copy
1181    // the value at all.
1182    SUnit *NewDef = 0;
1183    if (DestRC != RC) {
1184      NewDef = CopyAndMoveSuccessors(LRDef);
1185      if (!DestRC && !NewDef)
1186        report_fatal_error("Can't handle live physical register dependency!");
1187    }
1188    if (!NewDef) {
1189      // Issue copies, these can be expensive cross register class copies.
1190      SmallVector<SUnit*, 2> Copies;
1191      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1192      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1193            << " to SU #" << Copies.front()->NodeNum << "\n");
1194      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1195                          /*Reg=*/0, /*isNormalMemory=*/false,
1196                          /*isMustAlias=*/false,
1197                          /*isArtificial=*/true));
1198      NewDef = Copies.back();
1199    }
1200
1201    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1202          << " to SU #" << TrySU->NodeNum << "\n");
1203    LiveRegDefs[Reg] = NewDef;
1204    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1205                         /*Reg=*/0, /*isNormalMemory=*/false,
1206                         /*isMustAlias=*/false,
1207                         /*isArtificial=*/true));
1208    TrySU->isAvailable = false;
1209    CurSU = NewDef;
1210  }
1211
1212  assert(CurSU && "Unable to resolve live physical register dependencies!");
1213
1214  // Add the nodes that aren't ready back onto the available list.
1215  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1216    Interferences[i]->isPending = false;
1217    // May no longer be available due to backtracking.
1218    if (Interferences[i]->isAvailable) {
1219      AvailableQueue->push(Interferences[i]);
1220    }
1221  }
1222  return CurSU;
1223}
1224
1225/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1226/// schedulers.
1227void ScheduleDAGRRList::ListScheduleBottomUp() {
1228  // Release any predecessors of the special Exit node.
1229  ReleasePredecessors(&ExitSU);
1230
1231  // Add root to Available queue.
1232  if (!SUnits.empty()) {
1233    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1234    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1235    RootSU->isAvailable = true;
1236    AvailableQueue->push(RootSU);
1237  }
1238
1239  // While Available queue is not empty, grab the node with the highest
1240  // priority. If it is not ready put it back.  Schedule the node.
1241  Sequence.reserve(SUnits.size());
1242  while (!AvailableQueue->empty()) {
1243    DEBUG(dbgs() << "\nExamining Available:\n";
1244          AvailableQueue->dump(this));
1245
1246    // Pick the best node to schedule taking all constraints into
1247    // consideration.
1248    SUnit *SU = PickNodeToScheduleBottomUp();
1249
1250    AdvancePastStalls(SU);
1251
1252    ScheduleNodeBottomUp(SU);
1253
1254    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1255      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1256      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1257      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1258    }
1259  }
1260
1261  // Reverse the order if it is bottom up.
1262  std::reverse(Sequence.begin(), Sequence.end());
1263
1264#ifndef NDEBUG
1265  VerifySchedule(isBottomUp);
1266#endif
1267}
1268
1269//===----------------------------------------------------------------------===//
1270//  Top-Down Scheduling
1271//===----------------------------------------------------------------------===//
1272
1273/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1274/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1275void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1276  SUnit *SuccSU = SuccEdge->getSUnit();
1277
1278#ifndef NDEBUG
1279  if (SuccSU->NumPredsLeft == 0) {
1280    dbgs() << "*** Scheduling failed! ***\n";
1281    SuccSU->dump(this);
1282    dbgs() << " has been released too many times!\n";
1283    llvm_unreachable(0);
1284  }
1285#endif
1286  --SuccSU->NumPredsLeft;
1287
1288  // If all the node's predecessors are scheduled, this node is ready
1289  // to be scheduled. Ignore the special ExitSU node.
1290  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1291    SuccSU->isAvailable = true;
1292    AvailableQueue->push(SuccSU);
1293  }
1294}
1295
1296void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1297  // Top down: release successors
1298  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1299       I != E; ++I) {
1300    assert(!I->isAssignedRegDep() &&
1301           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1302
1303    ReleaseSucc(SU, &*I);
1304  }
1305}
1306
1307/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1308/// count of its successors. If a successor pending count is zero, add it to
1309/// the Available queue.
1310void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1311  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1312  DEBUG(SU->dump(this));
1313
1314  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1315  SU->setDepthToAtLeast(CurCycle);
1316  Sequence.push_back(SU);
1317
1318  ReleaseSuccessors(SU);
1319  SU->isScheduled = true;
1320  AvailableQueue->ScheduledNode(SU);
1321}
1322
1323/// ListScheduleTopDown - The main loop of list scheduling for top-down
1324/// schedulers.
1325void ScheduleDAGRRList::ListScheduleTopDown() {
1326  AvailableQueue->setCurCycle(CurCycle);
1327
1328  // Release any successors of the special Entry node.
1329  ReleaseSuccessors(&EntrySU);
1330
1331  // Add all leaves to the Available queue.
1332  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1333    // It is available if it has no predecessors.
1334    if (SUnits[i].Preds.empty()) {
1335      AvailableQueue->push(&SUnits[i]);
1336      SUnits[i].isAvailable = true;
1337    }
1338  }
1339
1340  // While Available queue is not empty, grab the node with the highest
1341  // priority. If it is not ready put it back.  Schedule the node.
1342  Sequence.reserve(SUnits.size());
1343  while (!AvailableQueue->empty()) {
1344    SUnit *CurSU = AvailableQueue->pop();
1345
1346    if (CurSU)
1347      ScheduleNodeTopDown(CurSU);
1348    ++CurCycle;
1349    AvailableQueue->setCurCycle(CurCycle);
1350  }
1351
1352#ifndef NDEBUG
1353  VerifySchedule(isBottomUp);
1354#endif
1355}
1356
1357
1358//===----------------------------------------------------------------------===//
1359//                RegReductionPriorityQueue Definition
1360//===----------------------------------------------------------------------===//
1361//
1362// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1363// to reduce register pressure.
1364//
1365namespace {
1366class RegReductionPQBase;
1367
1368struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1369  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1370};
1371
1372/// bu_ls_rr_sort - Priority function for bottom up register pressure
1373/// reduction scheduler.
1374struct bu_ls_rr_sort : public queue_sort {
1375  enum {
1376    IsBottomUp = true,
1377    HasReadyFilter = false
1378  };
1379
1380  RegReductionPQBase *SPQ;
1381  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1382  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1383
1384  bool operator()(SUnit* left, SUnit* right) const;
1385};
1386
1387// td_ls_rr_sort - Priority function for top down register pressure reduction
1388// scheduler.
1389struct td_ls_rr_sort : public queue_sort {
1390  enum {
1391    IsBottomUp = false,
1392    HasReadyFilter = false
1393  };
1394
1395  RegReductionPQBase *SPQ;
1396  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1397  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1398
1399  bool operator()(const SUnit* left, const SUnit* right) const;
1400};
1401
1402// src_ls_rr_sort - Priority function for source order scheduler.
1403struct src_ls_rr_sort : public queue_sort {
1404  enum {
1405    IsBottomUp = true,
1406    HasReadyFilter = false
1407  };
1408
1409  RegReductionPQBase *SPQ;
1410  src_ls_rr_sort(RegReductionPQBase *spq)
1411    : SPQ(spq) {}
1412  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1413    : SPQ(RHS.SPQ) {}
1414
1415  bool operator()(SUnit* left, SUnit* right) const;
1416};
1417
1418// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1419struct hybrid_ls_rr_sort : public queue_sort {
1420  enum {
1421    IsBottomUp = true,
1422    HasReadyFilter = false
1423  };
1424
1425  RegReductionPQBase *SPQ;
1426  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1427    : SPQ(spq) {}
1428  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1429    : SPQ(RHS.SPQ) {}
1430
1431  bool isReady(SUnit *SU, unsigned CurCycle) const;
1432
1433  bool operator()(SUnit* left, SUnit* right) const;
1434};
1435
1436// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1437// scheduler.
1438struct ilp_ls_rr_sort : public queue_sort {
1439  enum {
1440    IsBottomUp = true,
1441    HasReadyFilter = false
1442  };
1443
1444  RegReductionPQBase *SPQ;
1445  ilp_ls_rr_sort(RegReductionPQBase *spq)
1446    : SPQ(spq) {}
1447  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1448    : SPQ(RHS.SPQ) {}
1449
1450  bool isReady(SUnit *SU, unsigned CurCycle) const;
1451
1452  bool operator()(SUnit* left, SUnit* right) const;
1453};
1454
1455class RegReductionPQBase : public SchedulingPriorityQueue {
1456protected:
1457  std::vector<SUnit*> Queue;
1458  unsigned CurQueueId;
1459  bool TracksRegPressure;
1460
1461  // SUnits - The SUnits for the current graph.
1462  std::vector<SUnit> *SUnits;
1463
1464  MachineFunction &MF;
1465  const TargetInstrInfo *TII;
1466  const TargetRegisterInfo *TRI;
1467  const TargetLowering *TLI;
1468  ScheduleDAGRRList *scheduleDAG;
1469
1470  // SethiUllmanNumbers - The SethiUllman number for each node.
1471  std::vector<unsigned> SethiUllmanNumbers;
1472
1473  /// RegPressure - Tracking current reg pressure per register class.
1474  ///
1475  std::vector<unsigned> RegPressure;
1476
1477  /// RegLimit - Tracking the number of allocatable registers per register
1478  /// class.
1479  std::vector<unsigned> RegLimit;
1480
1481public:
1482  RegReductionPQBase(MachineFunction &mf,
1483                     bool hasReadyFilter,
1484                     bool tracksrp,
1485                     const TargetInstrInfo *tii,
1486                     const TargetRegisterInfo *tri,
1487                     const TargetLowering *tli)
1488    : SchedulingPriorityQueue(hasReadyFilter),
1489      CurQueueId(0), TracksRegPressure(tracksrp),
1490      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1491    if (TracksRegPressure) {
1492      unsigned NumRC = TRI->getNumRegClasses();
1493      RegLimit.resize(NumRC);
1494      RegPressure.resize(NumRC);
1495      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1496      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1497      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1498             E = TRI->regclass_end(); I != E; ++I)
1499        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1500    }
1501  }
1502
1503  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1504    scheduleDAG = scheduleDag;
1505  }
1506
1507  ScheduleHazardRecognizer* getHazardRec() {
1508    return scheduleDAG->getHazardRec();
1509  }
1510
1511  void initNodes(std::vector<SUnit> &sunits);
1512
1513  void addNode(const SUnit *SU);
1514
1515  void updateNode(const SUnit *SU);
1516
1517  void releaseState() {
1518    SUnits = 0;
1519    SethiUllmanNumbers.clear();
1520    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1521  }
1522
1523  unsigned getNodePriority(const SUnit *SU) const;
1524
1525  unsigned getNodeOrdering(const SUnit *SU) const {
1526    if (!SU->getNode()) return 0;
1527
1528    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1529  }
1530
1531  bool empty() const { return Queue.empty(); }
1532
1533  void push(SUnit *U) {
1534    assert(!U->NodeQueueId && "Node in the queue already");
1535    U->NodeQueueId = ++CurQueueId;
1536    Queue.push_back(U);
1537  }
1538
1539  void remove(SUnit *SU) {
1540    assert(!Queue.empty() && "Queue is empty!");
1541    assert(SU->NodeQueueId != 0 && "Not in queue!");
1542    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1543                                                 SU);
1544    if (I != prior(Queue.end()))
1545      std::swap(*I, Queue.back());
1546    Queue.pop_back();
1547    SU->NodeQueueId = 0;
1548  }
1549
1550  bool tracksRegPressure() const { return TracksRegPressure; }
1551
1552  void dumpRegPressure() const;
1553
1554  bool HighRegPressure(const SUnit *SU) const;
1555
1556  bool MayReduceRegPressure(SUnit *SU) const;
1557
1558  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1559
1560  void ScheduledNode(SUnit *SU);
1561
1562  void UnscheduledNode(SUnit *SU);
1563
1564protected:
1565  bool canClobber(const SUnit *SU, const SUnit *Op);
1566  void AddPseudoTwoAddrDeps();
1567  void PrescheduleNodesWithMultipleUses();
1568  void CalculateSethiUllmanNumbers();
1569};
1570
1571template<class SF>
1572class RegReductionPriorityQueue : public RegReductionPQBase {
1573  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1574    std::vector<SUnit *>::iterator Best = Q.begin();
1575    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1576           E = Q.end(); I != E; ++I)
1577      if (Picker(*Best, *I))
1578        Best = I;
1579    SUnit *V = *Best;
1580    if (Best != prior(Q.end()))
1581      std::swap(*Best, Q.back());
1582    Q.pop_back();
1583    return V;
1584  }
1585
1586  SF Picker;
1587
1588public:
1589  RegReductionPriorityQueue(MachineFunction &mf,
1590                            bool tracksrp,
1591                            const TargetInstrInfo *tii,
1592                            const TargetRegisterInfo *tri,
1593                            const TargetLowering *tli)
1594    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1595      Picker(this) {}
1596
1597  bool isBottomUp() const { return SF::IsBottomUp; }
1598
1599  bool isReady(SUnit *U) const {
1600    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1601  }
1602
1603  SUnit *pop() {
1604    if (Queue.empty()) return NULL;
1605
1606    SUnit *V = popFromQueue(Queue, Picker);
1607    V->NodeQueueId = 0;
1608    return V;
1609  }
1610
1611  void dump(ScheduleDAG *DAG) const {
1612    // Emulate pop() without clobbering NodeQueueIds.
1613    std::vector<SUnit*> DumpQueue = Queue;
1614    SF DumpPicker = Picker;
1615    while (!DumpQueue.empty()) {
1616      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1617      if (isBottomUp())
1618        dbgs() << "Height " << SU->getHeight() << ": ";
1619      else
1620        dbgs() << "Depth " << SU->getDepth() << ": ";
1621      SU->dump(DAG);
1622    }
1623  }
1624};
1625
1626typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1627BURegReductionPriorityQueue;
1628
1629typedef RegReductionPriorityQueue<td_ls_rr_sort>
1630TDRegReductionPriorityQueue;
1631
1632typedef RegReductionPriorityQueue<src_ls_rr_sort>
1633SrcRegReductionPriorityQueue;
1634
1635typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1636HybridBURRPriorityQueue;
1637
1638typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1639ILPBURRPriorityQueue;
1640} // end anonymous namespace
1641
1642//===----------------------------------------------------------------------===//
1643//           Static Node Priority for Register Pressure Reduction
1644//===----------------------------------------------------------------------===//
1645
1646// Check for special nodes that bypass scheduling heuristics.
1647// Currently this pushes TokenFactor nodes down, but may be used for other
1648// pseudo-ops as well.
1649//
1650// Return -1 to schedule right above left, 1 for left above right.
1651// Return 0 if no bias exists.
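//
// For illustration: when only the left node is marked isScheduleLow this
// returns -1 (schedule right above left), and when only the right node is
// marked it returns 1, so a low-priority pseudo-op such as TokenFactor sinks
// toward the bottom of the bottom-up schedule regardless of other heuristics.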
1652static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1653  bool LSchedLow = left->isScheduleLow;
1654  bool RSchedLow = right->isScheduleLow;
1655  if (LSchedLow != RSchedLow)
1656    return LSchedLow < RSchedLow ? 1 : -1;
1657  return 0;
1658}
1659
1660/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
1661/// Smaller number is the higher priority.
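///
/// Illustrative values of the recurrence below: a leaf SU with no data
/// predecessors gets number 1; an SU whose two operands both have number 1
/// gets 1 + 1 = 2 (equal subtrees raise the requirement); an SU whose
/// operands have numbers 2 and 1 simply keeps the larger value, 2.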
1662static unsigned
1663CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1664  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1665  if (SethiUllmanNumber != 0)
1666    return SethiUllmanNumber;
1667
1668  unsigned Extra = 0;
1669  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1670       I != E; ++I) {
1671    if (I->isCtrl()) continue;  // ignore chain preds
1672    SUnit *PredSU = I->getSUnit();
1673    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1674    if (PredSethiUllman > SethiUllmanNumber) {
1675      SethiUllmanNumber = PredSethiUllman;
1676      Extra = 0;
1677    } else if (PredSethiUllman == SethiUllmanNumber)
1678      ++Extra;
1679  }
1680
1681  SethiUllmanNumber += Extra;
1682
1683  if (SethiUllmanNumber == 0)
1684    SethiUllmanNumber = 1;
1685
1686  return SethiUllmanNumber;
1687}
1688
1689/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1690/// scheduling units.
1691void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1692  SethiUllmanNumbers.assign(SUnits->size(), 0);
1693
1694  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1695    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1696}
1697
1698void RegReductionPQBase::addNode(const SUnit *SU) {
1699  unsigned SUSize = SethiUllmanNumbers.size();
1700  if (SUnits->size() > SUSize)
1701    SethiUllmanNumbers.resize(SUSize*2, 0);
1702  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1703}
1704
1705void RegReductionPQBase::updateNode(const SUnit *SU) {
1706  SethiUllmanNumbers[SU->NodeNum] = 0;
1707  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1708}
1709
1710// Lower priority means schedule further down. For bottom-up scheduling, lower
1711// priority SUs are scheduled before higher priority SUs.
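//
// For illustration: TokenFactor, CopyToReg, the subreg pseudo-ops, and SUs
// with no data predecessors all return 0; a store-like SU with data
// predecessors but no data successors returns 0xffff; everything else
// returns its Sethi-Ullman number.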
1712unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1713  assert(SU->NodeNum < SethiUllmanNumbers.size());
1714  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1715  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1716    // CopyToReg should be close to its uses to facilitate coalescing and
1717    // avoid spilling.
1718    return 0;
1719  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1720      Opc == TargetOpcode::SUBREG_TO_REG ||
1721      Opc == TargetOpcode::INSERT_SUBREG)
1722    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1723    // close to their uses to facilitate coalescing.
1724    return 0;
1725  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1726    // If SU does not have a register use, i.e. it doesn't produce a value
1727    // that would be consumed (e.g. store), then it terminates a chain of
1728    // computation.  Give it a large SethiUllman number so it will be
1729    // scheduled right before its predecessors, so that it doesn't lengthen
1730    // their live ranges.
1731    return 0xffff;
1732  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1733    // If SU does not have a register def, schedule it close to its uses
1734    // because it does not lengthen any live ranges.
1735    return 0;
1736#if 1
1737  return SethiUllmanNumbers[SU->NodeNum];
1738#else
1739  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1740  if (SU->isCallOp) {
1741    // FIXME: This assumes all of the defs are used as call operands.
1742    int NP = (int)Priority - SU->getNode()->getNumValues();
1743    return (NP > 0) ? NP : 0;
1744  }
1745  return Priority;
1746#endif
1747}
1748
1749//===----------------------------------------------------------------------===//
1750//                     Register Pressure Tracking
1751//===----------------------------------------------------------------------===//
1752
1753void RegReductionPQBase::dumpRegPressure() const {
1754  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1755         E = TRI->regclass_end(); I != E; ++I) {
1756    const TargetRegisterClass *RC = *I;
1757    unsigned Id = RC->getID();
1758    unsigned RP = RegPressure[Id];
1759    if (!RP) continue;
1760    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1761          << '\n');
1762  }
1763}
1764
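// HighRegPressure - Return true if scheduling SU would push some register
// class to (or past) its pressure limit through the operand defs it makes
// live. For illustration: with a class limit of 8 and current pressure 7, a
// single predecessor def of cost 1 in that class is enough, since 7 + 1 >= 8.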
1765bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1766  if (!TLI)
1767    return false;
1768
1769  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1770       I != E; ++I) {
1771    if (I->isCtrl())
1772      continue;
1773    SUnit *PredSU = I->getSUnit();
1774    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1775    // to cover the number of registers defined (they are all live).
1776    if (PredSU->NumRegDefsLeft == 0) {
1777      continue;
1778    }
1779    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1780         RegDefPos.IsValid(); RegDefPos.Advance()) {
1781      EVT VT = RegDefPos.GetValue();
1782      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1783      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1784      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1785        return true;
1786    }
1787  }
1788  return false;
1789}
1790
1791bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1792  const SDNode *N = SU->getNode();
1793
1794  if (!N->isMachineOpcode() || !SU->NumSuccs)
1795    return false;
1796
1797  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1798  for (unsigned i = 0; i != NumDefs; ++i) {
1799    EVT VT = N->getValueType(i);
1800    if (!N->hasAnyUseOfValue(i))
1801      continue;
1802    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1803    if (RegPressure[RCId] >= RegLimit[RCId])
1804      return true;
1805  }
1806  return false;
1807}
1808
1809// Compute the register pressure contribution of this instruction by counting up
1810// for uses that are not live and down for defs. Only count register classes
1811// that are already under high pressure. As a side effect, compute the number of
1812// uses of registers that are already live.
1813//
1814// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1815// so it could probably be factored.
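//
// Illustrative arithmetic: an SU with two not-yet-live operand defs in a
// class that is already at its limit, and one def of its own in that same
// class, yields PDiff = 2 - 1 = 1. Operands whose defs are already fully
// live do not contribute to PDiff but are counted in LiveUses.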
1816int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1817  LiveUses = 0;
1818  int PDiff = 0;
1819  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1820       I != E; ++I) {
1821    if (I->isCtrl())
1822      continue;
1823    SUnit *PredSU = I->getSUnit();
1824    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1825    // to cover the number of registers defined (they are all live).
1826    if (PredSU->NumRegDefsLeft == 0) {
1827      if (PredSU->getNode()->isMachineOpcode())
1828        ++LiveUses;
1829      continue;
1830    }
1831    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1832         RegDefPos.IsValid(); RegDefPos.Advance()) {
1833      EVT VT = RegDefPos.GetValue();
1834      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1835      if (RegPressure[RCId] >= RegLimit[RCId])
1836        ++PDiff;
1837    }
1838  }
1839  const SDNode *N = SU->getNode();
1840
1841  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1842    return PDiff;
1843
1844  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1845  for (unsigned i = 0; i != NumDefs; ++i) {
1846    EVT VT = N->getValueType(i);
1847    if (!N->hasAnyUseOfValue(i))
1848      continue;
1849    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1850    if (RegPressure[RCId] >= RegLimit[RCId])
1851      --PDiff;
1852  }
1853  return PDiff;
1854}
1855
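// ScheduledNode - Update the tracked register pressure when SU is scheduled
// (bottom-up): roughly, each predecessor register def that this node consumes
// becomes live, so its class pressure rises by the def's cost, while this
// node's own defs whose uses have now all been scheduled release their cost,
// clamped at zero because the tracking is imprecise.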
1856void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1857  if (!TracksRegPressure)
1858    return;
1859
1860  if (!SU->getNode())
1861    return;
1862
1863  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1864       I != E; ++I) {
1865    if (I->isCtrl())
1866      continue;
1867    SUnit *PredSU = I->getSUnit();
1868    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1869    // to cover the number of registers defined (they are all live).
1870    if (PredSU->NumRegDefsLeft == 0) {
1871      continue;
1872    }
1873    // FIXME: The ScheduleDAG currently loses information about which of a
1874    // node's values is consumed by each dependence. Consequently, if the node
1875    // defines multiple register classes, we don't know which to pressurize
1876    // here. Instead the following loop consumes the register defs in an
1877    // arbitrary order. At least it handles the common case of clustered loads
1878    // to the same class. For precise liveness, each SDep needs to indicate the
1879    // result number. But that tightly couples the ScheduleDAG with the
1880    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1881    // value type or register class to SDep.
1882    //
1883    // The most important aspect of register tracking is balancing the increase
1884    // here with the reduction further below. Note that this SU may use multiple
1885    // defs in PredSU. They can't be determined here, but we've already
1886    // compensated by reducing NumRegDefsLeft in PredSU during
1887    // ScheduleDAGSDNodes::AddSchedEdges.
1888    --PredSU->NumRegDefsLeft;
1889    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1890    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1891         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1892      if (SkipRegDefs)
1893        continue;
1894      EVT VT = RegDefPos.GetValue();
1895      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1896      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1897      break;
1898    }
1899  }
1900
1901  // We should have this assert, but there may be dead SDNodes that never
1902  // materialize as SUnits, so they don't appear to generate liveness.
1903  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1904  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1905  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1906       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1907    if (SkipRegDefs > 0)
1908      continue;
1909    EVT VT = RegDefPos.GetValue();
1910    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1911    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
1912      // Register pressure tracking is imprecise. This can happen. But we try
1913      // hard not to let it happen because it likely results in poor scheduling.
1914      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1915      RegPressure[RCId] = 0;
1916    }
1917    else {
1918      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1919    }
1920  }
1921  dumpRegPressure();
1922}
1923
1924void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1925  if (!TracksRegPressure)
1926    return;
1927
1928  const SDNode *N = SU->getNode();
1929  if (!N) return;
1930
1931  if (!N->isMachineOpcode()) {
1932    if (N->getOpcode() != ISD::CopyToReg)
1933      return;
1934  } else {
1935    unsigned Opc = N->getMachineOpcode();
1936    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1937        Opc == TargetOpcode::INSERT_SUBREG ||
1938        Opc == TargetOpcode::SUBREG_TO_REG ||
1939        Opc == TargetOpcode::REG_SEQUENCE ||
1940        Opc == TargetOpcode::IMPLICIT_DEF)
1941      return;
1942  }
1943
1944  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1945       I != E; ++I) {
1946    if (I->isCtrl())
1947      continue;
1948    SUnit *PredSU = I->getSUnit();
1949    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1950    // counts data deps.
1951    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1952      continue;
1953    const SDNode *PN = PredSU->getNode();
1954    if (!PN->isMachineOpcode()) {
1955      if (PN->getOpcode() == ISD::CopyFromReg) {
1956        EVT VT = PN->getValueType(0);
1957        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1958        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1959      }
1960      continue;
1961    }
1962    unsigned POpc = PN->getMachineOpcode();
1963    if (POpc == TargetOpcode::IMPLICIT_DEF)
1964      continue;
1965    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1966      EVT VT = PN->getOperand(0).getValueType();
1967      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1968      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1969      continue;
1970    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1971               POpc == TargetOpcode::SUBREG_TO_REG) {
1972      EVT VT = PN->getValueType(0);
1973      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1974      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1975      continue;
1976    }
1977    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1978    for (unsigned i = 0; i != NumDefs; ++i) {
1979      EVT VT = PN->getValueType(i);
1980      if (!PN->hasAnyUseOfValue(i))
1981        continue;
1982      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1983      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1984        // Register pressure tracking is imprecise. This can happen.
1985        RegPressure[RCId] = 0;
1986      else
1987        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1988    }
1989  }
1990
1991  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1992  // may transfer data dependencies to CopyToReg.
1993  if (SU->NumSuccs && N->isMachineOpcode()) {
1994    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1995    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1996      EVT VT = N->getValueType(i);
1997      if (VT == MVT::Glue || VT == MVT::Other)
1998        continue;
1999      if (!N->hasAnyUseOfValue(i))
2000        continue;
2001      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2002      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2003    }
2004  }
2005
2006  dumpRegPressure();
2007}
2008
2009//===----------------------------------------------------------------------===//
2010//           Dynamic Node Priority for Register Pressure Reduction
2011//===----------------------------------------------------------------------===//
2012
2013/// closestSucc - Returns the scheduled cycle of the successor which is
2014/// closest to the current cycle.
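///
/// For illustration: if the data successors sit at heights 3 and 5 this
/// returns 5; a CopyToReg successor is instead measured as one above its own
/// closest successor, so a stack of copies does not skew the distance.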
2015static unsigned closestSucc(const SUnit *SU) {
2016  unsigned MaxHeight = 0;
2017  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2018       I != E; ++I) {
2019    if (I->isCtrl()) continue;  // ignore chain succs
2020    unsigned Height = I->getSUnit()->getHeight();
2021    // If there are a bunch of CopyToRegs stacked up, they should be considered
2022    // to be at the same position.
2023    if (I->getSUnit()->getNode() &&
2024        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2025      Height = closestSucc(I->getSUnit())+1;
2026    if (Height > MaxHeight)
2027      MaxHeight = Height;
2028  }
2029  return MaxHeight;
2030}
2031
2032/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2033/// for scratch registers, i.e. number of data dependencies.
2034static unsigned calcMaxScratches(const SUnit *SU) {
2035  unsigned Scratches = 0;
2036  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2037       I != E; ++I) {
2038    if (I->isCtrl()) continue;  // ignore chain preds
2039    Scratches++;
2040  }
2041  return Scratches;
2042}
2043
2044/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2045/// CopyFromReg from a virtual register.
2046static bool hasOnlyLiveInOpers(const SUnit *SU) {
2047  bool RetVal = false;
2048  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2049       I != E; ++I) {
2050    if (I->isCtrl()) continue;
2051    const SUnit *PredSU = I->getSUnit();
2052    if (PredSU->getNode() &&
2053        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2054      unsigned Reg =
2055        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2056      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2057        RetVal = true;
2058        continue;
2059      }
2060    }
2061    return false;
2062  }
2063  return RetVal;
2064}
2065
2066/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2067/// CopyToReg to a virtual register. This SU def is probably a liveout and
2068/// it has no other use. It should be scheduled closer to the terminator.
2069static bool hasOnlyLiveOutUses(const SUnit *SU) {
2070  bool RetVal = false;
2071  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2072       I != E; ++I) {
2073    if (I->isCtrl()) continue;
2074    const SUnit *SuccSU = I->getSUnit();
2075    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2076      unsigned Reg =
2077        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2078      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2079        RetVal = true;
2080        continue;
2081      }
2082    }
2083    return false;
2084  }
2085  return RetVal;
2086}
2087
2088// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
2089// set isVRegCycle for its CopyFromReg operands.
2090//
2091// This is only relevant for single-block loops, in which case the VRegCycle
2092// node is likely an induction variable in which the operand and target virtual
2093// registers should be coalesced (e.g. pre/post increment values). Setting the
2094// isVRegCycle flag helps the scheduler prioritize other uses of the same
2095// CopyFromReg so that this node becomes the virtual register "kill". This
2096// avoids interference between the values live in and out of the block and
2097// eliminates a copy inside the loop.
2098static void initVRegCycle(SUnit *SU) {
2099  if (DisableSchedVRegCycle)
2100    return;
2101
2102  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2103    return;
2104
2105  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2106
2107  SU->isVRegCycle = true;
2108
2109  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2110       I != E; ++I) {
2111    if (I->isCtrl()) continue;
2112    I->getSUnit()->isVRegCycle = true;
2113  }
2114}
2115
2116// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2117// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2118static void resetVRegCycle(SUnit *SU) {
2119  if (!SU->isVRegCycle)
2120    return;
2121
2122  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2123       I != E; ++I) {
2124    if (I->isCtrl()) continue;  // ignore chain preds
2125    SUnit *PredSU = I->getSUnit();
2126    if (PredSU->isVRegCycle) {
2127      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2128             "VRegCycle def must be CopyFromReg");
2129      I->getSUnit()->isVRegCycle = 0;
2130    }
2131  }
2132}
2133
2134// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2135// means a node that defines the VRegCycle has not been scheduled yet.
2136static bool hasVRegCycleUse(const SUnit *SU) {
2137  // If this SU also defines the VReg, don't hoist it as a "use".
2138  if (SU->isVRegCycle)
2139    return false;
2140
2141  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2142       I != E; ++I) {
2143    if (I->isCtrl()) continue;  // ignore chain preds
2144    if (I->getSUnit()->isVRegCycle &&
2145        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2146      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2147      return true;
2148    }
2149  }
2150  return false;
2151}
2152
2153// Check for either a dependence (latency) or resource (hazard) stall.
2154//
2155// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2156static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2157  if ((int)SPQ->getCurCycle() < Height) return true;
2158  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2159      != ScheduleHazardRecognizer::NoHazard)
2160    return true;
2161  return false;
2162}
2163
2164// Return -1 if left has higher priority, 1 if right has higher priority.
2165// Return 0 if latency-based priority is equivalent.
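//
// For illustration: at CurCycle 4, a left node of adjusted height 6 stalls
// while a right node of adjusted height 3 does not, so this returns 1 and the
// right node is preferred; if both stall, the node with the smaller adjusted
// height wins the comparison.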
2166static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2167                            RegReductionPQBase *SPQ) {
2168  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2169  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2170  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2171  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2172  int LHeight = (int)left->getHeight() + LPenalty;
2173  int RHeight = (int)right->getHeight() + RPenalty;
2174
2175  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
2176    BUHasStall(left, LHeight, SPQ);
2177  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
2178    BUHasStall(right, RHeight, SPQ);
2179
2180  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2181  // If scheduling both of the nodes will cause a pipeline stall, sort them
2182  // according to their height.
2183  if (LStall) {
2184    if (!RStall) {
2185      DEBUG(++FactorCount[FactStall]);
2186      return 1;
2187    }
2188    if (LHeight != RHeight) {
2189      DEBUG(++FactorCount[FactStall]);
2190      return LHeight > RHeight ? 1 : -1;
2191    }
2192  } else if (RStall) {
2193    DEBUG(++FactorCount[FactStall]);
2194    return -1;
2195  }
2196
2197  // If either node is scheduling for latency, sort them by height/depth
2198  // and latency.
2199  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2200                     right->SchedulingPref == Sched::Latency)) {
2201    if (DisableSchedCycles) {
2202      if (LHeight != RHeight) {
2203        DEBUG(++FactorCount[FactHeight]);
2204        return LHeight > RHeight ? 1 : -1;
2205      }
2206    }
2207    else {
2208      // If neither instruction stalls (!LStall && !RStall) then their
2209      // heights are already covered, so only depth matters. We also reach
2210      // this if both stall but have the same height.
2211      int LDepth = left->getDepth() - LPenalty;
2212      int RDepth = right->getDepth() - RPenalty;
2213      if (LDepth != RDepth) {
2214        DEBUG(++FactorCount[FactDepth]);
2215        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2216              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2217              << ") depth " << RDepth << "\n");
2218        return LDepth < RDepth ? 1 : -1;
2219      }
2220    }
2221    if (left->Latency != right->Latency) {
2222      DEBUG(++FactorCount[FactOther]);
2223      return left->Latency > right->Latency ? 1 : -1;
2224    }
2225  }
2226  return 0;
2227}
2228
2229static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2230  // Schedule physical register definitions close to their use. This is
2231  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2232  // long as shortening physreg live ranges is generally good, we can defer
2233  // creating a subtarget hook.
2234  if (!DisableSchedPhysRegJoin) {
2235    bool LHasPhysReg = left->hasPhysRegDefs;
2236    bool RHasPhysReg = right->hasPhysRegDefs;
2237    if (LHasPhysReg != RHasPhysReg) {
2238      DEBUG(++FactorCount[FactRegUses]);
2239      #ifndef NDEBUG
2240      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2241      #endif
2242      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2243            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2244            << PhysRegMsg[RHasPhysReg] << "\n");
2245      return LHasPhysReg < RHasPhysReg;
2246    }
2247  }
2248
2249  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2250  unsigned LPriority = SPQ->getNodePriority(left);
2251  unsigned RPriority = SPQ->getNodePriority(right);
2252
2253  // Be really careful about hoisting call operands above previous calls.
2254  // Only allow it if it would reduce register pressure.
2255  if (left->isCall && right->isCallOp) {
2256    unsigned RNumVals = right->getNode()->getNumValues();
2257    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2258  }
2259  if (right->isCall && left->isCallOp) {
2260    unsigned LNumVals = left->getNode()->getNumValues();
2261    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2262  }
2263
2264  if (LPriority != RPriority) {
2265    DEBUG(++FactorCount[FactStatic]);
2266    return LPriority > RPriority;
2267  }
2268
2269  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2270  // the same, keep source order.
2271  if (left->isCall || right->isCall) {
2272    unsigned LOrder = SPQ->getNodeOrdering(left);
2273    unsigned ROrder = SPQ->getNodeOrdering(right);
2274
2275    // Prefer the ordering in which the lower non-zero order number gets the
2276    // higher preference.
2277    if ((LOrder || ROrder) && LOrder != ROrder)
2278      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2279  }
2280
2281  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2282  // e.g.
2283  // t1 = op t2, c1
2284  // t3 = op t4, c2
2285  //
2286  // and the following instructions are both ready.
2287  // t2 = op c3
2288  // t4 = op c4
2289  //
2290  // Then schedule t2 = op first.
2291  // i.e.
2292  // t4 = op c4
2293  // t2 = op c3
2294  // t1 = op t2, c1
2295  // t3 = op t4, c2
2296  //
2297  // This creates more short live intervals.
2298  unsigned LDist = closestSucc(left);
2299  unsigned RDist = closestSucc(right);
2300  if (LDist != RDist) {
2301    DEBUG(++FactorCount[FactOther]);
2302    return LDist < RDist;
2303  }
2304
2305  // How many registers become live when the node is scheduled.
2306  unsigned LScratch = calcMaxScratches(left);
2307  unsigned RScratch = calcMaxScratches(right);
2308  if (LScratch != RScratch) {
2309    DEBUG(++FactorCount[FactOther]);
2310    return LScratch > RScratch;
2311  }
2312
2313  // Comparing latency against a call makes little sense unless the node
2314  // is register pressure-neutral.
2315  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2316    return (left->NodeQueueId > right->NodeQueueId);
2317
2318  // Do not compare latencies when one or both of the nodes are calls.
2319  if (!DisableSchedCycles &&
2320      !(left->isCall || right->isCall)) {
2321    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2322    if (result != 0)
2323      return result > 0;
2324  }
2325  else {
2326    if (left->getHeight() != right->getHeight()) {
2327      DEBUG(++FactorCount[FactHeight]);
2328      return left->getHeight() > right->getHeight();
2329    }
2330
2331    if (left->getDepth() != right->getDepth()) {
2332      DEBUG(++FactorCount[FactDepth]);
2333      return left->getDepth() < right->getDepth();
2334    }
2335  }
2336
2337  assert(left->NodeQueueId && right->NodeQueueId &&
2338         "NodeQueueId cannot be zero");
2339  DEBUG(++FactorCount[FactOther]);
2340  return (left->NodeQueueId > right->NodeQueueId);
2341}
2342
2343// Bottom up
2344bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2345  if (int res = checkSpecialNodes(left, right))
2346    return res > 0;
2347
2348  return BURRSort(left, right, SPQ);
2349}
2350
2351// Source order, otherwise bottom up.
2352bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2353  if (int res = checkSpecialNodes(left, right))
2354    return res > 0;
2355
2356  unsigned LOrder = SPQ->getNodeOrdering(left);
2357  unsigned ROrder = SPQ->getNodeOrdering(right);
2358
2359  // Prefer the ordering in which the lower non-zero order number gets the
2360  // higher preference.
2361  if ((LOrder || ROrder) && LOrder != ROrder)
2362    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2363
2364  return BURRSort(left, right, SPQ);
2365}
2366
2367// If the time between now and when the instruction will be ready can cover
2368// the spill code, then avoid adding it to the ready queue. This gives long
2369// stalls highest priority and allows hoisting across calls. It should also
2370// speed up processing the available queue.
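//
// For illustration: with ReadyDelay = 3 and CurCycle = 5, an SU of height 9
// is kept out of the ready queue (9 > 5 + 3) unless it may reduce register
// pressure, while an SU of height 8 is admitted as long as the hazard
// recognizer reports no hazard three cycles ahead.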
2371bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2372  static const unsigned ReadyDelay = 3;
2373
2374  if (SPQ->MayReduceRegPressure(SU)) return true;
2375
2376  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2377
2378  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2379      != ScheduleHazardRecognizer::NoHazard)
2380    return false;
2381
2382  return true;
2383}
2384
2385// Return true if right should be scheduled with higher priority than left.
2386bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2387  if (int res = checkSpecialNodes(left, right))
2388    return res > 0;
2389
2390  if (left->isCall || right->isCall)
2391    // No way to compute latency of calls.
2392    return BURRSort(left, right, SPQ);
2393
2394  bool LHigh = SPQ->HighRegPressure(left);
2395  bool RHigh = SPQ->HighRegPressure(right);
2396  // Avoid causing spills. If register pressure is high, schedule for
2397  // register pressure reduction.
2398  if (LHigh && !RHigh) {
2399    DEBUG(++FactorCount[FactPressureDiff]);
2400    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2401          << right->NodeNum << ")\n");
2402    return true;
2403  }
2404  else if (!LHigh && RHigh) {
2405    DEBUG(++FactorCount[FactPressureDiff]);
2406    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2407          << left->NodeNum << ")\n");
2408    return false;
2409  }
2410  if (!LHigh && !RHigh) {
2411    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2412    if (result != 0)
2413      return result > 0;
2414  }
2415  return BURRSort(left, right, SPQ);
2416}
2417
2418// Schedule as many instructions in each cycle as possible, so don't make an
2419// instruction available unless it is ready in the current cycle.
2420bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2421  if (SU->getHeight() > CurCycle) return false;
2422
2423  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2424      != ScheduleHazardRecognizer::NoHazard)
2425    return false;
2426
2427  return true;
2428}
2429
2430static bool canEnableCoalescing(SUnit *SU) {
2431  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2432  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2433    // CopyToReg should be close to its uses to facilitate coalescing and
2434    // avoid spilling.
2435    return true;
2436
2437  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2438      Opc == TargetOpcode::SUBREG_TO_REG ||
2439      Opc == TargetOpcode::INSERT_SUBREG)
2440    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2441    // close to their uses to facilitate coalescing.
2442    return true;
2443
2444  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2445    // If SU does not have a register def, schedule it close to its uses
2446    // because it does not lengthen any live ranges.
2447    return true;
2448
2449  return false;
2450}
2451
2452// list-ilp is currently an experimental scheduler that allows various
2453// heuristics to be enabled prior to the normal register reduction logic.
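//
// Tie-breaking proceeds in the order implemented below: register pressure
// difference, then whether scheduling would enable coalescing while pressure
// is positive, then the number of already-live uses, then stalls, then
// critical-path depth and height when the spread exceeds MaxReorderWindow,
// and finally the default BURRSort comparison.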
2454bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2455  if (int res = checkSpecialNodes(left, right))
2456    return res > 0;
2457
2458  if (left->isCall || right->isCall)
2459    // No way to compute latency of calls.
2460    return BURRSort(left, right, SPQ);
2461
2462  unsigned LLiveUses = 0, RLiveUses = 0;
2463  int LPDiff = 0, RPDiff = 0;
2464  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2465    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2466    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2467  }
2468  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2469    DEBUG(++FactorCount[FactPressureDiff]);
2470    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2471          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2472    return LPDiff > RPDiff;
2473  }
2474
2475  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2476    bool LReduce = canEnableCoalescing(left);
2477    bool RReduce = canEnableCoalescing(right);
2478    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2479    if (LReduce && !RReduce) return false;
2480    if (RReduce && !LReduce) return true;
2481  }
2482
2483  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2484    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2485          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2486    DEBUG(++FactorCount[FactRegUses]);
2487    return LLiveUses < RLiveUses;
2488  }
2489
2490  if (!DisableSchedStalls) {
2491    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2492    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2493    if (LStall != RStall) {
2494      DEBUG(++FactorCount[FactHeight]);
2495      return left->getHeight() > right->getHeight();
2496    }
2497  }
2498
2499  if (!DisableSchedCriticalPath) {
2500    int spread = (int)left->getDepth() - (int)right->getDepth();
2501    if (std::abs(spread) > MaxReorderWindow) {
2502      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2503            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2504            << right->getDepth() << "\n");
2505      DEBUG(++FactorCount[FactDepth]);
2506      return left->getDepth() < right->getDepth();
2507    }
2508  }
2509
2510  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2511    int spread = (int)left->getHeight() - (int)right->getHeight();
2512    if (std::abs(spread) > MaxReorderWindow) {
2513      DEBUG(++FactorCount[FactHeight]);
2514      return left->getHeight() > right->getHeight();
2515    }
2516  }
2517
2518  return BURRSort(left, right, SPQ);
2519}
2520
2521void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2522  SUnits = &sunits;
2523  // Add pseudo dependency edges for two-address nodes.
2524  AddPseudoTwoAddrDeps();
2525  // Reroute edges to nodes with multiple uses.
2526  if (!TracksRegPressure)
2527    PrescheduleNodesWithMultipleUses();
2528  // Calculate node priorities.
2529  CalculateSethiUllmanNumbers();
2530
2531  // For single block loops, mark nodes that look like canonical IV increments.
2532  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2533    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2534      initVRegCycle(&sunits[i]);
2535    }
2536  }
2537}
2538
2539//===----------------------------------------------------------------------===//
2540//                    Preschedule for Register Pressure
2541//===----------------------------------------------------------------------===//
2542
2543bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2544  if (SU->isTwoAddress) {
2545    unsigned Opc = SU->getNode()->getMachineOpcode();
2546    const TargetInstrDesc &TID = TII->get(Opc);
2547    unsigned NumRes = TID.getNumDefs();
2548    unsigned NumOps = TID.getNumOperands() - NumRes;
2549    for (unsigned i = 0; i != NumOps; ++i) {
2550      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2551        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2552        if (DU->getNodeId() != -1 &&
2553            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2554          return true;
2555      }
2556    }
2557  }
2558  return false;
2559}
2560
2561/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2562/// physical register defs.
2563static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2564                                  const TargetInstrInfo *TII,
2565                                  const TargetRegisterInfo *TRI) {
2566  SDNode *N = SuccSU->getNode();
2567  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2568  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2569  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2570  for (const SDNode *SUNode = SU->getNode(); SUNode;
2571       SUNode = SUNode->getGluedNode()) {
2572    if (!SUNode->isMachineOpcode())
2573      continue;
2574    const unsigned *SUImpDefs =
2575      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2576    if (!SUImpDefs)
2577      return false;
2578    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2579      EVT VT = N->getValueType(i);
2580      if (VT == MVT::Glue || VT == MVT::Other)
2581        continue;
2582      if (!N->hasAnyUseOfValue(i))
2583        continue;
2584      unsigned Reg = ImpDefs[i - NumDefs];
2585      for (;*SUImpDefs; ++SUImpDefs) {
2586        unsigned SUReg = *SUImpDefs;
2587        if (TRI->regsOverlap(Reg, SUReg))
2588          return true;
2589      }
2590    }
2591  }
2592  return false;
2593}
2594
2595/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2596/// are not handled well by the general register pressure reduction
2597/// heuristics. When presented with code like this:
2598///
2599///      N
2600///    / |
2601///   /  |
2602///  U  store
2603///  |
2604/// ...
2605///
2606/// the heuristics tend to push the store up, but since the
2607/// operand of the store has another use (U), this would increase
2608/// the length of that other use (the U->N edge).
2609///
2610/// This function transforms code like the above to route U's
2611/// dependence through the store when possible, like this:
2612///
2613///      N
2614///      ||
2615///      ||
2616///     store
2617///       |
2618///       U
2619///       |
2620///      ...
2621///
2622/// This results in the store being scheduled immediately
2623/// after N, which shortens the U->N live range, reducing
2624/// register pressure.
2625///
2626void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2627  // Visit all the nodes in topological order, working top-down.
2628  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2629    SUnit *SU = &(*SUnits)[i];
2630    // For now, only look at nodes with no data successors, such as stores.
2631    // These are especially important, due to the heuristics in
2632    // getNodePriority for nodes with no data successors.
2633    if (SU->NumSuccs != 0)
2634      continue;
2635    // For now, only look at nodes with exactly one data predecessor.
2636    if (SU->NumPreds != 1)
2637      continue;
2638    // Avoid prescheduling copies to virtual registers, which don't behave
2639    // like other nodes from the perspective of scheduling heuristics.
2640    if (SDNode *N = SU->getNode())
2641      if (N->getOpcode() == ISD::CopyToReg &&
2642          TargetRegisterInfo::isVirtualRegister
2643            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2644        continue;
2645
2646    // Locate the single data predecessor.
2647    SUnit *PredSU = 0;
2648    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2649         EE = SU->Preds.end(); II != EE; ++II)
2650      if (!II->isCtrl()) {
2651        PredSU = II->getSUnit();
2652        break;
2653      }
2654    assert(PredSU);
2655
2656    // Don't rewrite edges that carry physregs, because that requires additional
2657    // support infrastructure.
2658    if (PredSU->hasPhysRegDefs)
2659      continue;
2660    // Short-circuit the case where SU is PredSU's only data successor.
2661    if (PredSU->NumSuccs == 1)
2662      continue;
2663    // Avoid prescheduling to copies from virtual registers, which don't behave
2664    // like other nodes from the perspective of scheduling heuristics.
2665    if (SDNode *N = SU->getNode())
2666      if (N->getOpcode() == ISD::CopyFromReg &&
2667          TargetRegisterInfo::isVirtualRegister
2668            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2669        continue;
2670
2671    // Perform checks on the successors of PredSU.
2672    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2673         EE = PredSU->Succs.end(); II != EE; ++II) {
2674      SUnit *PredSuccSU = II->getSUnit();
2675      if (PredSuccSU == SU) continue;
2676      // If PredSU has another successor with no data successors, for
2677      // now don't attempt to choose either over the other.
2678      if (PredSuccSU->NumSuccs == 0)
2679        goto outer_loop_continue;
2680      // Don't break physical register dependencies.
2681      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2682        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2683          goto outer_loop_continue;
2684      // Don't introduce graph cycles.
2685      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2686        goto outer_loop_continue;
2687    }
2688
2689    // Ok, the transformation is safe and the heuristics suggest it is
2690    // profitable. Update the graph.
2691    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2692                 << " next to PredSU #" << PredSU->NodeNum
2693                 << " to guide scheduling in the presence of multiple uses\n");
2694    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2695      SDep Edge = PredSU->Succs[i];
2696      assert(!Edge.isAssignedRegDep());
2697      SUnit *SuccSU = Edge.getSUnit();
2698      if (SuccSU != SU) {
2699        Edge.setSUnit(PredSU);
2700        scheduleDAG->RemovePred(SuccSU, Edge);
2701        scheduleDAG->AddPred(SU, Edge);
2702        Edge.setSUnit(SU);
2703        scheduleDAG->AddPred(SuccSU, Edge);
2704        --i;
2705      }
2706    }
2707  outer_loop_continue:;
2708  }
2709}
2710
2711/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2712/// it as a def&use operand, add a pseudo control edge from it to the other
2713/// node (if it won't create a cycle) so the two-address one will be scheduled
2714/// first (lower in the schedule). If both nodes are two-address, favor the
2715/// one that has a CopyToReg use (more likely to be a loop induction update).
2716/// If both are two-address, but one is commutable while the other is not
2717/// commutable, favor the one that's not commutable.
2718void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2719  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2720    SUnit *SU = &(*SUnits)[i];
2721    if (!SU->isTwoAddress)
2722      continue;
2723
2724    SDNode *Node = SU->getNode();
2725    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2726      continue;
2727
2728    bool isLiveOut = hasOnlyLiveOutUses(SU);
2729    unsigned Opc = Node->getMachineOpcode();
2730    const TargetInstrDesc &TID = TII->get(Opc);
2731    unsigned NumRes = TID.getNumDefs();
2732    unsigned NumOps = TID.getNumOperands() - NumRes;
2733    for (unsigned j = 0; j != NumOps; ++j) {
2734      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2735        continue;
2736      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2737      if (DU->getNodeId() == -1)
2738        continue;
2739      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2740      if (!DUSU) continue;
2741      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2742           E = DUSU->Succs.end(); I != E; ++I) {
2743        if (I->isCtrl()) continue;
2744        SUnit *SuccSU = I->getSUnit();
2745        if (SuccSU == SU)
2746          continue;
2747        // Be conservative. Ignore if nodes aren't at roughly the same
2748        // depth and height.
2749        if (SuccSU->getHeight() < SU->getHeight() &&
2750            (SU->getHeight() - SuccSU->getHeight()) > 1)
2751          continue;
2752        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2753        // constrains whatever is using the copy, instead of the copy
2754        // itself. In the case that the copy is coalesced, this
2755        // preserves the intent of the pseudo two-address heuristics.
2756        while (SuccSU->Succs.size() == 1 &&
2757               SuccSU->getNode()->isMachineOpcode() &&
2758               SuccSU->getNode()->getMachineOpcode() ==
2759                 TargetOpcode::COPY_TO_REGCLASS)
2760          SuccSU = SuccSU->Succs.front().getSUnit();
2761        // Don't constrain non-instruction nodes.
2762        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2763          continue;
2764        // Don't constrain nodes with physical register defs if the
2765        // predecessor can clobber them.
2766        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2767          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2768            continue;
2769        }
2770        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2771        // these may be coalesced away. We want them close to their uses.
2772        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2773        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2774            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2775            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2776          continue;
2777        if ((!canClobber(SuccSU, DUSU) ||
2778             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2779             (!SU->isCommutable && SuccSU->isCommutable)) &&
2780            !scheduleDAG->IsReachable(SuccSU, SU)) {
2781          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2782                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2783          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2784                                        /*Reg=*/0, /*isNormalMemory=*/false,
2785                                        /*isMustAlias=*/false,
2786                                        /*isArtificial=*/true));
2787        }
2788      }
2789    }
2790  }
2791}
2792
2793/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
2794/// predecessors of the successors of the SUnit SU. Stop when the provided
2795/// limit is exceeded.
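///
/// For illustration: with Limit == 1 the walk stops as soon as a second
/// unscheduled predecessor is seen, so a caller only learns whether the sum
/// is 0, 1, or greater than 1.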
2796static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2797                                                    unsigned Limit) {
2798  unsigned Sum = 0;
2799  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2800       I != E; ++I) {
2801    const SUnit *SuccSU = I->getSUnit();
2802    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2803         EE = SuccSU->Preds.end(); II != EE; ++II) {
2804      SUnit *PredSU = II->getSUnit();
2805      if (!PredSU->isScheduled)
2806        if (++Sum > Limit)
2807          return Sum;
2808    }
2809  }
2810  return Sum;
2811}
2812
2813
2814// Top down
2815bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2816  if (int res = checkSpecialNodes(left, right))
2817    return res < 0;
2818
2819  unsigned LPriority = SPQ->getNodePriority(left);
2820  unsigned RPriority = SPQ->getNodePriority(right);
2821  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2822  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2823  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2824  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2825  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2826  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2827
2828  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2829    return false;
2830  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2831    return true;
2832
2833  if (LIsFloater)
2834    LBonus -= 2;
2835  if (RIsFloater)
2836    RBonus -= 2;
2837  if (left->NumSuccs == 1)
2838    LBonus += 2;
2839  if (right->NumSuccs == 1)
2840    RBonus += 2;
2841
2842  if (LPriority+LBonus != RPriority+RBonus)
2843    return LPriority+LBonus < RPriority+RBonus;
2844
2845  if (left->getDepth() != right->getDepth())
2846    return left->getDepth() < right->getDepth();
2847
2848  if (left->NumSuccsLeft != right->NumSuccsLeft)
2849    return left->NumSuccsLeft > right->NumSuccsLeft;
2850
2851  assert(left->NodeQueueId && right->NodeQueueId &&
2852         "NodeQueueId cannot be zero");
2853  return (left->NodeQueueId > right->NodeQueueId);
2854}
2855
2856//===----------------------------------------------------------------------===//
2857//                         Public Constructor Functions
2858//===----------------------------------------------------------------------===//
2859
2860llvm::ScheduleDAGSDNodes *
2861llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2862                                 CodeGenOpt::Level OptLevel) {
2863  const TargetMachine &TM = IS->TM;
2864  const TargetInstrInfo *TII = TM.getInstrInfo();
2865  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2866
2867  BURegReductionPriorityQueue *PQ =
2868    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2869  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2870  PQ->setScheduleDAG(SD);
2871  return SD;
2872}
2873
2874llvm::ScheduleDAGSDNodes *
2875llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2876                                 CodeGenOpt::Level OptLevel) {
2877  const TargetMachine &TM = IS->TM;
2878  const TargetInstrInfo *TII = TM.getInstrInfo();
2879  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2880
2881  TDRegReductionPriorityQueue *PQ =
2882    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2883  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2884  PQ->setScheduleDAG(SD);
2885  return SD;
2886}
2887
2888llvm::ScheduleDAGSDNodes *
2889llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2890                                   CodeGenOpt::Level OptLevel) {
2891  const TargetMachine &TM = IS->TM;
2892  const TargetInstrInfo *TII = TM.getInstrInfo();
2893  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2894
2895  SrcRegReductionPriorityQueue *PQ =
2896    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2897  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2898  PQ->setScheduleDAG(SD);
2899  return SD;
2900}
2901
2902llvm::ScheduleDAGSDNodes *
2903llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2904                                   CodeGenOpt::Level OptLevel) {
2905  const TargetMachine &TM = IS->TM;
2906  const TargetInstrInfo *TII = TM.getInstrInfo();
2907  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2908  const TargetLowering *TLI = &IS->getTargetLowering();
2909
2910  HybridBURRPriorityQueue *PQ =
2911    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2912
2913  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2914  PQ->setScheduleDAG(SD);
2915  return SD;
2916}
2917
2918llvm::ScheduleDAGSDNodes *
2919llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2920                                CodeGenOpt::Level OptLevel) {
2921  const TargetMachine &TM = IS->TM;
2922  const TargetInstrInfo *TII = TM.getInstrInfo();
2923  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2924  const TargetLowering *TLI = &IS->getTargetLowering();
2925
2926  ILPBURRPriorityQueue *PQ =
2927    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2928  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2929  PQ->setScheduleDAG(SD);
2930  return SD;
2931}
2932