ScheduleDAGRRList.cpp revision 4cb971ce1c8b254f29365c988b55f6dcfe86d21e
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> DisableSchedCycles(
70  "disable-sched-cycles", cl::Hidden, cl::init(false),
71  cl::desc("Disable cycle-level precision during preRA scheduling"));
72
73// Temporary sched=list-ilp flags until the heuristics are robust.
74// Some options are also available under sched=list-hybrid.
75static cl::opt<bool> DisableSchedRegPressure(
76  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
77  cl::desc("Disable regpressure priority in sched=list-ilp"));
78static cl::opt<bool> DisableSchedLiveUses(
79  "disable-sched-live-uses", cl::Hidden, cl::init(true),
80  cl::desc("Disable live use priority in sched=list-ilp"));
81static cl::opt<bool> DisableSchedVRegCycle(
82  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
83  cl::desc("Disable virtual register cycle interference checks"));
84static cl::opt<bool> DisableSchedPhysRegJoin(
85  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
86  cl::desc("Disable physreg def-use affinity"));
87static cl::opt<bool> DisableSchedStalls(
88  "disable-sched-stalls", cl::Hidden, cl::init(true),
89  cl::desc("Disable no-stall priority in sched=list-ilp"));
90static cl::opt<bool> DisableSchedCriticalPath(
91  "disable-sched-critical-path", cl::Hidden, cl::init(false),
92  cl::desc("Disable critical path priority in sched=list-ilp"));
93static cl::opt<bool> DisableSchedHeight(
94  "disable-sched-height", cl::Hidden, cl::init(false),
95  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
96
97static cl::opt<int> MaxReorderWindow(
98  "max-sched-reorder", cl::Hidden, cl::init(6),
99  cl::desc("Number of instructions to allow ahead of the critical path "
100           "in sched=list-ilp"));
101
102static cl::opt<unsigned> AvgIPC(
103  "sched-avg-ipc", cl::Hidden, cl::init(1),
104  cl::desc("Average inst/cycle whan no target itinerary exists."));
105
106#ifndef NDEBUG
107namespace {
108  // For sched=list-ilp, count the number of times each factor comes into play.
109  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
110         FactStatic, FactOther, NumFactors };
111}
112static const char *FactorName[NumFactors] =
113{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
114static int FactorCount[NumFactors];
115#endif //!NDEBUG
116
117namespace {
118//===----------------------------------------------------------------------===//
119/// ScheduleDAGRRList - The actual register reduction list scheduler
120/// implementation.  This supports both top-down and bottom-up scheduling.
121///
122class ScheduleDAGRRList : public ScheduleDAGSDNodes {
123private:
124  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
125  /// it is top-down.
126  bool isBottomUp;
127
128  /// NeedLatency - True if the scheduler will make use of latency information.
129  ///
130  bool NeedLatency;
131
132  /// AvailableQueue - The priority queue to use for the available SUnits.
133  SchedulingPriorityQueue *AvailableQueue;
134
135  /// PendingQueue - This contains all of the instructions whose operands have
136  /// been issued, but their results are not ready yet (due to the latency of
137  /// the operation).  Once the operands become available, the instruction is
138  /// added to the AvailableQueue.
139  std::vector<SUnit*> PendingQueue;
140
141  /// HazardRec - The hazard recognizer to use.
142  ScheduleHazardRecognizer *HazardRec;
143
144  /// CurCycle - The current scheduler state corresponds to this cycle.
145  unsigned CurCycle;
146
147  /// MinAvailableCycle - Cycle of the soonest available instruction.
148  unsigned MinAvailableCycle;
149
150  /// IssueCount - The number of instructions issued in the current cycle.
151  /// Currently valid only for bottom-up scheduling.
152  unsigned IssueCount;
153
154  /// LiveRegDefs - A set of physical registers and their definitions
155  /// that are "live". These nodes must be scheduled before any other node that
156  /// modifies the registers can be scheduled.
157  unsigned NumLiveRegs;
158  std::vector<SUnit*> LiveRegDefs;
159  std::vector<SUnit*> LiveRegGens;
160
161  /// Topo - A topological ordering for SUnits which permits fast IsReachable
162  /// and similar queries.
163  ScheduleDAGTopologicalSort Topo;
164
165public:
166  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
167                    SchedulingPriorityQueue *availqueue,
168                    CodeGenOpt::Level OptLevel)
169    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
170      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
171      Topo(SUnits) {
172
173    const TargetMachine &tm = mf.getTarget();
174    if (DisableSchedCycles || !NeedLatency)
175      HazardRec = new ScheduleHazardRecognizer();
176    else
177      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
178  }
179
180  ~ScheduleDAGRRList() {
181    delete HazardRec;
182    delete AvailableQueue;
183  }
184
185  void Schedule();
186
187  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
188
189  /// IsReachable - Checks if SU is reachable from TargetSU.
190  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
191    return Topo.IsReachable(SU, TargetSU);
192  }
193
194  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
195  /// create a cycle.
196  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
197    return Topo.WillCreateCycle(SU, TargetSU);
198  }
199
200  /// AddPred - adds a predecessor edge to SUnit SU.
201  /// The edge is also recorded in the topological ordering,
202  /// which is updated if required.
203  void AddPred(SUnit *SU, const SDep &D) {
204    Topo.AddPred(SU, D.getSUnit());
205    SU->addPred(D);
206  }
207
208  /// RemovePred - removes a predecessor edge from SUnit SU.
209  /// The edge is also removed from the topological ordering,
210  /// which is updated if required.
211  void RemovePred(SUnit *SU, const SDep &D) {
212    Topo.RemovePred(SU, D.getSUnit());
213    SU->removePred(D);
214  }
215
216private:
217  bool isReady(SUnit *SU) {
218    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
219      AvailableQueue->isReady(SU);
220  }
221
222  void ReleasePred(SUnit *SU, const SDep *PredEdge);
223  void ReleasePredecessors(SUnit *SU);
224  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
225  void ReleaseSuccessors(SUnit *SU);
226  void ReleasePending();
227  void AdvanceToCycle(unsigned NextCycle);
228  void AdvancePastStalls(SUnit *SU);
229  void EmitNode(SUnit *SU);
230  void ScheduleNodeBottomUp(SUnit*);
231  void CapturePred(SDep *PredEdge);
232  void UnscheduleNodeBottomUp(SUnit*);
233  void RestoreHazardCheckerBottomUp();
234  void BacktrackBottomUp(SUnit*, SUnit*);
235  SUnit *CopyAndMoveSuccessors(SUnit*);
236  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
237                                const TargetRegisterClass*,
238                                const TargetRegisterClass*,
239                                SmallVector<SUnit*, 2>&);
240  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
241
242  SUnit *PickNodeToScheduleBottomUp();
243  void ListScheduleBottomUp();
244
245  void ScheduleNodeTopDown(SUnit*);
246  void ListScheduleTopDown();
247
248
249  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
250  /// Updates the topological ordering if required.
251  SUnit *CreateNewSUnit(SDNode *N) {
252    unsigned NumSUnits = SUnits.size();
253    SUnit *NewNode = NewSUnit(N);
254    // Update the topological ordering.
255    if (NewNode->NodeNum >= NumSUnits)
256      Topo.InitDAGTopologicalSorting();
257    return NewNode;
258  }
259
260  /// CreateClone - Creates a new SUnit from an existing one.
261  /// Updates the topological ordering if required.
262  SUnit *CreateClone(SUnit *N) {
263    unsigned NumSUnits = SUnits.size();
264    SUnit *NewNode = Clone(N);
265    // Update the topological ordering.
266    if (NewNode->NodeNum >= NumSUnits)
267      Topo.InitDAGTopologicalSorting();
268    return NewNode;
269  }
270
271  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
272  /// need actual latency information but the hybrid scheduler does.
273  bool ForceUnitLatencies() const {
274    return !NeedLatency;
275  }
276};
277}  // end anonymous namespace
278
279
280/// Schedule - Schedule the DAG using list scheduling.
281void ScheduleDAGRRList::Schedule() {
282  DEBUG(dbgs()
283        << "********** List Scheduling BB#" << BB->getNumber()
284        << " '" << BB->getName() << "' **********\n");
285#ifndef NDEBUG
286  for (int i = 0; i < NumFactors; ++i) {
287    FactorCount[i] = 0;
288  }
289#endif //!NDEBUG
290
291  CurCycle = 0;
292  IssueCount = 0;
293  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
294  NumLiveRegs = 0;
295  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
296  LiveRegGens.resize(TRI->getNumRegs(), NULL);
297
298  // Build the scheduling graph.
299  BuildSchedGraph(NULL);
300
301  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
302          SUnits[su].dumpAll(this));
303  Topo.InitDAGTopologicalSorting();
304
305  AvailableQueue->initNodes(SUnits);
306
307  HazardRec->Reset();
308
309  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
310  if (isBottomUp)
311    ListScheduleBottomUp();
312  else
313    ListScheduleTopDown();
314
315#ifndef NDEBUG
316  for (int i = 0; i < NumFactors; ++i) {
317    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
318  }
319#endif // !NDEBUG
320  AvailableQueue->releaseState();
321}
322
323//===----------------------------------------------------------------------===//
324//  Bottom-Up Scheduling
325//===----------------------------------------------------------------------===//
326
327/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
328/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
329void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
330  SUnit *PredSU = PredEdge->getSUnit();
331
332#ifndef NDEBUG
333  if (PredSU->NumSuccsLeft == 0) {
334    dbgs() << "*** Scheduling failed! ***\n";
335    PredSU->dump(this);
336    dbgs() << " has been released too many times!\n";
337    llvm_unreachable(0);
338  }
339#endif
340  --PredSU->NumSuccsLeft;
341
342  if (!ForceUnitLatencies()) {
343    // Update the predecessor's height. This is now the cycle when the
344    // predecessor can be scheduled without causing a pipeline stall.
345    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
346  }
347
348  // If all the node's successors are scheduled, this node is ready
349  // to be scheduled. Ignore the special EntrySU node.
350  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
351    PredSU->isAvailable = true;
352
353    unsigned Height = PredSU->getHeight();
354    if (Height < MinAvailableCycle)
355      MinAvailableCycle = Height;
356
357    if (isReady(PredSU)) {
358      AvailableQueue->push(PredSU);
359    }
360    // CapturePred and others may have left the node in the pending queue;
361    // avoid adding it twice.
362    else if (!PredSU->isPending) {
363      PredSU->isPending = true;
364      PendingQueue.push_back(PredSU);
365    }
366  }
367}
368
369/// Call ReleasePred for each predecessor, then update register live def/gen.
370/// Always update LiveRegDefs for a register dependence even if the current SU
371/// also defines the register. This effectively creates one large live range
372/// across a sequence of two-address nodes. This is important because the
373/// entire chain must be scheduled together. Example:
374///
375/// flags = (3) add
376/// flags = (2) addc flags
377/// flags = (1) addc flags
378///
379/// results in
380///
381/// LiveRegDefs[flags] = 3
382/// LiveRegGens[flags] = 1
383///
384/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
385/// interference on flags.
386void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
387  // Bottom up: release predecessors
388  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
389       I != E; ++I) {
390    ReleasePred(SU, &*I);
391    if (I->isAssignedRegDep()) {
392      // This is a physical register dependency and it's impossible or
393      // expensive to copy the register. Make sure nothing that can
394      // clobber the register is scheduled between the predecessor and
395      // this node.
396      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
397      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
398             "interference on register dependence");
399      LiveRegDefs[I->getReg()] = I->getSUnit();
400      if (!LiveRegGens[I->getReg()]) {
401        ++NumLiveRegs;
402        LiveRegGens[I->getReg()] = SU;
403      }
404    }
405  }
406}
407
408/// Check to see if any of the pending instructions are ready to issue.  If
409/// so, add them to the available queue.
410void ScheduleDAGRRList::ReleasePending() {
411  if (DisableSchedCycles) {
412    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
413    return;
414  }
415
416  // If the available queue is empty, it is safe to reset MinAvailableCycle.
417  if (AvailableQueue->empty())
418    MinAvailableCycle = UINT_MAX;
419
420  // Check to see if any of the pending instructions are ready to issue.  If
421  // so, add them to the available queue.
422  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
423    unsigned ReadyCycle =
424      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
425    if (ReadyCycle < MinAvailableCycle)
426      MinAvailableCycle = ReadyCycle;
427
428    if (PendingQueue[i]->isAvailable) {
429      if (!isReady(PendingQueue[i]))
430          continue;
431      AvailableQueue->push(PendingQueue[i]);
432    }
433    PendingQueue[i]->isPending = false;
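    // Remove this entry by swapping it with the last element and popping,
    // then back up the index so the swapped-in entry is re-examined.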
434    PendingQueue[i] = PendingQueue.back();
435    PendingQueue.pop_back();
436    --i; --e;
437  }
438}
439
440/// Move the scheduler state forward by the specified number of Cycles.
441void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
442  if (NextCycle <= CurCycle)
443    return;
444
445  IssueCount = 0;
446  AvailableQueue->setCurCycle(NextCycle);
447  if (!HazardRec->isEnabled()) {
448    // Bypass lots of virtual calls in case of long latency.
449    CurCycle = NextCycle;
450  }
451  else {
452    for (; CurCycle != NextCycle; ++CurCycle) {
453      if (isBottomUp)
454        HazardRec->RecedeCycle();
455      else
456        HazardRec->AdvanceCycle();
457    }
458  }
459  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
460  // available Q to release pending nodes at least once before popping.
461  ReleasePending();
462}
463
464/// Move the scheduler state forward until the specified node's dependents are
465/// ready and can be scheduled with no resource conflicts.
466void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
467  if (DisableSchedCycles)
468    return;
469
470  // FIXME: Nodes such as CopyFromReg probably should not advance the current
471  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
472  // has predecessors the cycle will be advanced when they are scheduled.
473  // But given the crude nature of modeling latency though such nodes, we
474  // currently need to treat these nodes like real instructions.
475  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
476
477  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
478
479  // Bump CurCycle to account for latency. We assume the latency of other
480  // available instructions may be hidden by the stall (not a full pipe stall).
481  // This updates the hazard recognizer's cycle before reserving resources for
482  // this instruction.
483  AdvanceToCycle(ReadyCycle);
484
485  // Calls are scheduled in their preceding cycle, so don't conflict with
486  // hazards from instructions after the call. EmitNode will reset the
487  // scoreboard state before emitting the call.
488  if (isBottomUp && SU->isCall)
489    return;
490
491  // FIXME: For resource conflicts in very long non-pipelined stages, we
492  // should probably skip ahead here to avoid useless scoreboard checks.
493  int Stalls = 0;
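  // Count how many cycles this node would need to stall before the hazard
  // recognizer reports no hazard for it.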
494  while (true) {
495    ScheduleHazardRecognizer::HazardType HT =
496      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
497
498    if (HT == ScheduleHazardRecognizer::NoHazard)
499      break;
500
501    ++Stalls;
502  }
503  AdvanceToCycle(CurCycle + Stalls);
504}
505
506/// Record this SUnit in the HazardRecognizer.
507/// Does not update CurCycle.
508void ScheduleDAGRRList::EmitNode(SUnit *SU) {
509  if (!HazardRec->isEnabled())
510    return;
511
512  // Check for phys reg copy.
513  if (!SU->getNode())
514    return;
515
516  switch (SU->getNode()->getOpcode()) {
517  default:
518    assert(SU->getNode()->isMachineOpcode() &&
519           "This target-independent node should not be scheduled.");
520    break;
521  case ISD::MERGE_VALUES:
522  case ISD::TokenFactor:
523  case ISD::CopyToReg:
524  case ISD::CopyFromReg:
525  case ISD::EH_LABEL:
526    // Noops don't affect the scoreboard state. Copies are likely to be
527    // removed.
528    return;
529  case ISD::INLINEASM:
530    // For inline asm, clear the pipeline state.
531    HazardRec->Reset();
532    return;
533  }
534  if (isBottomUp && SU->isCall) {
535    // Calls are scheduled with their preceding instructions. For bottom-up
536    // scheduling, clear the pipeline state before emitting.
537    HazardRec->Reset();
538  }
539
540  HazardRec->EmitInstruction(SU);
541
542  if (!isBottomUp && SU->isCall) {
543    HazardRec->Reset();
544  }
545}
546
547static void resetVRegCycle(SUnit *SU);
548
549/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
550/// count of its predecessors. If a predecessor pending count is zero, add it to
551/// the Available queue.
552void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
553  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
554  DEBUG(SU->dump(this));
555
556#ifndef NDEBUG
557  if (CurCycle < SU->getHeight())
558    DEBUG(dbgs() << "   Height [" << SU->getHeight()
559          << "] pipeline stall!\n");
560#endif
561
562  // FIXME: Do not modify node height. It may interfere with
563  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
564  // node its ready cycle can aid heuristics, and after scheduling it can
565  // indicate the scheduled cycle.
566  SU->setHeightToAtLeast(CurCycle);
567
568  // Reserve resources for the scheduled instruction.
569  EmitNode(SU);
570
571  Sequence.push_back(SU);
572
573  AvailableQueue->ScheduledNode(SU);
574
575  // If HazardRec is disabled, and each inst counts as one cycle, then
576  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
577  // PendingQueue for schedulers that implement HasReadyFilter.
578  if (!HazardRec->isEnabled() && AvgIPC < 2)
579    AdvanceToCycle(CurCycle + 1);
580
581  // Update liveness of predecessors before successors to avoid treating a
582  // two-address node as a live range def.
583  ReleasePredecessors(SU);
584
585  // Release all the implicit physical register defs that are live.
586  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
587       I != E; ++I) {
588    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
589    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
590      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
591      --NumLiveRegs;
592      LiveRegDefs[I->getReg()] = NULL;
593      LiveRegGens[I->getReg()] = NULL;
594    }
595  }
596
597  resetVRegCycle(SU);
598
599  SU->isScheduled = true;
600
601  // Conditions under which the scheduler should eagerly advance the cycle:
602  // (1) No available instructions
603  // (2) All pipelines full, so available instructions must have hazards.
604  //
605  // If HazardRec is disabled, the cycle was pre-advanced before calling
606  // ReleasePredecessors. In that case, IssueCount should remain 0.
607  //
608  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
609  if (HazardRec->isEnabled() || AvgIPC > 1) {
610    if (SU->getNode() && SU->getNode()->isMachineOpcode())
611      ++IssueCount;
612    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
613        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
614      AdvanceToCycle(CurCycle + 1);
615  }
616}
617
618/// CapturePred - This does the opposite of ReleasePred. Since SU is being
619/// unscheduled, increase the succ left count of its predecessors. Remove
620/// them from AvailableQueue if necessary.
621void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
622  SUnit *PredSU = PredEdge->getSUnit();
623  if (PredSU->isAvailable) {
624    PredSU->isAvailable = false;
625    if (!PredSU->isPending)
626      AvailableQueue->remove(PredSU);
627  }
628
629  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
630  ++PredSU->NumSuccsLeft;
631}
632
633/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
634/// state and its predecessors' states to reflect the change.
635void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
636  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
637  DEBUG(SU->dump(this));
638
639  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
640       I != E; ++I) {
641    CapturePred(&*I);
642    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
643      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
644      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
645             "Physical register dependency violated?");
646      --NumLiveRegs;
647      LiveRegDefs[I->getReg()] = NULL;
648      LiveRegGens[I->getReg()] = NULL;
649    }
650  }
651
652  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
653       I != E; ++I) {
654    if (I->isAssignedRegDep()) {
655      // This becomes the nearest def. Note that an earlier def may still be
656      // pending if this is a two-address node.
657      if (!LiveRegDefs[I->getReg()]) {
658        ++NumLiveRegs;
659      }
660      LiveRegDefs[I->getReg()] = SU;
661      if (LiveRegGens[I->getReg()] == NULL ||
662          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
663        LiveRegGens[I->getReg()] = I->getSUnit();
664    }
665  }
666  if (SU->getHeight() < MinAvailableCycle)
667    MinAvailableCycle = SU->getHeight();
668
669  SU->setHeightDirty();
670  SU->isScheduled = false;
671  SU->isAvailable = true;
672  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
673    // Don't make available until backtracking is complete.
674    SU->isPending = true;
675    PendingQueue.push_back(SU);
676  }
677  else {
678    AvailableQueue->push(SU);
679  }
680  AvailableQueue->UnscheduledNode(SU);
681}
682
683/// After backtracking, the hazard checker needs to be restored to a state
684/// corresponding to the current cycle.
685void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
686  HazardRec->Reset();
687
688  unsigned LookAhead = std::min((unsigned)Sequence.size(),
689                                HazardRec->getMaxLookAhead());
690  if (LookAhead == 0)
691    return;
692
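  // Replay the most recently scheduled nodes (up to the recognizer's
  // look-ahead) into the hazard state, receding one cycle at a time as the
  // node heights increase.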
693  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
694  unsigned HazardCycle = (*I)->getHeight();
695  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
696    SUnit *SU = *I;
697    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
698      HazardRec->RecedeCycle();
699    }
700    EmitNode(SU);
701  }
702}
703
704/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
705/// including BtSU, in order to make it possible to schedule SU.
706void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
707  SUnit *OldSU = Sequence.back();
708  while (true) {
709    Sequence.pop_back();
710    if (SU->isSucc(OldSU))
711      // Don't try to remove SU from AvailableQueue.
712      SU->isAvailable = false;
713    // FIXME: use ready cycle instead of height
714    CurCycle = OldSU->getHeight();
715    UnscheduleNodeBottomUp(OldSU);
716    AvailableQueue->setCurCycle(CurCycle);
717    if (OldSU == BtSU)
718      break;
719    OldSU = Sequence.back();
720  }
721
722  assert(!SU->isSucc(OldSU) && "Something is wrong!");
723
724  RestoreHazardCheckerBottomUp();
725
726  ReleasePending();
727
728  ++NumBacktracks;
729}
730
731static bool isOperandOf(const SUnit *SU, SDNode *N) {
732  for (const SDNode *SUNode = SU->getNode(); SUNode;
733       SUNode = SUNode->getGluedNode()) {
734    if (SUNode->isOperandOf(N))
735      return true;
736  }
737  return false;
738}
739
740/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
741/// successors to the newly created node.
742SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
743  SDNode *N = SU->getNode();
744  if (!N)
745    return NULL;
746
747  if (SU->getNode()->getGluedNode())
748    return NULL;
749
750  SUnit *NewSU;
751  bool TryUnfold = false;
752  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
753    EVT VT = N->getValueType(i);
754    if (VT == MVT::Glue)
755      return NULL;
756    else if (VT == MVT::Other)
757      TryUnfold = true;
758  }
759  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
760    const SDValue &Op = N->getOperand(i);
761    EVT VT = Op.getNode()->getValueType(Op.getResNo());
762    if (VT == MVT::Glue)
763      return NULL;
764  }
765
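  // A chain (MVT::Other) result suggests this node has a folded memory
  // operand; try to unfold it into a separate load plus the remaining
  // operation so they can be scheduled independently.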
766  if (TryUnfold) {
767    SmallVector<SDNode*, 2> NewNodes;
768    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
769      return NULL;
770
771    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
772    assert(NewNodes.size() == 2 && "Expected a load folding node!");
773
774    N = NewNodes[1];
775    SDNode *LoadNode = NewNodes[0];
776    unsigned NumVals = N->getNumValues();
777    unsigned OldNumVals = SU->getNode()->getNumValues();
778    for (unsigned i = 0; i != NumVals; ++i)
779      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
780    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
781                                   SDValue(LoadNode, 1));
782
783    // LoadNode may already exist. This can happen when there is another
784    // load from the same location that produces the same type of value
785    // but has different alignment or volatility.
786    bool isNewLoad = true;
787    SUnit *LoadSU;
788    if (LoadNode->getNodeId() != -1) {
789      LoadSU = &SUnits[LoadNode->getNodeId()];
790      isNewLoad = false;
791    } else {
792      LoadSU = CreateNewSUnit(LoadNode);
793      LoadNode->setNodeId(LoadSU->NodeNum);
794
795      InitNumRegDefsLeft(LoadSU);
796      ComputeLatency(LoadSU);
797    }
798
799    SUnit *NewSU = CreateNewSUnit(N);
800    assert(N->getNodeId() == -1 && "Node already inserted!");
801    N->setNodeId(NewSU->NodeNum);
802
803    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
804    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
805      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
806        NewSU->isTwoAddress = true;
807        break;
808      }
809    }
810    if (TID.isCommutable())
811      NewSU->isCommutable = true;
812
813    InitNumRegDefsLeft(NewSU);
814    ComputeLatency(NewSU);
815
816    // Record all the edges to and from the old SU, by category.
817    SmallVector<SDep, 4> ChainPreds;
818    SmallVector<SDep, 4> ChainSuccs;
819    SmallVector<SDep, 4> LoadPreds;
820    SmallVector<SDep, 4> NodePreds;
821    SmallVector<SDep, 4> NodeSuccs;
822    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
823         I != E; ++I) {
824      if (I->isCtrl())
825        ChainPreds.push_back(*I);
826      else if (isOperandOf(I->getSUnit(), LoadNode))
827        LoadPreds.push_back(*I);
828      else
829        NodePreds.push_back(*I);
830    }
831    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
832         I != E; ++I) {
833      if (I->isCtrl())
834        ChainSuccs.push_back(*I);
835      else
836        NodeSuccs.push_back(*I);
837    }
838
839    // Now assign edges to the newly-created nodes.
840    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
841      const SDep &Pred = ChainPreds[i];
842      RemovePred(SU, Pred);
843      if (isNewLoad)
844        AddPred(LoadSU, Pred);
845    }
846    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
847      const SDep &Pred = LoadPreds[i];
848      RemovePred(SU, Pred);
849      if (isNewLoad)
850        AddPred(LoadSU, Pred);
851    }
852    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
853      const SDep &Pred = NodePreds[i];
854      RemovePred(SU, Pred);
855      AddPred(NewSU, Pred);
856    }
857    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
858      SDep D = NodeSuccs[i];
859      SUnit *SuccDep = D.getSUnit();
860      D.setSUnit(SU);
861      RemovePred(SuccDep, D);
862      D.setSUnit(NewSU);
863      AddPred(SuccDep, D);
864      // Balance register pressure.
865      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
866          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
867        --NewSU->NumRegDefsLeft;
868    }
869    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
870      SDep D = ChainSuccs[i];
871      SUnit *SuccDep = D.getSUnit();
872      D.setSUnit(SU);
873      RemovePred(SuccDep, D);
874      if (isNewLoad) {
875        D.setSUnit(LoadSU);
876        AddPred(SuccDep, D);
877      }
878    }
879
880    // Add a data dependency to reflect that NewSU reads the value defined
881    // by LoadSU.
882    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
883
884    if (isNewLoad)
885      AvailableQueue->addNode(LoadSU);
886    AvailableQueue->addNode(NewSU);
887
888    ++NumUnfolds;
889
890    if (NewSU->NumSuccsLeft == 0) {
891      NewSU->isAvailable = true;
892      return NewSU;
893    }
894    SU = NewSU;
895  }
896
897  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
898  NewSU = CreateClone(SU);
899
900  // New SUnit has the exact same predecessors.
901  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
902       I != E; ++I)
903    if (!I->isArtificial())
904      AddPred(NewSU, *I);
905
906  // Only copy scheduled successors. Cut them from old node's successor
907  // list and move them over.
908  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
909  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
910       I != E; ++I) {
911    if (I->isArtificial())
912      continue;
913    SUnit *SuccSU = I->getSUnit();
914    if (SuccSU->isScheduled) {
915      SDep D = *I;
916      D.setSUnit(NewSU);
917      AddPred(SuccSU, D);
918      D.setSUnit(SU);
919      DelDeps.push_back(std::make_pair(SuccSU, D));
920    }
921  }
922  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
923    RemovePred(DelDeps[i].first, DelDeps[i].second);
924
925  AvailableQueue->updateNode(SU);
926  AvailableQueue->addNode(NewSU);
927
928  ++NumDups;
929  return NewSU;
930}
931
932/// InsertCopiesAndMoveSuccs - Insert register copies and move all
933/// scheduled successors of the given SUnit to the last copy.
934void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
935                                               const TargetRegisterClass *DestRC,
936                                               const TargetRegisterClass *SrcRC,
937                                               SmallVector<SUnit*, 2> &Copies) {
938  SUnit *CopyFromSU = CreateNewSUnit(NULL);
939  CopyFromSU->CopySrcRC = SrcRC;
940  CopyFromSU->CopyDstRC = DestRC;
941
942  SUnit *CopyToSU = CreateNewSUnit(NULL);
943  CopyToSU->CopySrcRC = DestRC;
944  CopyToSU->CopyDstRC = SrcRC;
945
946  // Only copy scheduled successors. Cut them from old node's successor
947  // list and move them over.
948  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
949  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
950       I != E; ++I) {
951    if (I->isArtificial())
952      continue;
953    SUnit *SuccSU = I->getSUnit();
954    if (SuccSU->isScheduled) {
955      SDep D = *I;
956      D.setSUnit(CopyToSU);
957      AddPred(SuccSU, D);
958      DelDeps.push_back(std::make_pair(SuccSU, *I));
959    }
960    else {
961      // Avoid scheduling the def-side copy before other successors. Otherwise
962      // we could introduce another physreg interference on the copy and
963      // continue inserting copies indefinitely.
964      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
965             /*Reg=*/0, /*isNormalMemory=*/false,
966             /*isMustAlias=*/false, /*isArtificial=*/true);
967      AddPred(SuccSU, D);
968    }
969  }
970  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
971    RemovePred(DelDeps[i].first, DelDeps[i].second);
972
973  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
974  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
975
976  AvailableQueue->updateNode(SU);
977  AvailableQueue->addNode(CopyFromSU);
978  AvailableQueue->addNode(CopyToSU);
979  Copies.push_back(CopyFromSU);
980  Copies.push_back(CopyToSU);
981
982  ++NumPRCopies;
983}
984
985/// getPhysicalRegisterVT - Returns the ValueType of the physical register
986/// definition of the specified node.
987/// FIXME: Move to SelectionDAG?
988static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
989                                 const TargetInstrInfo *TII) {
990  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
991  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
992  unsigned NumRes = TID.getNumDefs();
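  // The node's explicit results come first; implicit defs follow them in the
  // order listed in the instruction description, so count forward to Reg.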
993  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
994    if (Reg == *ImpDef)
995      break;
996    ++NumRes;
997  }
998  return N->getValueType(NumRes);
999}
1000
1001/// CheckForLiveRegDef - If the specified register def of the specified SUnit
1002/// clobbers any "live" registers, record the interfering registers in LRegs.
1003static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1004                               std::vector<SUnit*> &LiveRegDefs,
1005                               SmallSet<unsigned, 4> &RegAdded,
1006                               SmallVector<unsigned, 4> &LRegs,
1007                               const TargetRegisterInfo *TRI) {
1008  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
1009
1010    // Check if this alias of Reg is live.
1011    if (!LiveRegDefs[*AliasI]) continue;
1012
1013    // Allow multiple uses of the same def.
1014    if (LiveRegDefs[*AliasI] == SU) continue;
1015
1016    // Add Reg to the set of interfering live regs.
1017    if (RegAdded.insert(*AliasI)) {
1018      LRegs.push_back(*AliasI);
1019    }
1020  }
1021}
1022
1023/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1024/// scheduling of the given node to satisfy live physical register dependencies.
1025/// If the specific node is the last one that's available to schedule, do
1026/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1027bool ScheduleDAGRRList::
1028DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1029  if (NumLiveRegs == 0)
1030    return false;
1031
1032  SmallSet<unsigned, 4> RegAdded;
1033  // If this node would clobber any "live" register, then it's not ready.
1034  //
1035  // If SU is the currently live definition of the same register that it uses,
1036  // then we are free to schedule it.
1037  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1038       I != E; ++I) {
1039    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1040      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1041                         RegAdded, LRegs, TRI);
1042  }
1043
1044  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1045    if (Node->getOpcode() == ISD::INLINEASM) {
1046      // Inline asm can clobber physical defs.
1047      unsigned NumOps = Node->getNumOperands();
1048      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1049        --NumOps;  // Ignore the glue operand.
1050
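      // Walk the inline asm operand groups. Each group starts with a flag
      // word encoding its kind and register count; collect physical register
      // defs and earlyclobbers, which can clobber live registers.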
1051      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1052        unsigned Flags =
1053          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1054        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1055
1056        ++i; // Skip the ID value.
1057        if (InlineAsm::isRegDefKind(Flags) ||
1058            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
1059          // Check for def of register or earlyclobber register.
1060          for (; NumVals; --NumVals, ++i) {
1061            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1062            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1063              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1064          }
1065        } else
1066          i += NumVals;
1067      }
1068      continue;
1069    }
1070
1071    if (!Node->isMachineOpcode())
1072      continue;
1073    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
1074    if (!TID.ImplicitDefs)
1075      continue;
1076    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
1077      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1078  }
1079
1080  return !LRegs.empty();
1081}
1082
1083/// Return a node that can be scheduled in this cycle. Requirements:
1084/// (1) Ready: latency has been satisfied
1085/// (2) No Hazards: resources are available
1086/// (3) No Interferences: may unschedule to break register interferences.
1087SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1088  SmallVector<SUnit*, 4> Interferences;
1089  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1090
1091  SUnit *CurSU = AvailableQueue->pop();
1092  while (CurSU) {
1093    SmallVector<unsigned, 4> LRegs;
1094    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1095      break;
1096    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1097
1098    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1099    Interferences.push_back(CurSU);
1100    CurSU = AvailableQueue->pop();
1101  }
1102  if (CurSU) {
1103    // Add the nodes that aren't ready back onto the available list.
1104    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1105      Interferences[i]->isPending = false;
1106      assert(Interferences[i]->isAvailable && "must still be available");
1107      AvailableQueue->push(Interferences[i]);
1108    }
1109    return CurSU;
1110  }
1111
1112  // All candidates are delayed due to live physical reg dependencies.
1113  // Try backtracking, code duplication, or inserting cross class copies
1114  // to resolve it.
1115  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1116    SUnit *TrySU = Interferences[i];
1117    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1118
1119    // Try unscheduling up to the point where it's safe to schedule
1120    // this node.
1121    SUnit *BtSU = NULL;
1122    unsigned LiveCycle = UINT_MAX;
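    // Find the interfering live-range generator with the smallest height;
    // that is the nearest point we can backtrack to.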
1123    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1124      unsigned Reg = LRegs[j];
1125      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1126        BtSU = LiveRegGens[Reg];
1127        LiveCycle = BtSU->getHeight();
1128      }
1129    }
1130    if (!WillCreateCycle(TrySU, BtSU))  {
1131      BacktrackBottomUp(TrySU, BtSU);
1132
1133      // Force the current node to be scheduled before the node that
1134      // requires the physical reg dep.
1135      if (BtSU->isAvailable) {
1136        BtSU->isAvailable = false;
1137        if (!BtSU->isPending)
1138          AvailableQueue->remove(BtSU);
1139      }
1140      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1141                          /*Reg=*/0, /*isNormalMemory=*/false,
1142                          /*isMustAlias=*/false, /*isArtificial=*/true));
1143
1144      // If one or more successors have been unscheduled, then the current
1145      // node is no longer available. Schedule a successor that's now
1146      // available instead.
1147      if (!TrySU->isAvailable) {
1148        CurSU = AvailableQueue->pop();
1149      }
1150      else {
1151        CurSU = TrySU;
1152        TrySU->isPending = false;
1153        Interferences.erase(Interferences.begin()+i);
1154      }
1155      break;
1156    }
1157  }
1158
1159  if (!CurSU) {
1160    // Can't backtrack. If it's too expensive to copy the value, then try to
1161    // duplicate the nodes that produce these "too expensive to copy"
1162    // values to break the dependency. In case even that doesn't work,
1163    // insert cross class copies.
1164    // If it's not too expensive, i.e. cost != -1, issue copies.
1165    SUnit *TrySU = Interferences[0];
1166    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1167    assert(LRegs.size() == 1 && "Can't handle this yet!");
1168    unsigned Reg = LRegs[0];
1169    SUnit *LRDef = LiveRegDefs[Reg];
1170    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1171    const TargetRegisterClass *RC =
1172      TRI->getMinimalPhysRegClass(Reg, VT);
1173    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1174
1175    // If the cross copy register class is the same as RC, then it must be
1176    // possible to copy the value directly. Do not try to duplicate the def.
1177    // If the cross copy register class is not the same as RC, then it's
1178    // possible to copy the value, but it requires cross register class copies
1179    // and it is expensive.
1180    // If cross copy register class is null, then it's not possible to copy
1181    // the value at all.
1182    SUnit *NewDef = 0;
1183    if (DestRC != RC) {
1184      NewDef = CopyAndMoveSuccessors(LRDef);
1185      if (!DestRC && !NewDef)
1186        report_fatal_error("Can't handle live physical register dependency!");
1187    }
1188    if (!NewDef) {
1189      // Issue copies, these can be expensive cross register class copies.
1190      SmallVector<SUnit*, 2> Copies;
1191      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1192      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1193            << " to SU #" << Copies.front()->NodeNum << "\n");
1194      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1195                          /*Reg=*/0, /*isNormalMemory=*/false,
1196                          /*isMustAlias=*/false,
1197                          /*isArtificial=*/true));
1198      NewDef = Copies.back();
1199    }
1200
1201    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1202          << " to SU #" << TrySU->NodeNum << "\n");
1203    LiveRegDefs[Reg] = NewDef;
1204    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1205                         /*Reg=*/0, /*isNormalMemory=*/false,
1206                         /*isMustAlias=*/false,
1207                         /*isArtificial=*/true));
1208    TrySU->isAvailable = false;
1209    CurSU = NewDef;
1210  }
1211
1212  assert(CurSU && "Unable to resolve live physical register dependencies!");
1213
1214  // Add the nodes that aren't ready back onto the available list.
1215  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1216    Interferences[i]->isPending = false;
1217    // May no longer be available due to backtracking.
1218    if (Interferences[i]->isAvailable) {
1219      AvailableQueue->push(Interferences[i]);
1220    }
1221  }
1222  return CurSU;
1223}
1224
1225/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1226/// schedulers.
1227void ScheduleDAGRRList::ListScheduleBottomUp() {
1228  // Release any predecessors of the special Exit node.
1229  ReleasePredecessors(&ExitSU);
1230
1231  // Add root to Available queue.
1232  if (!SUnits.empty()) {
1233    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1234    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1235    RootSU->isAvailable = true;
1236    AvailableQueue->push(RootSU);
1237  }
1238
1239  // While Available queue is not empty, grab the node with the highest
1240  // priority. If it is not ready put it back.  Schedule the node.
1241  Sequence.reserve(SUnits.size());
1242  while (!AvailableQueue->empty()) {
1243    DEBUG(dbgs() << "\nExamining Available:\n";
1244          AvailableQueue->dump(this));
1245
1246    // Pick the best node to schedule taking all constraints into
1247    // consideration.
1248    SUnit *SU = PickNodeToScheduleBottomUp();
1249
1250    AdvancePastStalls(SU);
1251
1252    ScheduleNodeBottomUp(SU);
1253
1254    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1255      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1256      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1257      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1258    }
1259  }
1260
1261  // The sequence was built bottom-up; reverse it into top-down order.
1262  std::reverse(Sequence.begin(), Sequence.end());
1263
1264#ifndef NDEBUG
1265  VerifySchedule(isBottomUp);
1266#endif
1267}
1268
1269//===----------------------------------------------------------------------===//
1270//  Top-Down Scheduling
1271//===----------------------------------------------------------------------===//
1272
1273/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1274/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1275void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1276  SUnit *SuccSU = SuccEdge->getSUnit();
1277
1278#ifndef NDEBUG
1279  if (SuccSU->NumPredsLeft == 0) {
1280    dbgs() << "*** Scheduling failed! ***\n";
1281    SuccSU->dump(this);
1282    dbgs() << " has been released too many times!\n";
1283    llvm_unreachable(0);
1284  }
1285#endif
1286  --SuccSU->NumPredsLeft;
1287
1288  // If all the node's predecessors are scheduled, this node is ready
1289  // to be scheduled. Ignore the special ExitSU node.
1290  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1291    SuccSU->isAvailable = true;
1292    AvailableQueue->push(SuccSU);
1293  }
1294}
1295
1296void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1297  // Top down: release successors
1298  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1299       I != E; ++I) {
1300    assert(!I->isAssignedRegDep() &&
1301           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1302
1303    ReleaseSucc(SU, &*I);
1304  }
1305}
1306
1307/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1308/// count of its successors. If a successor pending count is zero, add it to
1309/// the Available queue.
1310void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1311  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1312  DEBUG(SU->dump(this));
1313
1314  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1315  SU->setDepthToAtLeast(CurCycle);
1316  Sequence.push_back(SU);
1317
1318  ReleaseSuccessors(SU);
1319  SU->isScheduled = true;
1320  AvailableQueue->ScheduledNode(SU);
1321}
1322
1323/// ListScheduleTopDown - The main loop of list scheduling for top-down
1324/// schedulers.
1325void ScheduleDAGRRList::ListScheduleTopDown() {
1326  AvailableQueue->setCurCycle(CurCycle);
1327
1328  // Release any successors of the special Entry node.
1329  ReleaseSuccessors(&EntrySU);
1330
1331  // Add all leaves to the Available queue.
1332  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1333    // It is available if it has no predecessors.
1334    if (SUnits[i].Preds.empty()) {
1335      AvailableQueue->push(&SUnits[i]);
1336      SUnits[i].isAvailable = true;
1337    }
1338  }
1339
1340  // While Available queue is not empty, grab the node with the highest
1341  // priority. If it is not ready put it back.  Schedule the node.
1342  Sequence.reserve(SUnits.size());
1343  while (!AvailableQueue->empty()) {
1344    SUnit *CurSU = AvailableQueue->pop();
1345
1346    if (CurSU)
1347      ScheduleNodeTopDown(CurSU);
1348    ++CurCycle;
1349    AvailableQueue->setCurCycle(CurCycle);
1350  }
1351
1352#ifndef NDEBUG
1353  VerifySchedule(isBottomUp);
1354#endif
1355}
1356
1357
1358//===----------------------------------------------------------------------===//
1359//                RegReductionPriorityQueue Definition
1360//===----------------------------------------------------------------------===//
1361//
1362// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1363// to reduce register pressure.
1364//
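// As a rough illustration of the classic Sethi-Ullman labeling (not code in
// this file): a leaf gets number 1, and a binary node gets
//   max(left, right)   if the children's numbers differ
//   left + 1           if they are equal
// Larger numbers indicate a subtree that needs more registers; the schedulers
// use these numbers as priorities to reduce register pressure.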
1365namespace {
1366class RegReductionPQBase;
1367
1368struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1369  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1370};
1371
1372#ifndef NDEBUG
1373template<class SF>
1374struct reverse_sort : public queue_sort {
1375  SF &SortFunc;
1376  reverse_sort(SF &sf) : SortFunc(sf) {}
1377  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
1378
1379  bool operator()(SUnit* left, SUnit* right) const {
1380    // reverse left/right rather than simply !SortFunc(left, right)
1381    // to expose different paths in the comparison logic.
1382    return SortFunc(right, left);
1383  }
1384};
1385#endif // NDEBUG
1386
1387/// bu_ls_rr_sort - Priority function for the bottom-up register pressure
1388/// reduction scheduler.
1389struct bu_ls_rr_sort : public queue_sort {
1390  enum {
1391    IsBottomUp = true,
1392    HasReadyFilter = false
1393  };
1394
1395  RegReductionPQBase *SPQ;
1396  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1397  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1398
1399  bool operator()(SUnit* left, SUnit* right) const;
1400};
1401
1402// td_ls_rr_sort - Priority function for top down register pressure reduction
1403// scheduler.
1404struct td_ls_rr_sort : public queue_sort {
1405  enum {
1406    IsBottomUp = false,
1407    HasReadyFilter = false
1408  };
1409
1410  RegReductionPQBase *SPQ;
1411  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1412  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1413
1414  bool operator()(const SUnit* left, const SUnit* right) const;
1415};
1416
1417// src_ls_rr_sort - Priority function for source order scheduler.
1418struct src_ls_rr_sort : public queue_sort {
1419  enum {
1420    IsBottomUp = true,
1421    HasReadyFilter = false
1422  };
1423
1424  RegReductionPQBase *SPQ;
1425  src_ls_rr_sort(RegReductionPQBase *spq)
1426    : SPQ(spq) {}
1427  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1428    : SPQ(RHS.SPQ) {}
1429
1430  bool operator()(SUnit* left, SUnit* right) const;
1431};
1432
1433// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1434struct hybrid_ls_rr_sort : public queue_sort {
1435  enum {
1436    IsBottomUp = true,
1437    HasReadyFilter = false
1438  };
1439
1440  RegReductionPQBase *SPQ;
1441  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1442    : SPQ(spq) {}
1443  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1444    : SPQ(RHS.SPQ) {}
1445
1446  bool isReady(SUnit *SU, unsigned CurCycle) const;
1447
1448  bool operator()(SUnit* left, SUnit* right) const;
1449};
1450
1451// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1452// scheduler.
1453struct ilp_ls_rr_sort : public queue_sort {
1454  enum {
1455    IsBottomUp = true,
1456    HasReadyFilter = false
1457  };
1458
1459  RegReductionPQBase *SPQ;
1460  ilp_ls_rr_sort(RegReductionPQBase *spq)
1461    : SPQ(spq) {}
1462  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1463    : SPQ(RHS.SPQ) {}
1464
1465  bool isReady(SUnit *SU, unsigned CurCycle) const;
1466
1467  bool operator()(SUnit* left, SUnit* right) const;
1468};
1469
1470class RegReductionPQBase : public SchedulingPriorityQueue {
1471protected:
1472  std::vector<SUnit*> Queue;
1473  unsigned CurQueueId;
1474  bool TracksRegPressure;
1475
1476  // SUnits - The SUnits for the current graph.
1477  std::vector<SUnit> *SUnits;
1478
1479  MachineFunction &MF;
1480  const TargetInstrInfo *TII;
1481  const TargetRegisterInfo *TRI;
1482  const TargetLowering *TLI;
1483  ScheduleDAGRRList *scheduleDAG;
1484
1485  // SethiUllmanNumbers - The SethiUllman number for each node.
1486  std::vector<unsigned> SethiUllmanNumbers;
1487
1488  /// RegPressure - Tracking current reg pressure per register class.
1489  ///
1490  std::vector<unsigned> RegPressure;
1491
1492  /// RegLimit - Tracking the number of allocatable registers per register
1493  /// class.
1494  std::vector<unsigned> RegLimit;
1495
1496public:
1497  RegReductionPQBase(MachineFunction &mf,
1498                     bool hasReadyFilter,
1499                     bool tracksrp,
1500                     const TargetInstrInfo *tii,
1501                     const TargetRegisterInfo *tri,
1502                     const TargetLowering *tli)
1503    : SchedulingPriorityQueue(hasReadyFilter),
1504      CurQueueId(0), TracksRegPressure(tracksrp),
1505      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1506    if (TracksRegPressure) {
1507      unsigned NumRC = TRI->getNumRegClasses();
1508      RegLimit.resize(NumRC);
1509      RegPressure.resize(NumRC);
1510      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1511      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1512      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1513             E = TRI->regclass_end(); I != E; ++I)
1514        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1515    }
1516  }
1517
1518  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1519    scheduleDAG = scheduleDag;
1520  }
1521
1522  ScheduleHazardRecognizer* getHazardRec() {
1523    return scheduleDAG->getHazardRec();
1524  }
1525
1526  void initNodes(std::vector<SUnit> &sunits);
1527
1528  void addNode(const SUnit *SU);
1529
1530  void updateNode(const SUnit *SU);
1531
1532  void releaseState() {
1533    SUnits = 0;
1534    SethiUllmanNumbers.clear();
1535    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1536  }
1537
1538  unsigned getNodePriority(const SUnit *SU) const;
1539
1540  unsigned getNodeOrdering(const SUnit *SU) const {
1541    if (!SU->getNode()) return 0;
1542
1543    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1544  }
1545
1546  bool empty() const { return Queue.empty(); }
1547
1548  void push(SUnit *U) {
1549    assert(!U->NodeQueueId && "Node in the queue already");
1550    U->NodeQueueId = ++CurQueueId;
1551    Queue.push_back(U);
1552  }
1553
1554  void remove(SUnit *SU) {
1555    assert(!Queue.empty() && "Queue is empty!");
1556    assert(SU->NodeQueueId != 0 && "Not in queue!");
1557    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1558                                                 SU);
1559    if (I != prior(Queue.end()))
1560      std::swap(*I, Queue.back());
1561    Queue.pop_back();
1562    SU->NodeQueueId = 0;
1563  }
1564
1565  bool tracksRegPressure() const { return TracksRegPressure; }
1566
1567  void dumpRegPressure() const;
1568
1569  bool HighRegPressure(const SUnit *SU) const;
1570
1571  bool MayReduceRegPressure(SUnit *SU) const;
1572
1573  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1574
1575  void ScheduledNode(SUnit *SU);
1576
1577  void UnscheduledNode(SUnit *SU);
1578
1579protected:
1580  bool canClobber(const SUnit *SU, const SUnit *Op);
1581  void AddPseudoTwoAddrDeps();
1582  void PrescheduleNodesWithMultipleUses();
1583  void CalculateSethiUllmanNumbers();
1584};
1585
1586template<class SF>
1587static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1588  std::vector<SUnit *>::iterator Best = Q.begin();
1589  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1590         E = Q.end(); I != E; ++I)
1591    if (Picker(*Best, *I))
1592      Best = I;
1593  SUnit *V = *Best;
1594  if (Best != prior(Q.end()))
1595    std::swap(*Best, Q.back());
1596  Q.pop_back();
1597  return V;
1598}
1599
1600template<class SF>
1601SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1602#ifndef NDEBUG
1603  if (DAG->StressSched) {
1604    reverse_sort<SF> RPicker(Picker);
1605    return popFromQueueImpl(Q, RPicker);
1606  }
1607#endif
1608  (void)DAG;
1609  return popFromQueueImpl(Q, Picker);
1610}
1611
1612template<class SF>
1613class RegReductionPriorityQueue : public RegReductionPQBase {
1614  SF Picker;
1615
1616public:
1617  RegReductionPriorityQueue(MachineFunction &mf,
1618                            bool tracksrp,
1619                            const TargetInstrInfo *tii,
1620                            const TargetRegisterInfo *tri,
1621                            const TargetLowering *tli)
1622    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1623      Picker(this) {}
1624
1625  bool isBottomUp() const { return SF::IsBottomUp; }
1626
1627  bool isReady(SUnit *U) const {
1628    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1629  }
1630
1631  SUnit *pop() {
1632    if (Queue.empty()) return NULL;
1633
1634    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1635    V->NodeQueueId = 0;
1636    return V;
1637  }
1638
1639  void dump(ScheduleDAG *DAG) const {
1640    // Emulate pop() without clobbering NodeQueueIds.
1641    std::vector<SUnit*> DumpQueue = Queue;
1642    SF DumpPicker = Picker;
1643    while (!DumpQueue.empty()) {
1644      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1645      if (isBottomUp())
1646        dbgs() << "Height " << SU->getHeight() << ": ";
1647      else
1648        dbgs() << "Depth " << SU->getDepth() << ": ";
1649      SU->dump(DAG);
1650    }
1651  }
1652};
1653
1654typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1655BURegReductionPriorityQueue;
1656
1657typedef RegReductionPriorityQueue<td_ls_rr_sort>
1658TDRegReductionPriorityQueue;
1659
1660typedef RegReductionPriorityQueue<src_ls_rr_sort>
1661SrcRegReductionPriorityQueue;
1662
1663typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1664HybridBURRPriorityQueue;
1665
1666typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1667ILPBURRPriorityQueue;
1668} // end anonymous namespace
1669
1670//===----------------------------------------------------------------------===//
1671//           Static Node Priority for Register Pressure Reduction
1672//===----------------------------------------------------------------------===//
1673
1674// Check for special nodes that bypass scheduling heuristics.
1675// Currently this pushes TokenFactor nodes down, but may be used for other
1676// pseudo-ops as well.
1677//
1678// Return -1 to schedule right above left, 1 for left above right.
1679// Return 0 if no bias exists.
1680static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1681  bool LSchedLow = left->isScheduleLow;
1682  bool RSchedLow = right->isScheduleLow;
1683  if (LSchedLow != RSchedLow)
1684    return LSchedLow < RSchedLow ? 1 : -1;
1685  return 0;
1686}
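// Illustrative note (not part of the original code): the comparators further
// below consume this bias first and only fall back to their regular
// heuristics when it is zero, e.g.
//
//   if (int res = checkSpecialNodes(left, right))
//     return res > 0;
//   return BURRSort(left, right, SPQ);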
1687
1688/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
1689/// A smaller number means a higher priority.
1690static unsigned
1691CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1692  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1693  if (SethiUllmanNumber != 0)
1694    return SethiUllmanNumber;
1695
1696  unsigned Extra = 0;
1697  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1698       I != E; ++I) {
1699    if (I->isCtrl()) continue;  // ignore chain preds
1700    SUnit *PredSU = I->getSUnit();
1701    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1702    if (PredSethiUllman > SethiUllmanNumber) {
1703      SethiUllmanNumber = PredSethiUllman;
1704      Extra = 0;
1705    } else if (PredSethiUllman == SethiUllmanNumber)
1706      ++Extra;
1707  }
1708
1709  SethiUllmanNumber += Extra;
1710
1711  if (SethiUllmanNumber == 0)
1712    SethiUllmanNumber = 1;
1713
1714  return SethiUllmanNumber;
1715}
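// A small worked example of the recurrence above (illustrative only):
//
//   leaf with no data preds:        number stays 0 and is clamped to 1
//   op with two leaf operands:      max pred number = 1, the second pred ties,
//                                   so Extra = 1 and the result is 1 + 1 = 2
//   op of that result and a leaf:   max pred number = 2 with no tie, result 2
//
// Ties among equally deep operands are what grow the number, mirroring the
// classic Sethi-Ullman estimate of how many registers a subtree needs.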
1716
1717/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1718/// scheduling units.
1719void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1720  SethiUllmanNumbers.assign(SUnits->size(), 0);
1721
1722  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1723    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1724}
1725
1726void RegReductionPQBase::addNode(const SUnit *SU) {
1727  unsigned SUSize = SethiUllmanNumbers.size();
1728  if (SUnits->size() > SUSize)
1729    SethiUllmanNumbers.resize(SUSize*2, 0);
1730  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1731}
1732
1733void RegReductionPQBase::updateNode(const SUnit *SU) {
1734  SethiUllmanNumbers[SU->NodeNum] = 0;
1735  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1736}
1737
1738// Lower priority means schedule further down. For bottom-up scheduling, lower
1739// priority SUs are scheduled before higher priority SUs.
1740unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1741  assert(SU->NodeNum < SethiUllmanNumbers.size());
1742  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1743  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1744    // CopyToReg should be close to its uses to facilitate coalescing and
1745    // avoid spilling.
1746    return 0;
1747  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1748      Opc == TargetOpcode::SUBREG_TO_REG ||
1749      Opc == TargetOpcode::INSERT_SUBREG)
1750    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1751    // close to their uses to facilitate coalescing.
1752    return 0;
1753  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1754    // If SU does not have a register use, i.e. it doesn't produce a value
1755    // that would be consumed (e.g. store), then it terminates a chain of
1756    // computation.  Give it a large SethiUllman number so it will be
1757    // scheduled right before its predecessors so that it doesn't lengthen
1758    // their live ranges.
1759    return 0xffff;
1760  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1761    // If SU does not have a register def, schedule it close to its uses
1762    // because it does not lengthen any live ranges.
1763    return 0;
1764#if 1
1765  return SethiUllmanNumbers[SU->NodeNum];
1766#else
1767  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1768  if (SU->isCallOp) {
1769    // FIXME: This assumes all of the defs are used as call operands.
1770    int NP = (int)Priority - SU->getNode()->getNumValues();
1771    return (NP > 0) ? NP : 0;
1772  }
1773  return Priority;
1774#endif
1775}
1776
1777//===----------------------------------------------------------------------===//
1778//                     Register Pressure Tracking
1779//===----------------------------------------------------------------------===//
1780
1781void RegReductionPQBase::dumpRegPressure() const {
1782  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1783         E = TRI->regclass_end(); I != E; ++I) {
1784    const TargetRegisterClass *RC = *I;
1785    unsigned Id = RC->getID();
1786    unsigned RP = RegPressure[Id];
1787    if (!RP) continue;
1788    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1789          << '\n');
1790  }
1791}
1792
1793bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1794  if (!TLI)
1795    return false;
1796
1797  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1798       I != E; ++I) {
1799    if (I->isCtrl())
1800      continue;
1801    SUnit *PredSU = I->getSUnit();
1802    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1803    // to cover the number of registers defined (they are all live).
1804    if (PredSU->NumRegDefsLeft == 0) {
1805      continue;
1806    }
1807    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1808         RegDefPos.IsValid(); RegDefPos.Advance()) {
1809      EVT VT = RegDefPos.GetValue();
1810      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1811      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1812      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1813        return true;
1814    }
1815  }
1816  return false;
1817}
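// Illustrative numbers (hypothetical register class and limit, not taken from
// any real target): with RegLimit[RC] = 8 and RegPressure[RC] = 7, a
// predecessor still holding an unscheduled def of cost 1 makes 7 + 1 >= 8, so
// HighRegPressure() answers true and the hybrid comparator below switches to
// pressure reduction instead of latency.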
1818
1819bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1820  const SDNode *N = SU->getNode();
1821
1822  if (!N->isMachineOpcode() || !SU->NumSuccs)
1823    return false;
1824
1825  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1826  for (unsigned i = 0; i != NumDefs; ++i) {
1827    EVT VT = N->getValueType(i);
1828    if (!N->hasAnyUseOfValue(i))
1829      continue;
1830    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1831    if (RegPressure[RCId] >= RegLimit[RCId])
1832      return true;
1833  }
1834  return false;
1835}
1836
1837// Compute the register pressure contribution of this instruction by counting up
1838// for uses that are not live and down for defs. Only count register classes
1839// that are already under high pressure. As a side effect, compute the number of
1840// uses of registers that are already live.
1841//
1842// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1843// so it could probably be factored.
1844int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1845  LiveUses = 0;
1846  int PDiff = 0;
1847  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1848       I != E; ++I) {
1849    if (I->isCtrl())
1850      continue;
1851    SUnit *PredSU = I->getSUnit();
1852    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1853    // to cover the number of registers defined (they are all live).
1854    if (PredSU->NumRegDefsLeft == 0) {
1855      if (PredSU->getNode()->isMachineOpcode())
1856        ++LiveUses;
1857      continue;
1858    }
1859    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1860         RegDefPos.IsValid(); RegDefPos.Advance()) {
1861      EVT VT = RegDefPos.GetValue();
1862      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1863      if (RegPressure[RCId] >= RegLimit[RCId])
1864        ++PDiff;
1865    }
1866  }
1867  const SDNode *N = SU->getNode();
1868
1869  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1870    return PDiff;
1871
1872  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1873  for (unsigned i = 0; i != NumDefs; ++i) {
1874    EVT VT = N->getValueType(i);
1875    if (!N->hasAnyUseOfValue(i))
1876      continue;
1877    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1878    if (RegPressure[RCId] >= RegLimit[RCId])
1879      --PDiff;
1880  }
1881  return PDiff;
1882}
1883
1884void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1885  if (!TracksRegPressure)
1886    return;
1887
1888  if (!SU->getNode())
1889    return;
1890
1891  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1892       I != E; ++I) {
1893    if (I->isCtrl())
1894      continue;
1895    SUnit *PredSU = I->getSUnit();
1896    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1897    // to cover the number of registers defined (they are all live).
1898    if (PredSU->NumRegDefsLeft == 0) {
1899      continue;
1900    }
1901    // FIXME: The ScheduleDAG currently loses information about which of a
1902    // node's values is consumed by each dependence. Consequently, if the node
1903    // defines multiple register classes, we don't know which to pressurize
1904    // here. Instead the following loop consumes the register defs in an
1905    // arbitrary order. At least it handles the common case of clustered loads
1906    // to the same class. For precise liveness, each SDep needs to indicate the
1907    // result number. But that tightly couples the ScheduleDAG with the
1908    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1909    // value type or register class to SDep.
1910    //
1911    // The most important aspect of register tracking is balancing the increase
1912    // here with the reduction further below. Note that this SU may use multiple
1913    // defs in PredSU. They can't be determined here, but we've already
1914    // compensated by reducing NumRegDefsLeft in PredSU during
1915    // ScheduleDAGSDNodes::AddSchedEdges.
1916    --PredSU->NumRegDefsLeft;
1917    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1918    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1919         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1920      if (SkipRegDefs)
1921        continue;
1922      EVT VT = RegDefPos.GetValue();
1923      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1924      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1925      break;
1926    }
1927  }
1928
1929  // We should have this assert, but there may be dead SDNodes that never
1930  // materialize as SUnits, so they don't appear to generate liveness.
1931  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1932  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1933  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1934       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1935    if (SkipRegDefs > 0)
1936      continue;
1937    EVT VT = RegDefPos.GetValue();
1938    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1939    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
1940      // Register pressure tracking is imprecise. This can happen. But we try
1941      // hard not to let it happen because it likely results in poor scheduling.
1942      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1943      RegPressure[RCId] = 0;
1944    }
1945    else {
1946      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1947    }
1948  }
1949  dumpRegPressure();
1950}
1951
1952void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1953  if (!TracksRegPressure)
1954    return;
1955
1956  const SDNode *N = SU->getNode();
1957  if (!N) return;
1958
1959  if (!N->isMachineOpcode()) {
1960    if (N->getOpcode() != ISD::CopyToReg)
1961      return;
1962  } else {
1963    unsigned Opc = N->getMachineOpcode();
1964    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1965        Opc == TargetOpcode::INSERT_SUBREG ||
1966        Opc == TargetOpcode::SUBREG_TO_REG ||
1967        Opc == TargetOpcode::REG_SEQUENCE ||
1968        Opc == TargetOpcode::IMPLICIT_DEF)
1969      return;
1970  }
1971
1972  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1973       I != E; ++I) {
1974    if (I->isCtrl())
1975      continue;
1976    SUnit *PredSU = I->getSUnit();
1977    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1978    // counts data deps.
1979    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1980      continue;
1981    const SDNode *PN = PredSU->getNode();
1982    if (!PN->isMachineOpcode()) {
1983      if (PN->getOpcode() == ISD::CopyFromReg) {
1984        EVT VT = PN->getValueType(0);
1985        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1986        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1987      }
1988      continue;
1989    }
1990    unsigned POpc = PN->getMachineOpcode();
1991    if (POpc == TargetOpcode::IMPLICIT_DEF)
1992      continue;
1993    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1994      EVT VT = PN->getOperand(0).getValueType();
1995      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1996      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1997      continue;
1998    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1999               POpc == TargetOpcode::SUBREG_TO_REG) {
2000      EVT VT = PN->getValueType(0);
2001      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2002      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2003      continue;
2004    }
2005    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2006    for (unsigned i = 0; i != NumDefs; ++i) {
2007      EVT VT = PN->getValueType(i);
2008      if (!PN->hasAnyUseOfValue(i))
2009        continue;
2010      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2011      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2012        // Register pressure tracking is imprecise. This can happen.
2013        RegPressure[RCId] = 0;
2014      else
2015        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2016    }
2017  }
2018
2019  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2020  // may transfer data dependencies to CopyToReg.
2021  if (SU->NumSuccs && N->isMachineOpcode()) {
2022    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2023    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2024      EVT VT = N->getValueType(i);
2025      if (VT == MVT::Glue || VT == MVT::Other)
2026        continue;
2027      if (!N->hasAnyUseOfValue(i))
2028        continue;
2029      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2030      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2031    }
2032  }
2033
2034  dumpRegPressure();
2035}
2036
2037//===----------------------------------------------------------------------===//
2038//           Dynamic Node Priority for Register Pressure Reduction
2039//===----------------------------------------------------------------------===//
2040
2041/// closestSucc - Returns the scheduled cycle of the successor which is
2042/// closest to the current cycle.
2043static unsigned closestSucc(const SUnit *SU) {
2044  unsigned MaxHeight = 0;
2045  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2046       I != E; ++I) {
2047    if (I->isCtrl()) continue;  // ignore chain succs
2048    unsigned Height = I->getSUnit()->getHeight();
2049    // If there are a bunch of CopyToRegs stacked up, they should be considered
2050    // to be at the same position.
2051    if (I->getSUnit()->getNode() &&
2052        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2053      Height = closestSucc(I->getSUnit())+1;
2054    if (Height > MaxHeight)
2055      MaxHeight = Height;
2056  }
2057  return MaxHeight;
2058}
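// Example (illustrative): if SU feeds one successor of height 3 and a
// CopyToReg whose own closestSucc() is 5, the copy contributes 5 + 1 = 6 and
// closestSucc(SU) returns 6, so stacked copies are treated as one position.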
2059
2060/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2061/// for scratch registers, i.e. number of data dependencies.
2062static unsigned calcMaxScratches(const SUnit *SU) {
2063  unsigned Scratches = 0;
2064  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2065       I != E; ++I) {
2066    if (I->isCtrl()) continue;  // ignore chain preds
2067    Scratches++;
2068  }
2069  return Scratches;
2070}
2071
2072/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2073/// CopyFromReg from a virtual register.
2074static bool hasOnlyLiveInOpers(const SUnit *SU) {
2075  bool RetVal = false;
2076  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2077       I != E; ++I) {
2078    if (I->isCtrl()) continue;
2079    const SUnit *PredSU = I->getSUnit();
2080    if (PredSU->getNode() &&
2081        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2082      unsigned Reg =
2083        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2084      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2085        RetVal = true;
2086        continue;
2087      }
2088    }
2089    return false;
2090  }
2091  return RetVal;
2092}
2093
2094/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2095/// CopyToReg to a virtual register. This SU def is probably a liveout and
2096/// it has no other use. It should be scheduled closer to the terminator.
2097static bool hasOnlyLiveOutUses(const SUnit *SU) {
2098  bool RetVal = false;
2099  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2100       I != E; ++I) {
2101    if (I->isCtrl()) continue;
2102    const SUnit *SuccSU = I->getSUnit();
2103    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2104      unsigned Reg =
2105        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2106      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2107        RetVal = true;
2108        continue;
2109      }
2110    }
2111    return false;
2112  }
2113  return RetVal;
2114}
2115
2116// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
2117// set isVRegCycle for its CopyFromReg operands.
2118//
2119// This is only relevant for single-block loops, in which case the VRegCycle
2120// node is likely an induction variable in which the operand and target virtual
2121// registers should be coalesced (e.g. pre/post increment values). Setting the
2122// isVRegCycle flag helps the scheduler prioritize other uses of the same
2123// CopyFromReg so that this node becomes the virtual register "kill". This
2124// avoids interference between the values live in and out of the block and
2125// eliminates a copy inside the loop.
2126static void initVRegCycle(SUnit *SU) {
2127  if (DisableSchedVRegCycle)
2128    return;
2129
2130  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2131    return;
2132
2133  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2134
2135  SU->isVRegCycle = true;
2136
2137  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2138       I != E; ++I) {
2139    if (I->isCtrl()) continue;
2140    I->getSUnit()->isVRegCycle = true;
2141  }
2142}
2143
2144// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2145// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2146static void resetVRegCycle(SUnit *SU) {
2147  if (!SU->isVRegCycle)
2148    return;
2149
2150  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2151       I != E; ++I) {
2152    if (I->isCtrl()) continue;  // ignore chain preds
2153    SUnit *PredSU = I->getSUnit();
2154    if (PredSU->isVRegCycle) {
2155      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2156             "VRegCycle def must be CopyFromReg");
2157      I->getSUnit()->isVRegCycle = 0;
2158    }
2159  }
2160}
2161
2162// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2163// means a node that defines the VRegCycle has not been scheduled yet.
2164static bool hasVRegCycleUse(const SUnit *SU) {
2165  // If this SU also defines the VReg, don't hoist it as a "use".
2166  if (SU->isVRegCycle)
2167    return false;
2168
2169  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2170       I != E; ++I) {
2171    if (I->isCtrl()) continue;  // ignore chain preds
2172    if (I->getSUnit()->isVRegCycle &&
2173        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2174      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2175      return true;
2176    }
2177  }
2178  return false;
2179}
2180
2181// Check for either a dependence (latency) or resource (hazard) stall.
2182//
2183// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2184static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2185  if ((int)SPQ->getCurCycle() < Height) return true;
2186  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2187      != ScheduleHazardRecognizer::NoHazard)
2188    return true;
2189  return false;
2190}
2191
2192// Return -1 if left has higher priority, 1 if right has higher priority.
2193// Return 0 if latency-based priority is equivalent.
2194static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2195                            RegReductionPQBase *SPQ) {
2196  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2197  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2198  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2199  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2200  int LHeight = (int)left->getHeight() + LPenalty;
2201  int RHeight = (int)right->getHeight() + RPenalty;
2202
2203  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
2204    BUHasStall(left, LHeight, SPQ);
2205  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
2206    BUHasStall(right, RHeight, SPQ);
2207
2208  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2209  // If scheduling both of the nodes would cause a pipeline stall, sort
2210  // them according to their height.
2211  if (LStall) {
2212    if (!RStall) {
2213      DEBUG(++FactorCount[FactStall]);
2214      return 1;
2215    }
2216    if (LHeight != RHeight) {
2217      DEBUG(++FactorCount[FactStall]);
2218      return LHeight > RHeight ? 1 : -1;
2219    }
2220  } else if (RStall) {
2221    DEBUG(++FactorCount[FactStall]);
2222    return -1;
2223  }
2224
2225  // If either node is scheduling for latency, sort them by height/depth
2226  // and latency.
2227  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2228                     right->SchedulingPref == Sched::Latency)) {
2229    if (DisableSchedCycles) {
2230      if (LHeight != RHeight) {
2231        DEBUG(++FactorCount[FactHeight]);
2232        return LHeight > RHeight ? 1 : -1;
2233      }
2234    }
2235    else {
2236      // If neither instruction stalls (!LStall && !RStall) then
2237      // their heights are already covered so only their depths matter. We also reach
2238      // this if both stall but have the same height.
2239      int LDepth = left->getDepth() - LPenalty;
2240      int RDepth = right->getDepth() - RPenalty;
2241      if (LDepth != RDepth) {
2242        DEBUG(++FactorCount[FactDepth]);
2243        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2244              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2245              << ") depth " << RDepth << "\n");
2246        return LDepth < RDepth ? 1 : -1;
2247      }
2248    }
2249    if (left->Latency != right->Latency) {
2250      DEBUG(++FactorCount[FactOther]);
2251      return left->Latency > right->Latency ? 1 : -1;
2252    }
2253  }
2254  return 0;
2255}
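// Illustrative case (assuming the hazard recognizer reports NoHazard): with
// CurCycle = 4, a node whose penalty-adjusted height is 7 stalls in BUHasStall
// (4 < 7) while a node of height 3 does not, so when only the left node stalls
// the function returns 1 and the non-stalling right node gets the higher
// latency-based priority.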
2256
2257static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2258  // Schedule physical register definitions close to their use. This is
2259  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2260  // long as shortening physreg live ranges is generally good, we can defer
2261  // creating a subtarget hook.
2262  if (!DisableSchedPhysRegJoin) {
2263    bool LHasPhysReg = left->hasPhysRegDefs;
2264    bool RHasPhysReg = right->hasPhysRegDefs;
2265    if (LHasPhysReg != RHasPhysReg) {
2266      DEBUG(++FactorCount[FactRegUses]);
2267      #ifndef NDEBUG
2268      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2269      #endif
2270      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2271            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2272            << PhysRegMsg[RHasPhysReg] << "\n");
2273      return LHasPhysReg < RHasPhysReg;
2274    }
2275  }
2276
2277  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2278  unsigned LPriority = SPQ->getNodePriority(left);
2279  unsigned RPriority = SPQ->getNodePriority(right);
2280
2281  // Be really careful about hoisting call operands above previous calls.
2282  // Only allow it if it would reduce register pressure.
2283  if (left->isCall && right->isCallOp) {
2284    unsigned RNumVals = right->getNode()->getNumValues();
2285    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2286  }
2287  if (right->isCall && left->isCallOp) {
2288    unsigned LNumVals = left->getNode()->getNumValues();
2289    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2290  }
2291
2292  if (LPriority != RPriority) {
2293    DEBUG(++FactorCount[FactStatic]);
2294    return LPriority > RPriority;
2295  }
2296
2297  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2298  // the same, then keep source order.
2299  if (left->isCall || right->isCall) {
2300    unsigned LOrder = SPQ->getNodeOrdering(left);
2301    unsigned ROrder = SPQ->getNodeOrdering(right);
2302
2303    // Prefer an ordering where the lower the non-zero order number, the higher
2304    // the preference.
2305    if ((LOrder || ROrder) && LOrder != ROrder)
2306      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2307  }
2308
2309  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2310  // e.g.
2311  // t1 = op t2, c1
2312  // t3 = op t4, c2
2313  //
2314  // and the following instructions are both ready.
2315  // t2 = op c3
2316  // t4 = op c4
2317  //
2318  // Then schedule t2 = op first.
2319  // i.e.
2320  // t4 = op c4
2321  // t2 = op c3
2322  // t1 = op t2, c1
2323  // t3 = op t4, c2
2324  //
2325  // This creates more short live intervals.
2326  unsigned LDist = closestSucc(left);
2327  unsigned RDist = closestSucc(right);
2328  if (LDist != RDist) {
2329    DEBUG(++FactorCount[FactOther]);
2330    return LDist < RDist;
2331  }
2332
2333  // How many registers become live when the node is scheduled.
2334  unsigned LScratch = calcMaxScratches(left);
2335  unsigned RScratch = calcMaxScratches(right);
2336  if (LScratch != RScratch) {
2337    DEBUG(++FactorCount[FactOther]);
2338    return LScratch > RScratch;
2339  }
2340
2341  // Comparing latency against a call makes little sense unless the node
2342  // is register pressure-neutral.
2343  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2344    return (left->NodeQueueId > right->NodeQueueId);
2345
2346  // Do not compare latencies when one or both of the nodes are calls.
2347  if (!DisableSchedCycles &&
2348      !(left->isCall || right->isCall)) {
2349    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2350    if (result != 0)
2351      return result > 0;
2352  }
2353  else {
2354    if (left->getHeight() != right->getHeight()) {
2355      DEBUG(++FactorCount[FactHeight]);
2356      return left->getHeight() > right->getHeight();
2357    }
2358
2359    if (left->getDepth() != right->getDepth()) {
2360      DEBUG(++FactorCount[FactDepth]);
2361      return left->getDepth() < right->getDepth();
2362    }
2363  }
2364
2365  assert(left->NodeQueueId && right->NodeQueueId &&
2366         "NodeQueueId cannot be zero");
2367  DEBUG(++FactorCount[FactOther]);
2368  return (left->NodeQueueId > right->NodeQueueId);
2369}
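// To summarize the cascade above: physreg defs, Sethi-Ullman priority (with a
// call-operand adjustment), source order for calls, distance to the closest
// successor, scratch-register count, latency (or raw height/depth when
// cycle-level scheduling is disabled), and finally queue insertion order.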
2370
2371// Bottom up
2372bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2373  if (int res = checkSpecialNodes(left, right))
2374    return res > 0;
2375
2376  return BURRSort(left, right, SPQ);
2377}
2378
2379// Source order, otherwise bottom up.
2380bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2381  if (int res = checkSpecialNodes(left, right))
2382    return res > 0;
2383
2384  unsigned LOrder = SPQ->getNodeOrdering(left);
2385  unsigned ROrder = SPQ->getNodeOrdering(right);
2386
2387  // Prefer an ordering where the lower the non-zero order number, the higher
2388  // the preference.
2389  if ((LOrder || ROrder) && LOrder != ROrder)
2390    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2391
2392  return BURRSort(left, right, SPQ);
2393}
2394
2395// If the time between now and when the instruction will be ready can cover
2396// the spill code, then avoid adding it to the ready queue. This gives long
2397// stalls highest priority and allows hoisting across calls. It should also
2398// speed up processing the available queue.
2399bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2400  static const unsigned ReadyDelay = 3;
2401
2402  if (SPQ->MayReduceRegPressure(SU)) return true;
2403
2404  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2405
2406  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2407      != ScheduleHazardRecognizer::NoHazard)
2408    return false;
2409
2410  return true;
2411}
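// Illustrative numbers (assuming MayReduceRegPressure() is false and the
// hazard recognizer sees no hazard): with ReadyDelay = 3 and CurCycle = 10, a
// node of height 14 stays out of the available queue (14 > 10 + 3) while a
// node of height 12 is admitted.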
2412
2413// Return true if right should be scheduled with higher priority than left.
2414bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2415  if (int res = checkSpecialNodes(left, right))
2416    return res > 0;
2417
2418  if (left->isCall || right->isCall)
2419    // No way to compute latency of calls.
2420    return BURRSort(left, right, SPQ);
2421
2422  bool LHigh = SPQ->HighRegPressure(left);
2423  bool RHigh = SPQ->HighRegPressure(right);
2424  // Avoid causing spills. If register pressure is high, schedule for
2425  // register pressure reduction.
2426  if (LHigh && !RHigh) {
2427    DEBUG(++FactorCount[FactPressureDiff]);
2428    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2429          << right->NodeNum << ")\n");
2430    return true;
2431  }
2432  else if (!LHigh && RHigh) {
2433    DEBUG(++FactorCount[FactPressureDiff]);
2434    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2435          << left->NodeNum << ")\n");
2436    return false;
2437  }
2438  if (!LHigh && !RHigh) {
2439    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2440    if (result != 0)
2441      return result > 0;
2442  }
2443  return BURRSort(left, right, SPQ);
2444}
2445
2446// Schedule as many instructions in each cycle as possible. So don't make an
2447// instruction available unless it is ready in the current cycle.
2448bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2449  if (SU->getHeight() > CurCycle) return false;
2450
2451  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2452      != ScheduleHazardRecognizer::NoHazard)
2453    return false;
2454
2455  return true;
2456}
2457
2458static bool canEnableCoalescing(SUnit *SU) {
2459  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2460  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2461    // CopyToReg should be close to its uses to facilitate coalescing and
2462    // avoid spilling.
2463    return true;
2464
2465  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2466      Opc == TargetOpcode::SUBREG_TO_REG ||
2467      Opc == TargetOpcode::INSERT_SUBREG)
2468    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2469    // close to their uses to facilitate coalescing.
2470    return true;
2471
2472  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2473    // If SU does not have a register def, schedule it close to its uses
2474    // because it does not lengthen any live ranges.
2475    return true;
2476
2477  return false;
2478}
2479
2480// list-ilp is currently an experimental scheduler that allows various
2481// heuristics to be enabled prior to the normal register reduction logic.
2482bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2483  if (int res = checkSpecialNodes(left, right))
2484    return res > 0;
2485
2486  if (left->isCall || right->isCall)
2487    // No way to compute latency of calls.
2488    return BURRSort(left, right, SPQ);
2489
2490  unsigned LLiveUses = 0, RLiveUses = 0;
2491  int LPDiff = 0, RPDiff = 0;
2492  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2493    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2494    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2495  }
2496  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2497    DEBUG(++FactorCount[FactPressureDiff]);
2498    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2499          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2500    return LPDiff > RPDiff;
2501  }
2502
2503  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2504    bool LReduce = canEnableCoalescing(left);
2505    bool RReduce = canEnableCoalescing(right);
2506    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2507    if (LReduce && !RReduce) return false;
2508    if (RReduce && !LReduce) return true;
2509  }
2510
2511  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2512    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2513          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2514    DEBUG(++FactorCount[FactRegUses]);
2515    return LLiveUses < RLiveUses;
2516  }
2517
2518  if (!DisableSchedStalls) {
2519    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2520    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2521    if (LStall != RStall) {
2522      DEBUG(++FactorCount[FactHeight]);
2523      return left->getHeight() > right->getHeight();
2524    }
2525  }
2526
2527  if (!DisableSchedCriticalPath) {
2528    int spread = (int)left->getDepth() - (int)right->getDepth();
2529    if (std::abs(spread) > MaxReorderWindow) {
2530      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2531            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2532            << right->getDepth() << "\n");
2533      DEBUG(++FactorCount[FactDepth]);
2534      return left->getDepth() < right->getDepth();
2535    }
2536  }
2537
2538  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2539    int spread = (int)left->getHeight() - (int)right->getHeight();
2540    if (std::abs(spread) > MaxReorderWindow) {
2541      DEBUG(++FactorCount[FactHeight]);
2542      return left->getHeight() > right->getHeight();
2543    }
2544  }
2545
2546  return BURRSort(left, right, SPQ);
2547}
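// Recap of the order above: special nodes, calls, register pressure diff,
// coalescing opportunity, live uses, stalls, critical path (depth), height,
// then the plain BURRSort tie-breakers; each step can be switched off via the
// corresponding DisableSched* option checked in the code.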
2548
2549void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2550  SUnits = &sunits;
2551  // Add pseudo dependency edges for two-address nodes.
2552  AddPseudoTwoAddrDeps();
2553  // Reroute edges to nodes with multiple uses.
2554  if (!TracksRegPressure)
2555    PrescheduleNodesWithMultipleUses();
2556  // Calculate node priorities.
2557  CalculateSethiUllmanNumbers();
2558
2559  // For single block loops, mark nodes that look like canonical IV increments.
2560  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2561    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2562      initVRegCycle(&sunits[i]);
2563    }
2564  }
2565}
2566
2567//===----------------------------------------------------------------------===//
2568//                    Preschedule for Register Pressure
2569//===----------------------------------------------------------------------===//
2570
2571bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2572  if (SU->isTwoAddress) {
2573    unsigned Opc = SU->getNode()->getMachineOpcode();
2574    const TargetInstrDesc &TID = TII->get(Opc);
2575    unsigned NumRes = TID.getNumDefs();
2576    unsigned NumOps = TID.getNumOperands() - NumRes;
2577    for (unsigned i = 0; i != NumOps; ++i) {
2578      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2579        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2580        if (DU->getNodeId() != -1 &&
2581            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2582          return true;
2583      }
2584    }
2585  }
2586  return false;
2587}
2588
2589/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2590/// physical register defs.
2591static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2592                                  const TargetInstrInfo *TII,
2593                                  const TargetRegisterInfo *TRI) {
2594  SDNode *N = SuccSU->getNode();
2595  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2596  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2597  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2598  for (const SDNode *SUNode = SU->getNode(); SUNode;
2599       SUNode = SUNode->getGluedNode()) {
2600    if (!SUNode->isMachineOpcode())
2601      continue;
2602    const unsigned *SUImpDefs =
2603      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2604    if (!SUImpDefs)
2605      return false;
2606    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2607      EVT VT = N->getValueType(i);
2608      if (VT == MVT::Glue || VT == MVT::Other)
2609        continue;
2610      if (!N->hasAnyUseOfValue(i))
2611        continue;
2612      unsigned Reg = ImpDefs[i - NumDefs];
2613      for (;*SUImpDefs; ++SUImpDefs) {
2614        unsigned SUReg = *SUImpDefs;
2615        if (TRI->regsOverlap(Reg, SUReg))
2616          return true;
2617      }
2618    }
2619  }
2620  return false;
2621}
2622
2623/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2624/// are not handled well by the general register pressure reduction
2625/// heuristics. When presented with code like this:
2626///
2627///      N
2628///    / |
2629///   /  |
2630///  U  store
2631///  |
2632/// ...
2633///
2634/// the heuristics tend to push the store up, but since the
2635/// operand of the store has another use (U), this would increase
2636/// the length of that other use (the U->N edge).
2637///
2638/// This function transforms code like the above to route U's
2639/// dependence through the store when possible, like this:
2640///
2641///      N
2642///      ||
2643///      ||
2644///     store
2645///       |
2646///       U
2647///       |
2648///      ...
2649///
2650/// This results in the store being scheduled immediately
2651/// after N, which shortens the U->N live range, reducing
2652/// register pressure.
2653///
2654void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2655  // Visit all the nodes in topological order, working top-down.
2656  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2657    SUnit *SU = &(*SUnits)[i];
2658    // For now, only look at nodes with no data successors, such as stores.
2659    // These are especially important, due to the heuristics in
2660    // getNodePriority for nodes with no data successors.
2661    if (SU->NumSuccs != 0)
2662      continue;
2663    // For now, only look at nodes with exactly one data predecessor.
2664    if (SU->NumPreds != 1)
2665      continue;
2666    // Avoid prescheduling copies to virtual registers, which don't behave
2667    // like other nodes from the perspective of scheduling heuristics.
2668    if (SDNode *N = SU->getNode())
2669      if (N->getOpcode() == ISD::CopyToReg &&
2670          TargetRegisterInfo::isVirtualRegister
2671            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2672        continue;
2673
2674    // Locate the single data predecessor.
2675    SUnit *PredSU = 0;
2676    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2677         EE = SU->Preds.end(); II != EE; ++II)
2678      if (!II->isCtrl()) {
2679        PredSU = II->getSUnit();
2680        break;
2681      }
2682    assert(PredSU);
2683
2684    // Don't rewrite edges that carry physregs, because that requires additional
2685    // support infrastructure.
2686    if (PredSU->hasPhysRegDefs)
2687      continue;
2688    // Short-circuit the case where SU is PredSU's only data successor.
2689    if (PredSU->NumSuccs == 1)
2690      continue;
2691    // Avoid prescheduling to copies from virtual registers, which don't behave
2692    // like other nodes from the perspective of scheduling heuristics.
2693    if (SDNode *N = SU->getNode())
2694      if (N->getOpcode() == ISD::CopyFromReg &&
2695          TargetRegisterInfo::isVirtualRegister
2696            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2697        continue;
2698
2699    // Perform checks on the successors of PredSU.
2700    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2701         EE = PredSU->Succs.end(); II != EE; ++II) {
2702      SUnit *PredSuccSU = II->getSUnit();
2703      if (PredSuccSU == SU) continue;
2704      // If PredSU has another successor with no data successors, for
2705      // now don't attempt to choose either over the other.
2706      if (PredSuccSU->NumSuccs == 0)
2707        goto outer_loop_continue;
2708      // Don't break physical register dependencies.
2709      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2710        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2711          goto outer_loop_continue;
2712      // Don't introduce graph cycles.
2713      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2714        goto outer_loop_continue;
2715    }
2716
2717    // Ok, the transformation is safe and the heuristics suggest it is
2718    // profitable. Update the graph.
2719    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2720                 << " next to PredSU #" << PredSU->NodeNum
2721                 << " to guide scheduling in the presence of multiple uses\n");
2722    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2723      SDep Edge = PredSU->Succs[i];
2724      assert(!Edge.isAssignedRegDep());
2725      SUnit *SuccSU = Edge.getSUnit();
2726      if (SuccSU != SU) {
2727        Edge.setSUnit(PredSU);
2728        scheduleDAG->RemovePred(SuccSU, Edge);
2729        scheduleDAG->AddPred(SU, Edge);
2730        Edge.setSUnit(SU);
2731        scheduleDAG->AddPred(SuccSU, Edge);
2732        --i;
2733      }
2734    }
2735  outer_loop_continue:;
2736  }
2737}
2738
2739/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2740/// it as a def&use operand, add a pseudo control edge from it to the other
2741/// node (if it won't create a cycle) so the two-address one will be scheduled
2742/// first (lower in the schedule). If both nodes are two-address, favor the
2743/// one that has a CopyToReg use (more likely to be a loop induction update).
2744/// If both are two-address, but one is commutable while the other is not
2745/// commutable, favor the one that's not commutable.
2746void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2747  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2748    SUnit *SU = &(*SUnits)[i];
2749    if (!SU->isTwoAddress)
2750      continue;
2751
2752    SDNode *Node = SU->getNode();
2753    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2754      continue;
2755
2756    bool isLiveOut = hasOnlyLiveOutUses(SU);
2757    unsigned Opc = Node->getMachineOpcode();
2758    const TargetInstrDesc &TID = TII->get(Opc);
2759    unsigned NumRes = TID.getNumDefs();
2760    unsigned NumOps = TID.getNumOperands() - NumRes;
2761    for (unsigned j = 0; j != NumOps; ++j) {
2762      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2763        continue;
2764      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2765      if (DU->getNodeId() == -1)
2766        continue;
2767      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2768      if (!DUSU) continue;
2769      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2770           E = DUSU->Succs.end(); I != E; ++I) {
2771        if (I->isCtrl()) continue;
2772        SUnit *SuccSU = I->getSUnit();
2773        if (SuccSU == SU)
2774          continue;
2775        // Be conservative. Ignore if nodes aren't at roughly the same
2776        // depth and height.
2777        if (SuccSU->getHeight() < SU->getHeight() &&
2778            (SU->getHeight() - SuccSU->getHeight()) > 1)
2779          continue;
2780        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2781        // constrains whatever is using the copy, instead of the copy
2782        // itself. In the case that the copy is coalesced, this
2783        // preserves the intent of the pseudo two-address heuristics.
2784        while (SuccSU->Succs.size() == 1 &&
2785               SuccSU->getNode()->isMachineOpcode() &&
2786               SuccSU->getNode()->getMachineOpcode() ==
2787                 TargetOpcode::COPY_TO_REGCLASS)
2788          SuccSU = SuccSU->Succs.front().getSUnit();
2789        // Don't constrain non-instruction nodes.
2790        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2791          continue;
2792        // Don't constrain nodes with physical register defs if the
2793        // predecessor can clobber them.
2794        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2795          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2796            continue;
2797        }
2798        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2799        // these may be coalesced away. We want them close to their uses.
2800        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2801        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2802            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2803            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2804          continue;
2805        if ((!canClobber(SuccSU, DUSU) ||
2806             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2807             (!SU->isCommutable && SuccSU->isCommutable)) &&
2808            !scheduleDAG->IsReachable(SuccSU, SU)) {
2809          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2810                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2811          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2812                                        /*Reg=*/0, /*isNormalMemory=*/false,
2813                                        /*isMustAlias=*/false,
2814                                        /*isArtificial=*/true));
2815        }
2816      }
2817    }
2818  }
2819}
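// Illustrative scenario (hypothetical virtual registers): if a two-address
// node defines t2 tied to its use of t1, and some other node also reads t1,
// the artificial edge added above makes that other reader a predecessor of
// the two-address node. In the final program order the plain read of t1 then
// comes first and the tied def&use is t1's last use, so the register
// allocator can give t1 and t2 the same register.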
2820
2821/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the number of unscheduled
2822/// predecessors of the successors of the SUnit SU. Stop when the provided
2823/// limit is exceeded.
2824static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2825                                                    unsigned Limit) {
2826  unsigned Sum = 0;
2827  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2828       I != E; ++I) {
2829    const SUnit *SuccSU = I->getSUnit();
2830    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2831         EE = SuccSU->Preds.end(); II != EE; ++II) {
2832      SUnit *PredSU = II->getSUnit();
2833      if (!PredSU->isScheduled)
2834        if (++Sum > Limit)
2835          return Sum;
2836    }
2837  }
2838  return Sum;
2839}
2840
2841
2842// Top down
2843bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2844  if (int res = checkSpecialNodes(left, right))
2845    return res < 0;
2846
2847  unsigned LPriority = SPQ->getNodePriority(left);
2848  unsigned RPriority = SPQ->getNodePriority(right);
2849  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2850  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2851  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2852  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2853  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2854  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2855
2856  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2857    return false;
2858  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2859    return true;
2860
2861  if (LIsFloater)
2862    LBonus -= 2;
2863  if (RIsFloater)
2864    RBonus -= 2;
2865  if (left->NumSuccs == 1)
2866    LBonus += 2;
2867  if (right->NumSuccs == 1)
2868    RBonus += 2;
2869
2870  if (LPriority+LBonus != RPriority+RBonus)
2871    return LPriority+LBonus < RPriority+RBonus;
2872
2873  if (left->getDepth() != right->getDepth())
2874    return left->getDepth() < right->getDepth();
2875
2876  if (left->NumSuccsLeft != right->NumSuccsLeft)
2877    return left->NumSuccsLeft > right->NumSuccsLeft;
2878
2879  assert(left->NodeQueueId && right->NodeQueueId &&
2880         "NodeQueueId cannot be zero");
2881  return (left->NodeQueueId > right->NodeQueueId);
2882}
2883
2884//===----------------------------------------------------------------------===//
2885//                         Public Constructor Functions
2886//===----------------------------------------------------------------------===//
2887
2888llvm::ScheduleDAGSDNodes *
2889llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2890                                 CodeGenOpt::Level OptLevel) {
2891  const TargetMachine &TM = IS->TM;
2892  const TargetInstrInfo *TII = TM.getInstrInfo();
2893  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2894
2895  BURegReductionPriorityQueue *PQ =
2896    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2897  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2898  PQ->setScheduleDAG(SD);
2899  return SD;
2900}
2901
2902llvm::ScheduleDAGSDNodes *
2903llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2904                                 CodeGenOpt::Level OptLevel) {
2905  const TargetMachine &TM = IS->TM;
2906  const TargetInstrInfo *TII = TM.getInstrInfo();
2907  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2908
2909  TDRegReductionPriorityQueue *PQ =
2910    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2911  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2912  PQ->setScheduleDAG(SD);
2913  return SD;
2914}
2915
2916llvm::ScheduleDAGSDNodes *
2917llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2918                                   CodeGenOpt::Level OptLevel) {
2919  const TargetMachine &TM = IS->TM;
2920  const TargetInstrInfo *TII = TM.getInstrInfo();
2921  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2922
2923  SrcRegReductionPriorityQueue *PQ =
2924    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2925  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2926  PQ->setScheduleDAG(SD);
2927  return SD;
2928}
2929
2930llvm::ScheduleDAGSDNodes *
2931llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2932                                   CodeGenOpt::Level OptLevel) {
2933  const TargetMachine &TM = IS->TM;
2934  const TargetInstrInfo *TII = TM.getInstrInfo();
2935  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2936  const TargetLowering *TLI = &IS->getTargetLowering();
2937
2938  HybridBURRPriorityQueue *PQ =
2939    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2940
2941  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2942  PQ->setScheduleDAG(SD);
2943  return SD;
2944}
2945
2946llvm::ScheduleDAGSDNodes *
2947llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2948                                CodeGenOpt::Level OptLevel) {
2949  const TargetMachine &TM = IS->TM;
2950  const TargetInstrInfo *TII = TM.getInstrInfo();
2951  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2952  const TargetLowering *TLI = &IS->getTargetLowering();
2953
2954  ILPBURRPriorityQueue *PQ =
2955    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2956  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2957  PQ->setScheduleDAG(SD);
2958  return SD;
2959}
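// Usage sketch (illustrative, not part of this file): these factories are
// normally reached through the scheduler registry or the target's default
// scheduling preference; with llc the pre-RA scheduler can be selected on the
// command line, e.g. "llc -pre-RA-sched=list-burr foo.ll". A direct call would
// look like:
//
//   ScheduleDAGSDNodes *Sched =
//     createBURRListDAGScheduler(IS, CodeGenOpt::Default);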
2960