ScheduleDAGRRList.cpp revision 2ba60e593012ba9b2a9d20b86733eadca288bcb2
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  sourceListDAGScheduler("source",
49                         "Similar to list-burr but schedules in source "
50                         "order when possible",
51                         createSourceListDAGScheduler);
52
53static RegisterScheduler
54  hybridListDAGScheduler("list-hybrid",
55                         "Bottom-up register pressure aware list scheduling "
56                         "which tries to balance latency and register pressure",
57                         createHybridListDAGScheduler);
58
59static RegisterScheduler
60  ILPListDAGScheduler("list-ilp",
61                      "Bottom-up register pressure aware list scheduling "
62                      "which tries to balance ILP and register pressure",
63                      createILPListDAGScheduler);
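// Note (added, illustrative): a RegisterScheduler entry makes the scheduler
// selectable by name on the llc command line, for example:
//
//   llc -pre-RA-sched=list-ilp foo.ll
//
// Targets can also express a default preference (register pressure, hybrid,
// ILP, ...) via their scheduling preference, which the scheduler selector
// maps onto one of the factory functions registered here.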
64
65static cl::opt<bool> DisableSchedCycles(
66  "disable-sched-cycles", cl::Hidden, cl::init(false),
67  cl::desc("Disable cycle-level precision during preRA scheduling"));
68
69// Temporary sched=list-ilp flags until the heuristics are robust.
70// Some options are also available under sched=list-hybrid.
71static cl::opt<bool> DisableSchedRegPressure(
72  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
73  cl::desc("Disable regpressure priority in sched=list-ilp"));
74static cl::opt<bool> DisableSchedLiveUses(
75  "disable-sched-live-uses", cl::Hidden, cl::init(true),
76  cl::desc("Disable live use priority in sched=list-ilp"));
77static cl::opt<bool> DisableSchedVRegCycle(
78  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
79  cl::desc("Disable virtual register cycle interference checks"));
80static cl::opt<bool> DisableSchedPhysRegJoin(
81  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
82  cl::desc("Disable physreg def-use affinity"));
83static cl::opt<bool> DisableSchedStalls(
84  "disable-sched-stalls", cl::Hidden, cl::init(true),
85  cl::desc("Disable no-stall priority in sched=list-ilp"));
86static cl::opt<bool> DisableSchedCriticalPath(
87  "disable-sched-critical-path", cl::Hidden, cl::init(false),
88  cl::desc("Disable critical path priority in sched=list-ilp"));
89static cl::opt<bool> DisableSchedHeight(
90  "disable-sched-height", cl::Hidden, cl::init(false),
91  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
92
93static cl::opt<int> MaxReorderWindow(
94  "max-sched-reorder", cl::Hidden, cl::init(6),
95  cl::desc("Number of instructions to allow ahead of the critical path "
96           "in sched=list-ilp"));
97
98static cl::opt<unsigned> AvgIPC(
99  "sched-avg-ipc", cl::Hidden, cl::init(1),
100  cl::desc("Average inst/cycle whan no target itinerary exists."));
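// Note (added, illustrative): the hidden flags above are tuning/debugging
// knobs rather than user-facing options. For example, to measure how much the
// register pressure heuristic contributes to a schedule, one might compare:
//
//   llc -pre-RA-sched=list-ilp foo.ll
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure foo.ll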
101
102#ifndef NDEBUG
103namespace {
104  // For sched=list-ilp, count the number of times each factor comes into play.
105  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
106         FactStatic, FactOther, NumFactors };
107}
108static const char *FactorName[NumFactors] =
109{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
110static int FactorCount[NumFactors];
111#endif //!NDEBUG
112
113namespace {
114//===----------------------------------------------------------------------===//
115/// ScheduleDAGRRList - The actual register reduction list scheduler
116/// implementation.  This supports both top-down and bottom-up scheduling.
117///
118class ScheduleDAGRRList : public ScheduleDAGSDNodes {
119private:
120  /// NeedLatency - True if the scheduler will make use of latency information.
121  ///
122  bool NeedLatency;
123
124  /// AvailableQueue - The priority queue to use for the available SUnits.
125  SchedulingPriorityQueue *AvailableQueue;
126
127  /// PendingQueue - This contains all of the instructions whose operands have
128  /// been issued, but their results are not ready yet (due to the latency of
129/// the operation).  Once the operands become available, the instruction is
130  /// added to the AvailableQueue.
131  std::vector<SUnit*> PendingQueue;
132
133  /// HazardRec - The hazard recognizer to use.
134  ScheduleHazardRecognizer *HazardRec;
135
136  /// CurCycle - The current scheduler state corresponds to this cycle.
137  unsigned CurCycle;
138
139  /// MinAvailableCycle - Cycle of the soonest available instruction.
140  unsigned MinAvailableCycle;
141
142  /// IssueCount - Count instructions issued in this cycle
143  /// Currently valid only for bottom-up scheduling.
144  unsigned IssueCount;
145
146  /// LiveRegDefs - A set of physical registers and their definitions
147  /// that are "live". These nodes must be scheduled before any other node
148  /// that modifies the registers can be scheduled.
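  /// For a physical register R that is currently live, LiveRegDefs[R] is the
  /// (not yet scheduled) node that defines R and LiveRegGens[R] is the
  /// already-scheduled node that consumes it; see ReleasePredecessors below
  /// for how this pair is maintained.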
149  unsigned NumLiveRegs;
150  std::vector<SUnit*> LiveRegDefs;
151  std::vector<SUnit*> LiveRegGens;
152
153  /// Topo - A topological ordering for SUnits which permits fast IsReachable
154  /// and similar queries.
155  ScheduleDAGTopologicalSort Topo;
156
157public:
158  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
159                    SchedulingPriorityQueue *availqueue,
160                    CodeGenOpt::Level OptLevel)
161    : ScheduleDAGSDNodes(mf),
162      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
163      Topo(SUnits) {
164
165    const TargetMachine &tm = mf.getTarget();
166    if (DisableSchedCycles || !NeedLatency)
167      HazardRec = new ScheduleHazardRecognizer();
168    else
169      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
170  }
171
172  ~ScheduleDAGRRList() {
173    delete HazardRec;
174    delete AvailableQueue;
175  }
176
177  void Schedule();
178
179  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
180
181  /// IsReachable - Checks if SU is reachable from TargetSU.
182  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
183    return Topo.IsReachable(SU, TargetSU);
184  }
185
186  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
187  /// create a cycle.
188  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
189    return Topo.WillCreateCycle(SU, TargetSU);
190  }
191
192  /// AddPred - adds a predecessor edge to SUnit SU.
193  /// D describes the predecessor node and the kind of dependence.
194  /// Updates the topological ordering if required.
195  void AddPred(SUnit *SU, const SDep &D) {
196    Topo.AddPred(SU, D.getSUnit());
197    SU->addPred(D);
198  }
199
200  /// RemovePred - removes a predecessor edge from SUnit SU.
201  /// D must describe an existing predecessor edge of SU.
202  /// Updates the topological ordering if required.
203  void RemovePred(SUnit *SU, const SDep &D) {
204    Topo.RemovePred(SU, D.getSUnit());
205    SU->removePred(D);
206  }
207
208private:
209  bool isReady(SUnit *SU) {
210    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
211      AvailableQueue->isReady(SU);
212  }
213
214  void ReleasePred(SUnit *SU, const SDep *PredEdge);
215  void ReleasePredecessors(SUnit *SU);
216  void ReleasePending();
217  void AdvanceToCycle(unsigned NextCycle);
218  void AdvancePastStalls(SUnit *SU);
219  void EmitNode(SUnit *SU);
220  void ScheduleNodeBottomUp(SUnit*);
221  void CapturePred(SDep *PredEdge);
222  void UnscheduleNodeBottomUp(SUnit*);
223  void RestoreHazardCheckerBottomUp();
224  void BacktrackBottomUp(SUnit*, SUnit*);
225  SUnit *CopyAndMoveSuccessors(SUnit*);
226  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
227                                const TargetRegisterClass*,
228                                const TargetRegisterClass*,
229                                SmallVector<SUnit*, 2>&);
230  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
231
232  SUnit *PickNodeToScheduleBottomUp();
233  void ListScheduleBottomUp();
234
235  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
236  /// Updates the topological ordering if required.
237  SUnit *CreateNewSUnit(SDNode *N) {
238    unsigned NumSUnits = SUnits.size();
239    SUnit *NewNode = NewSUnit(N);
240    // Update the topological ordering.
241    if (NewNode->NodeNum >= NumSUnits)
242      Topo.InitDAGTopologicalSorting();
243    return NewNode;
244  }
245
246  /// CreateClone - Creates a new SUnit from an existing one.
247  /// Updates the topological ordering if required.
248  SUnit *CreateClone(SUnit *N) {
249    unsigned NumSUnits = SUnits.size();
250    SUnit *NewNode = Clone(N);
251    // Update the topological ordering.
252    if (NewNode->NodeNum >= NumSUnits)
253      Topo.InitDAGTopologicalSorting();
254    return NewNode;
255  }
256
257  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
258  /// need actual latency information but the hybrid scheduler does.
259  bool ForceUnitLatencies() const {
260    return !NeedLatency;
261  }
262};
263}  // end anonymous namespace
264
265/// GetCostForDef - Looks up the register class and cost for a given definition.
266/// Typically this just means looking up the representative register class,
267/// but for untyped values (MVT::untyped) it means inspecting the node's
268/// opcode to determine what register class is being generated.
269static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
270                          const TargetLowering *TLI,
271                          const TargetInstrInfo *TII,
272                          const TargetRegisterInfo *TRI,
273                          unsigned &RegClass, unsigned &Cost) {
274  EVT VT = RegDefPos.GetValue();
275
276  // Special handling for untyped values.  These values can only come from
277  // the expansion of custom DAG-to-DAG patterns.
278  if (VT == MVT::untyped) {
279    const SDNode *Node = RegDefPos.GetNode();
280    unsigned Opcode = Node->getMachineOpcode();
281
282    if (Opcode == TargetOpcode::REG_SEQUENCE) {
283      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
284      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
285      RegClass = RC->getID();
286      Cost = 1;
287      return;
288    }
289
290    unsigned Idx = RegDefPos.GetIdx();
291    const MCInstrDesc &Desc = TII->get(Opcode);
292    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
293    RegClass = RC->getID();
294    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
295    // better way to determine it.
296    Cost = 1;
297  } else {
298    RegClass = TLI->getRepRegClassFor(VT)->getID();
299    Cost = TLI->getRepRegClassCostFor(VT);
300  }
301}
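// Illustrative usage sketch (added; the real callers are the register
// pressure tracking routines later in this file, and details may differ):
//
//   unsigned RCId, Cost;
//   GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
//   RegPressure[RCId] += Cost;   // one more value of this class is now live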
302
303/// Schedule - Schedule the DAG using list scheduling.
304void ScheduleDAGRRList::Schedule() {
305  DEBUG(dbgs()
306        << "********** List Scheduling BB#" << BB->getNumber()
307        << " '" << BB->getName() << "' **********\n");
308#ifndef NDEBUG
309  for (int i = 0; i < NumFactors; ++i) {
310    FactorCount[i] = 0;
311  }
312#endif //!NDEBUG
313
314  CurCycle = 0;
315  IssueCount = 0;
316  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
317  NumLiveRegs = 0;
318  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
319  LiveRegGens.resize(TRI->getNumRegs(), NULL);
320
321  // Build the scheduling graph.
322  BuildSchedGraph(NULL);
323
324  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
325          SUnits[su].dumpAll(this));
326  Topo.InitDAGTopologicalSorting();
327
328  AvailableQueue->initNodes(SUnits);
329
330  HazardRec->Reset();
331
332  // Execute the actual scheduling loop.
333  ListScheduleBottomUp();
334
335#ifndef NDEBUG
336  for (int i = 0; i < NumFactors; ++i) {
337    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
338  }
339#endif // !NDEBUG
340  AvailableQueue->releaseState();
341}
342
343//===----------------------------------------------------------------------===//
344//  Bottom-Up Scheduling
345//===----------------------------------------------------------------------===//
346
347/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
348/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
349void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
350  SUnit *PredSU = PredEdge->getSUnit();
351
352#ifndef NDEBUG
353  if (PredSU->NumSuccsLeft == 0) {
354    dbgs() << "*** Scheduling failed! ***\n";
355    PredSU->dump(this);
356    dbgs() << " has been released too many times!\n";
357    llvm_unreachable(0);
358  }
359#endif
360  --PredSU->NumSuccsLeft;
361
362  if (!ForceUnitLatencies()) {
363    // Updating predecessor's height. This is now the cycle when the
364    // predecessor can be scheduled without causing a pipeline stall.
365    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
366  }
367
368  // If all the node's successors are scheduled, this node is ready
369  // to be scheduled. Ignore the special EntrySU node.
370  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
371    PredSU->isAvailable = true;
372
373    unsigned Height = PredSU->getHeight();
374    if (Height < MinAvailableCycle)
375      MinAvailableCycle = Height;
376
377    if (isReady(PredSU)) {
378      AvailableQueue->push(PredSU);
379    }
380    // CapturePred and others may have left the node in the pending queue;
381    // avoid adding it twice.
382    else if (!PredSU->isPending) {
383      PredSU->isPending = true;
384      PendingQueue.push_back(PredSU);
385    }
386  }
387}
388
389/// IsChainDependent - Test if Outer is reachable from Inner through
390/// chain dependencies.
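///
/// (Added clarification: the walk starts at Outer and repeatedly follows chain
/// operands, descending into every operand of a TokenFactor, and succeeds if
/// it reaches Inner before hitting the entry token.)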
391static bool IsChainDependent(SDNode *Outer, SDNode *Inner) {
392  SDNode *N = Outer;
393  for (;;) {
394    if (N == Inner)
395      return true;
396    if (N->getOpcode() == ISD::TokenFactor) {
397      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
398        if (IsChainDependent(N->getOperand(i).getNode(), Inner))
399          return true;
400      return false;
401    }
402    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
403      if (N->getOperand(i).getValueType() == MVT::Other) {
404        N = N->getOperand(i).getNode();
405        goto found_chain_operand;
406      }
407    return false;
408  found_chain_operand:;
409    if (N->getOpcode() == ISD::EntryToken)
410      return false;
411  }
412}
413
414/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
415/// the corresponding (lowered) CALLSEQ_BEGIN node.
416///
417/// NestLevel and MaxNest are used in recursion to indicate the current level
418/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
419/// level seen so far.
420///
421/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
422/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
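///
/// (Added example: starting from CALLSEQ_END(outer) in
///   CALLSEQ_BEGIN(outer) ... CALLSEQ_BEGIN(inner) ... CALLSEQ_END(inner) ...
///   CALLSEQ_END(outer)
/// the upward walk first sees CALLSEQ_END(inner), which raises the nesting
/// level, so CALLSEQ_BEGIN(inner) only lowers it back; the search stops at
/// CALLSEQ_BEGIN(outer), where the level returns to zero.)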
423static SDNode *
424FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
425                 const TargetInstrInfo *TII) {
426  for (;;) {
427    // For a TokenFactor, examine each operand. There may be multiple ways
428    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
429    // most nesting in order to ensure that we find the corresponding match.
430    if (N->getOpcode() == ISD::TokenFactor) {
431      SDNode *Best = 0;
432      unsigned BestMaxNest = MaxNest;
433      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
434        unsigned MyNestLevel = NestLevel;
435        unsigned MyMaxNest = MaxNest;
436        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
437                                           MyNestLevel, MyMaxNest, TII))
438          if (!Best || (MyMaxNest > BestMaxNest)) {
439            Best = New;
440            BestMaxNest = MyMaxNest;
441          }
442      }
443      assert(Best);
444      MaxNest = BestMaxNest;
445      return Best;
446    }
447    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
448    if (N->isMachineOpcode()) {
449      if (N->getMachineOpcode() ==
450          (unsigned)TII->getCallFrameDestroyOpcode()) {
451        ++NestLevel;
452        MaxNest = std::max(MaxNest, NestLevel);
453      } else if (N->getMachineOpcode() ==
454                 (unsigned)TII->getCallFrameSetupOpcode()) {
455        --NestLevel;
456        if (NestLevel == 0)
457          return N;
458      }
459    }
460    // Otherwise, find the chain and continue climbing.
461    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
462      if (N->getOperand(i).getValueType() == MVT::Other) {
463        N = N->getOperand(i).getNode();
464        goto found_chain_operand;
465      }
466    return 0;
467  found_chain_operand:;
468    if (N->getOpcode() == ISD::EntryToken)
469      return 0;
470  }
471}
472
473/// Call ReleasePred for each predecessor, then update register live def/gen.
474/// Always update LiveRegDefs for a register dependence even if the current SU
475/// also defines the register. This effectively creates one large live range
476/// across a sequence of two-address nodes. This is important because the
477/// entire chain must be scheduled together. Example:
478///
479/// flags = (3) add
480/// flags = (2) addc flags
481/// flags = (1) addc flags
482///
483/// results in
484///
485/// LiveRegDefs[flags] = 3
486/// LiveRegGens[flags] = 1
487///
488/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
489/// interference on flags.
490void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
491  // Bottom up: release predecessors
492  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
493       I != E; ++I) {
494    ReleasePred(SU, &*I);
495    if (I->isAssignedRegDep()) {
496      // This is a physical register dependency and it's impossible or
497      // expensive to copy the register. Make sure nothing that can
498      // clobber the register is scheduled between the predecessor and
499      // this node.
500      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
501      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
502             "interference on register dependence");
503      LiveRegDefs[I->getReg()] = I->getSUnit();
504      if (!LiveRegGens[I->getReg()]) {
505        ++NumLiveRegs;
506        LiveRegGens[I->getReg()] = SU;
507      }
508    }
509  }
510
511  // If we're scheduling a lowered CALLSEQ_END, find the corresponding CALLSEQ_BEGIN.
512  // Inject an artificial physical register dependence between these nodes, to
513  // prevent other calls from being interscheduled with them.
514  const TargetLowering *TLI = TM.getTargetLowering();
515  unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
516  if (!LiveRegDefs[SP])
517    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
518      if (Node->isMachineOpcode() &&
519          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
520        unsigned NestLevel = 0;
521        unsigned MaxNest = 0;
522        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
523
524        SUnit *Def = &SUnits[N->getNodeId()];
525        ++NumLiveRegs;
526        LiveRegDefs[SP] = Def;
527        LiveRegGens[SP] = SU;
528        break;
529      }
530}
531
532/// Check to see if any of the pending instructions are ready to issue.  If
533/// so, add them to the available queue.
534void ScheduleDAGRRList::ReleasePending() {
535  if (DisableSchedCycles) {
536    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
537    return;
538  }
539
540  // If the available queue is empty, it is safe to reset MinAvailableCycle.
541  if (AvailableQueue->empty())
542    MinAvailableCycle = UINT_MAX;
543
544  // Check to see if any of the pending instructions are ready to issue.  If
545  // so, add them to the available queue.
546  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
547    unsigned ReadyCycle = PendingQueue[i]->getHeight();
548    if (ReadyCycle < MinAvailableCycle)
549      MinAvailableCycle = ReadyCycle;
550
551    if (PendingQueue[i]->isAvailable) {
552      if (!isReady(PendingQueue[i]))
553          continue;
554      AvailableQueue->push(PendingQueue[i]);
555    }
556    PendingQueue[i]->isPending = false;
557    PendingQueue[i] = PendingQueue.back();
558    PendingQueue.pop_back();
559    --i; --e;
560  }
561}
562
563/// Move the scheduler state forward by the specified number of Cycles.
564void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
565  if (NextCycle <= CurCycle)
566    return;
567
568  IssueCount = 0;
569  AvailableQueue->setCurCycle(NextCycle);
570  if (!HazardRec->isEnabled()) {
571    // Bypass lots of virtual calls in case of long latency.
572    CurCycle = NextCycle;
573  }
574  else {
575    for (; CurCycle != NextCycle; ++CurCycle) {
576      HazardRec->RecedeCycle();
577    }
578  }
579  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
580  // available Q to release pending nodes at least once before popping.
581  ReleasePending();
582}
583
584/// Move the scheduler state forward until the specified node's dependents are
585/// ready and can be scheduled with no resource conflicts.
586void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
587  if (DisableSchedCycles)
588    return;
589
590  // FIXME: Nodes such as CopyFromReg probably should not advance the current
591  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
592  // has predecessors the cycle will be advanced when they are scheduled.
593  // But given the crude nature of modeling latency through such nodes, we
594  // currently need to treat these nodes like real instructions.
595  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
596
597  unsigned ReadyCycle = SU->getHeight();
598
599  // Bump CurCycle to account for latency. We assume the latency of other
600  // available instructions may be hidden by the stall (not a full pipe stall).
601  // This updates the hazard recognizer's cycle before reserving resources for
602  // this instruction.
603  AdvanceToCycle(ReadyCycle);
604
605  // Calls are scheduled in their preceding cycle, so don't conflict with
606  // hazards from instructions after the call. EmitNode will reset the
607  // scoreboard state before emitting the call.
608  if (SU->isCall)
609    return;
610
611  // FIXME: For resource conflicts in very long non-pipelined stages, we
612  // should probably skip ahead here to avoid useless scoreboard checks.
613  int Stalls = 0;
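  // Repeatedly query the hazard recognizer with a growing stall count (negated
  // because this is a bottom-up scheduler) until no hazard remains, then
  // advance CurCycle past the required stalls below.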
614  while (true) {
615    ScheduleHazardRecognizer::HazardType HT =
616      HazardRec->getHazardType(SU, -Stalls);
617
618    if (HT == ScheduleHazardRecognizer::NoHazard)
619      break;
620
621    ++Stalls;
622  }
623  AdvanceToCycle(CurCycle + Stalls);
624}
625
626/// Record this SUnit in the HazardRecognizer.
627/// Does not update CurCycle.
628void ScheduleDAGRRList::EmitNode(SUnit *SU) {
629  if (!HazardRec->isEnabled())
630    return;
631
632  // Check for phys reg copy.
633  if (!SU->getNode())
634    return;
635
636  switch (SU->getNode()->getOpcode()) {
637  default:
638    assert(SU->getNode()->isMachineOpcode() &&
639           "This target-independent node should not be scheduled.");
640    break;
641  case ISD::MERGE_VALUES:
642  case ISD::TokenFactor:
643  case ISD::CopyToReg:
644  case ISD::CopyFromReg:
645  case ISD::EH_LABEL:
646    // Noops don't affect the scoreboard state. Copies are likely to be
647    // removed.
648    return;
649  case ISD::INLINEASM:
650    // For inline asm, clear the pipeline state.
651    HazardRec->Reset();
652    return;
653  }
654  if (SU->isCall) {
655    // Calls are scheduled with their preceding instructions. For bottom-up
656    // scheduling, clear the pipeline state before emitting.
657    HazardRec->Reset();
658  }
659
660  HazardRec->EmitInstruction(SU);
661}
662
663static void resetVRegCycle(SUnit *SU);
664
665/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
666/// count of its predecessors. If a predecessor's pending count is zero, add it to
667/// the Available queue.
668void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
669  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
670  DEBUG(SU->dump(this));
671
672#ifndef NDEBUG
673  if (CurCycle < SU->getHeight())
674    DEBUG(dbgs() << "   Height [" << SU->getHeight()
675          << "] pipeline stall!\n");
676#endif
677
678  // FIXME: Do not modify node height. It may interfere with
679  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
680  // node its ready cycle can aid heuristics, and after scheduling it can
681  // indicate the scheduled cycle.
682  SU->setHeightToAtLeast(CurCycle);
683
684  // Reserve resources for the scheduled instruction.
685  EmitNode(SU);
686
687  Sequence.push_back(SU);
688
689  AvailableQueue->ScheduledNode(SU);
690
691  // If HazardRec is disabled, and each inst counts as one cycle, then
692  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
693  // PendingQueue for schedulers that implement HasReadyFilter.
694  if (!HazardRec->isEnabled() && AvgIPC < 2)
695    AdvanceToCycle(CurCycle + 1);
696
697  // Update liveness of predecessors before successors to avoid treating a
698  // two-address node as a live range def.
699  ReleasePredecessors(SU);
700
701  // Release all the implicit physical register defs that are live.
702  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
703       I != E; ++I) {
704    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
705    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
706      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
707      --NumLiveRegs;
708      LiveRegDefs[I->getReg()] = NULL;
709      LiveRegGens[I->getReg()] = NULL;
710    }
711  }
712  // Release the special call resource dependence, if this is the beginning
713  // of a call.
714  const TargetLowering *TLI = TM.getTargetLowering();
715  unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
716  if (LiveRegDefs[SP] == SU)
717  for (const SDNode *SUNode = SU->getNode(); SUNode;
718       SUNode = SUNode->getGluedNode()) {
719    if (SUNode->isMachineOpcode() &&
720        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode() &&
721        LiveRegDefs[SP] == SU) {
722      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
723      --NumLiveRegs;
724      LiveRegDefs[SP] = NULL;
725      LiveRegGens[SP] = NULL;
726    }
727  }
728
729  resetVRegCycle(SU);
730
731  SU->isScheduled = true;
732
733  // Conditions under which the scheduler should eagerly advance the cycle:
734  // (1) No available instructions
735  // (2) All pipelines full, so available instructions must have hazards.
736  //
737  // If HazardRec is disabled, the cycle was pre-advanced before calling
738  // ReleasePredecessors. In that case, IssueCount should remain 0.
739  //
740  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
741  if (HazardRec->isEnabled() || AvgIPC > 1) {
742    if (SU->getNode() && SU->getNode()->isMachineOpcode())
743      ++IssueCount;
744    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
745        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
746      AdvanceToCycle(CurCycle + 1);
747  }
748}
749
750/// CapturePred - This does the opposite of ReleasePred. Since SU is being
751/// unscheduled, increase the succ left count of its predecessors. Remove
752/// them from AvailableQueue if necessary.
753void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
754  SUnit *PredSU = PredEdge->getSUnit();
755  if (PredSU->isAvailable) {
756    PredSU->isAvailable = false;
757    if (!PredSU->isPending)
758      AvailableQueue->remove(PredSU);
759  }
760
761  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
762  ++PredSU->NumSuccsLeft;
763}
764
765/// UnscheduleNodeBottomUp - Remove the node from the schedule and update it
766/// and its predecessors' state to reflect the change.
767void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
768  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
769  DEBUG(SU->dump(this));
770
771  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
772       I != E; ++I) {
773    CapturePred(&*I);
774    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
775      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
776      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
777             "Physical register dependency violated?");
778      --NumLiveRegs;
779      LiveRegDefs[I->getReg()] = NULL;
780      LiveRegGens[I->getReg()] = NULL;
781    }
782  }
783
784  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
785       I != E; ++I) {
786    if (I->isAssignedRegDep()) {
787      if (!LiveRegDefs[I->getReg()]) {
788        ++NumLiveRegs;
789      }
790      // This becomes the nearest def. Note that an earlier def may still be
791      // pending if this is a two-address node.
792      LiveRegDefs[I->getReg()] = SU;
793      if (LiveRegGens[I->getReg()] == NULL ||
794          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
795        LiveRegGens[I->getReg()] = I->getSUnit();
796    }
797  }
798  if (SU->getHeight() < MinAvailableCycle)
799    MinAvailableCycle = SU->getHeight();
800
801  SU->setHeightDirty();
802  SU->isScheduled = false;
803  SU->isAvailable = true;
804  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
805    // Don't make available until backtracking is complete.
806    SU->isPending = true;
807    PendingQueue.push_back(SU);
808  }
809  else {
810    AvailableQueue->push(SU);
811  }
812  AvailableQueue->UnscheduledNode(SU);
813}
814
815/// After backtracking, the hazard checker needs to be restored to a state
816/// corresponding to the current cycle.
817void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
818  HazardRec->Reset();
819
820  unsigned LookAhead = std::min((unsigned)Sequence.size(),
821                                HazardRec->getMaxLookAhead());
822  if (LookAhead == 0)
823    return;
824
825  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
826  unsigned HazardCycle = (*I)->getHeight();
827  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
828    SUnit *SU = *I;
829    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
830      HazardRec->RecedeCycle();
831    }
832    EmitNode(SU);
833  }
834}
835
836/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
837/// including BtSU, in order to make it possible to schedule SU.
838void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
839  SUnit *OldSU = Sequence.back();
840  while (true) {
841    Sequence.pop_back();
842    if (SU->isSucc(OldSU))
843      // Don't try to remove SU from AvailableQueue.
844      SU->isAvailable = false;
845    // FIXME: use ready cycle instead of height
846    CurCycle = OldSU->getHeight();
847    UnscheduleNodeBottomUp(OldSU);
848    AvailableQueue->setCurCycle(CurCycle);
849    if (OldSU == BtSU)
850      break;
851    OldSU = Sequence.back();
852  }
853
854  assert(!SU->isSucc(OldSU) && "Something is wrong!");
855
856  RestoreHazardCheckerBottomUp();
857
858  ReleasePending();
859
860  ++NumBacktracks;
861}
862
863static bool isOperandOf(const SUnit *SU, SDNode *N) {
864  for (const SDNode *SUNode = SU->getNode(); SUNode;
865       SUNode = SUNode->getGluedNode()) {
866    if (SUNode->isOperandOf(N))
867      return true;
868  }
869  return false;
870}
871
872/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
873/// successors to the newly created node.
874SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
875  SDNode *N = SU->getNode();
876  if (!N)
877    return NULL;
878
879  if (SU->getNode()->getGluedNode())
880    return NULL;
881
882  SUnit *NewSU;
883  bool TryUnfold = false;
884  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
885    EVT VT = N->getValueType(i);
886    if (VT == MVT::Glue)
887      return NULL;
888    else if (VT == MVT::Other)
889      TryUnfold = true;
890  }
891  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
892    const SDValue &Op = N->getOperand(i);
893    EVT VT = Op.getNode()->getValueType(Op.getResNo());
894    if (VT == MVT::Glue)
895      return NULL;
896  }
897
898  if (TryUnfold) {
899    SmallVector<SDNode*, 2> NewNodes;
900    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
901      return NULL;
902
903    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
904    assert(NewNodes.size() == 2 && "Expected a load folding node!");
905
906    N = NewNodes[1];
907    SDNode *LoadNode = NewNodes[0];
908    unsigned NumVals = N->getNumValues();
909    unsigned OldNumVals = SU->getNode()->getNumValues();
910    for (unsigned i = 0; i != NumVals; ++i)
911      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
912    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
913                                   SDValue(LoadNode, 1));
914
915    // LoadNode may already exist. This can happen when there is another
916    // load from the same location that produces the same type of value
917    // but has different alignment or volatility.
918    bool isNewLoad = true;
919    SUnit *LoadSU;
920    if (LoadNode->getNodeId() != -1) {
921      LoadSU = &SUnits[LoadNode->getNodeId()];
922      isNewLoad = false;
923    } else {
924      LoadSU = CreateNewSUnit(LoadNode);
925      LoadNode->setNodeId(LoadSU->NodeNum);
926
927      InitNumRegDefsLeft(LoadSU);
928      ComputeLatency(LoadSU);
929    }
930
931    NewSU = CreateNewSUnit(N);
932    assert(N->getNodeId() == -1 && "Node already inserted!");
933    N->setNodeId(NewSU->NodeNum);
934
935    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
936    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
937      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
938        NewSU->isTwoAddress = true;
939        break;
940      }
941    }
942    if (MCID.isCommutable())
943      NewSU->isCommutable = true;
944
945    InitNumRegDefsLeft(NewSU);
946    ComputeLatency(NewSU);
947
948    // Record all the edges to and from the old SU, by category.
949    SmallVector<SDep, 4> ChainPreds;
950    SmallVector<SDep, 4> ChainSuccs;
951    SmallVector<SDep, 4> LoadPreds;
952    SmallVector<SDep, 4> NodePreds;
953    SmallVector<SDep, 4> NodeSuccs;
954    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
955         I != E; ++I) {
956      if (I->isCtrl())
957        ChainPreds.push_back(*I);
958      else if (isOperandOf(I->getSUnit(), LoadNode))
959        LoadPreds.push_back(*I);
960      else
961        NodePreds.push_back(*I);
962    }
963    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
964         I != E; ++I) {
965      if (I->isCtrl())
966        ChainSuccs.push_back(*I);
967      else
968        NodeSuccs.push_back(*I);
969    }
970
971    // Now assign edges to the newly-created nodes.
972    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
973      const SDep &Pred = ChainPreds[i];
974      RemovePred(SU, Pred);
975      if (isNewLoad)
976        AddPred(LoadSU, Pred);
977    }
978    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
979      const SDep &Pred = LoadPreds[i];
980      RemovePred(SU, Pred);
981      if (isNewLoad)
982        AddPred(LoadSU, Pred);
983    }
984    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
985      const SDep &Pred = NodePreds[i];
986      RemovePred(SU, Pred);
987      AddPred(NewSU, Pred);
988    }
989    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
990      SDep D = NodeSuccs[i];
991      SUnit *SuccDep = D.getSUnit();
992      D.setSUnit(SU);
993      RemovePred(SuccDep, D);
994      D.setSUnit(NewSU);
995      AddPred(SuccDep, D);
996      // Balance register pressure.
997      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
998          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
999        --NewSU->NumRegDefsLeft;
1000    }
1001    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
1002      SDep D = ChainSuccs[i];
1003      SUnit *SuccDep = D.getSUnit();
1004      D.setSUnit(SU);
1005      RemovePred(SuccDep, D);
1006      if (isNewLoad) {
1007        D.setSUnit(LoadSU);
1008        AddPred(SuccDep, D);
1009      }
1010    }
1011
1012    // Add a data dependency to reflect that NewSU reads the value defined
1013    // by LoadSU.
1014    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
1015
1016    if (isNewLoad)
1017      AvailableQueue->addNode(LoadSU);
1018    AvailableQueue->addNode(NewSU);
1019
1020    ++NumUnfolds;
1021
1022    if (NewSU->NumSuccsLeft == 0) {
1023      NewSU->isAvailable = true;
1024      return NewSU;
1025    }
1026    SU = NewSU;
1027  }
1028
1029  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
1030  NewSU = CreateClone(SU);
1031
1032  // New SUnit has the exact same predecessors.
1033  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1034       I != E; ++I)
1035    if (!I->isArtificial())
1036      AddPred(NewSU, *I);
1037
1038  // Only copy scheduled successors. Cut them from old node's successor
1039  // list and move them over.
1040  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1041  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1042       I != E; ++I) {
1043    if (I->isArtificial())
1044      continue;
1045    SUnit *SuccSU = I->getSUnit();
1046    if (SuccSU->isScheduled) {
1047      SDep D = *I;
1048      D.setSUnit(NewSU);
1049      AddPred(SuccSU, D);
1050      D.setSUnit(SU);
1051      DelDeps.push_back(std::make_pair(SuccSU, D));
1052    }
1053  }
1054  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1055    RemovePred(DelDeps[i].first, DelDeps[i].second);
1056
1057  AvailableQueue->updateNode(SU);
1058  AvailableQueue->addNode(NewSU);
1059
1060  ++NumDups;
1061  return NewSU;
1062}
1063
1064/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1065/// scheduled successors of the given SUnit to the last copy.
1066void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1067                                               const TargetRegisterClass *DestRC,
1068                                               const TargetRegisterClass *SrcRC,
1069                                               SmallVector<SUnit*, 2> &Copies) {
1070  SUnit *CopyFromSU = CreateNewSUnit(NULL);
1071  CopyFromSU->CopySrcRC = SrcRC;
1072  CopyFromSU->CopyDstRC = DestRC;
1073
1074  SUnit *CopyToSU = CreateNewSUnit(NULL);
1075  CopyToSU->CopySrcRC = DestRC;
1076  CopyToSU->CopyDstRC = SrcRC;
1077
1078  // Only copy scheduled successors. Cut them from old node's successor
1079  // list and move them over.
1080  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1081  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1082       I != E; ++I) {
1083    if (I->isArtificial())
1084      continue;
1085    SUnit *SuccSU = I->getSUnit();
1086    if (SuccSU->isScheduled) {
1087      SDep D = *I;
1088      D.setSUnit(CopyToSU);
1089      AddPred(SuccSU, D);
1090      DelDeps.push_back(std::make_pair(SuccSU, *I));
1091    }
1092    else {
1093      // Avoid scheduling the def-side copy before other successors. Otherwise
1094      // we could introduce another physreg interference on the copy and
1095      // continue inserting copies indefinitely.
1096      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
1097             /*Reg=*/0, /*isNormalMemory=*/false,
1098             /*isMustAlias=*/false, /*isArtificial=*/true);
1099      AddPred(SuccSU, D);
1100    }
1101  }
1102  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1103    RemovePred(DelDeps[i].first, DelDeps[i].second);
1104
1105  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
1106  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
1107
1108  AvailableQueue->updateNode(SU);
1109  AvailableQueue->addNode(CopyFromSU);
1110  AvailableQueue->addNode(CopyToSU);
1111  Copies.push_back(CopyFromSU);
1112  Copies.push_back(CopyToSU);
1113
1114  ++NumPRCopies;
1115}
1116
1117/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1118/// definition of the specified node.
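///
/// (Added example, with hypothetical registers: a machine node's results are
/// its explicit defs followed by its implicit defs, so for an instruction with
/// one explicit def and implicit defs [R1, R2], querying R2 yields the value
/// type at result index 2.)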
1119/// FIXME: Move to SelectionDAG?
1120static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1121                                 const TargetInstrInfo *TII) {
1122  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1123  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1124  unsigned NumRes = MCID.getNumDefs();
1125  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1126    if (Reg == *ImpDef)
1127      break;
1128    ++NumRes;
1129  }
1130  return N->getValueType(NumRes);
1131}
1132
1133/// CheckForLiveRegDef - Update the interfering live register list (LRegs) if
1134/// the specified register def of the specified SUnit clobbers any "live" registers.
1135static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1136                               std::vector<SUnit*> &LiveRegDefs,
1137                               SmallSet<unsigned, 4> &RegAdded,
1138                               SmallVector<unsigned, 4> &LRegs,
1139                               const TargetRegisterInfo *TRI) {
1140  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
1141
1142    // Check if Reg is live.
1143    if (!LiveRegDefs[*AliasI]) continue;
1144
1145    // Allow multiple uses of the same def.
1146    if (LiveRegDefs[*AliasI] == SU) continue;
1147
1148    // Add Reg to the set of interfering live regs.
1149    if (RegAdded.insert(*AliasI)) {
1150      LRegs.push_back(*AliasI);
1151    }
1152  }
1153}
1154
1155/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1156/// scheduling of the given node to satisfy live physical register dependencies.
1157/// If the specific node is the last one that's available to schedule, do
1158/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1159bool ScheduleDAGRRList::
1160DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1161  if (NumLiveRegs == 0)
1162    return false;
1163
1164  SmallSet<unsigned, 4> RegAdded;
1165  // If this node would clobber any "live" register, then it's not ready.
1166  //
1167  // If SU is the currently live definition of the same register that it uses,
1168  // then we are free to schedule it.
1169  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1170       I != E; ++I) {
1171    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1172      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1173                         RegAdded, LRegs, TRI);
1174  }
1175
1176  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1177    if (Node->getOpcode() == ISD::INLINEASM) {
1178      // Inline asm can clobber physical defs.
1179      unsigned NumOps = Node->getNumOperands();
1180      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1181        --NumOps;  // Ignore the glue operand.
1182
1183      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1184        unsigned Flags =
1185          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1186        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1187
1188        ++i; // Skip the ID value.
1189        if (InlineAsm::isRegDefKind(Flags) ||
1190            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1191            InlineAsm::isClobberKind(Flags)) {
1192          // Check for def of register or earlyclobber register.
1193          for (; NumVals; --NumVals, ++i) {
1194            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1195            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1196              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1197          }
1198        } else
1199          i += NumVals;
1200      }
1201      continue;
1202    }
1203
1204    if (!Node->isMachineOpcode())
1205      continue;
1206    // If we're in the middle of scheduling a call, don't begin scheduling
1207    // another call.
1208    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode() ||
1209        Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
1210      for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
1211        if (LiveRegDefs[i]) {
1212          SDNode *Gen = LiveRegGens[i]->getNode();
1213          while (SDNode *Glued = Gen->getGluedNode())
1214            Gen = Glued;
1215          if (!IsChainDependent(Gen, Node) && RegAdded.insert(i))
1216            LRegs.push_back(i);
1217        }
1218      continue;
1219    }
1220    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1221    if (!MCID.ImplicitDefs)
1222      continue;
1223    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
1224      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1225  }
1226
1227  return !LRegs.empty();
1228}
1229
1230/// Return a node that can be scheduled in this cycle. Requirements:
1231/// (1) Ready: latency has been satisfied
1232/// (2) No Hazards: resources are available
1233/// (3) No Interferences: may unschedule to break register interferences.
1234SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1235  SmallVector<SUnit*, 4> Interferences;
1236  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1237
1238  SUnit *CurSU = AvailableQueue->pop();
1239  while (CurSU) {
1240    SmallVector<unsigned, 4> LRegs;
1241    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1242      break;
1243    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1244
1245    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1246    Interferences.push_back(CurSU);
1247    CurSU = AvailableQueue->pop();
1248  }
1249  if (CurSU) {
1250    // Add the nodes that aren't ready back onto the available list.
1251    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1252      Interferences[i]->isPending = false;
1253      assert(Interferences[i]->isAvailable && "must still be available");
1254      AvailableQueue->push(Interferences[i]);
1255    }
1256    return CurSU;
1257  }
1258
1259  // All candidates are delayed due to live physical reg dependencies.
1260  // Try backtracking, code duplication, or inserting cross class copies
1261  // to resolve it.
1262  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1263    SUnit *TrySU = Interferences[i];
1264    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1265
1266    // Try unscheduling up to the point where it's safe to schedule
1267    // this node.
1268    SUnit *BtSU = NULL;
1269    unsigned LiveCycle = UINT_MAX;
1270    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1271      unsigned Reg = LRegs[j];
1272      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1273        BtSU = LiveRegGens[Reg];
1274        LiveCycle = BtSU->getHeight();
1275      }
1276    }
1277    if (!WillCreateCycle(TrySU, BtSU))  {
1278      BacktrackBottomUp(TrySU, BtSU);
1279
1280      // Force the current node to be scheduled before the node that
1281      // requires the physical reg dep.
1282      if (BtSU->isAvailable) {
1283        BtSU->isAvailable = false;
1284        if (!BtSU->isPending)
1285          AvailableQueue->remove(BtSU);
1286      }
1287      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1288                          /*Reg=*/0, /*isNormalMemory=*/false,
1289                          /*isMustAlias=*/false, /*isArtificial=*/true));
1290
1291      // If one or more successors have been unscheduled, then the current
1292      // node is no longer available. Schedule a successor that's now
1293      // available instead.
1294      if (!TrySU->isAvailable) {
1295        CurSU = AvailableQueue->pop();
1296      }
1297      else {
1298        CurSU = TrySU;
1299        TrySU->isPending = false;
1300        Interferences.erase(Interferences.begin()+i);
1301      }
1302      break;
1303    }
1304  }
1305
1306  if (!CurSU) {
1307    // Can't backtrack. If it's too expensive to copy the value, then try to
1308    // duplicate the nodes that produce these "too expensive to copy"
1309    // values to break the dependency. In case even that doesn't work,
1310    // insert cross class copies.
1311    // If it's not too expensive, i.e. cost != -1, issue copies.
1312    SUnit *TrySU = Interferences[0];
1313    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1314    assert(LRegs.size() == 1 && "Can't handle this yet!");
1315    unsigned Reg = LRegs[0];
1316    SUnit *LRDef = LiveRegDefs[Reg];
1317    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1318    const TargetRegisterClass *RC =
1319      TRI->getMinimalPhysRegClass(Reg, VT);
1320    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1321
1322    // If the cross copy register class is the same as RC, then it must be
1323    // possible to copy the value directly. Do not try to duplicate the def.
1324    // If the cross copy register class is not the same as RC, then it's
1325    // possible to copy the value, but it requires cross register class copies
1326    // and it is expensive.
1327    // If cross copy register class is null, then it's not possible to copy
1328    // the value at all.
1329    SUnit *NewDef = 0;
1330    if (DestRC != RC) {
1331      NewDef = CopyAndMoveSuccessors(LRDef);
1332      if (!DestRC && !NewDef)
1333        report_fatal_error("Can't handle live physical register dependency!");
1334    }
1335    if (!NewDef) {
1336      // Issue copies; these can be expensive cross register class copies.
1337      SmallVector<SUnit*, 2> Copies;
1338      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1339      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1340            << " to SU #" << Copies.front()->NodeNum << "\n");
1341      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1342                          /*Reg=*/0, /*isNormalMemory=*/false,
1343                          /*isMustAlias=*/false,
1344                          /*isArtificial=*/true));
1345      NewDef = Copies.back();
1346    }
1347
1348    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1349          << " to SU #" << TrySU->NodeNum << "\n");
1350    LiveRegDefs[Reg] = NewDef;
1351    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1352                         /*Reg=*/0, /*isNormalMemory=*/false,
1353                         /*isMustAlias=*/false,
1354                         /*isArtificial=*/true));
1355    TrySU->isAvailable = false;
1356    CurSU = NewDef;
1357  }
1358
1359  assert(CurSU && "Unable to resolve live physical register dependencies!");
1360
1361  // Add the nodes that aren't ready back onto the available list.
1362  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1363    Interferences[i]->isPending = false;
1364    // May no longer be available due to backtracking.
1365    if (Interferences[i]->isAvailable) {
1366      AvailableQueue->push(Interferences[i]);
1367    }
1368  }
1369  return CurSU;
1370}
1371
1372/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1373/// schedulers.
1374void ScheduleDAGRRList::ListScheduleBottomUp() {
1375  // Release any predecessors of the special Exit node.
1376  ReleasePredecessors(&ExitSU);
1377
1378  // Add root to Available queue.
1379  if (!SUnits.empty()) {
1380    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1381    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1382    RootSU->isAvailable = true;
1383    AvailableQueue->push(RootSU);
1384  }
1385
1386  // While Available queue is not empty, grab the node with the highest
1387  // priority. If it is not ready put it back.  Schedule the node.
1388  Sequence.reserve(SUnits.size());
1389  while (!AvailableQueue->empty()) {
1390    DEBUG(dbgs() << "\nExamining Available:\n";
1391          AvailableQueue->dump(this));
1392
1393    // Pick the best node to schedule taking all constraints into
1394    // consideration.
1395    SUnit *SU = PickNodeToScheduleBottomUp();
1396
1397    AdvancePastStalls(SU);
1398
1399    ScheduleNodeBottomUp(SU);
1400
1401    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1402      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1403      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1404      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1405    }
1406  }
1407
1408  // Reverse the order if it is bottom up.
1409  std::reverse(Sequence.begin(), Sequence.end());
1410
1411#ifndef NDEBUG
1412  VerifySchedule(/*isBottomUp=*/true);
1413#endif
1414}
1415
1416//===----------------------------------------------------------------------===//
1417//                RegReductionPriorityQueue Definition
1418//===----------------------------------------------------------------------===//
1419//
1420// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1421// to reduce register pressure.
1422//
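// (Added background note: a Sethi-Ullman number estimates how many registers
// are needed to evaluate a subtree: a leaf gets 1, and an interior node whose
// children need n1 > n2 registers needs n1, or n1 + 1 when n1 == n2. The
// priority functions below combine these numbers with other heuristics to
// order the available queue so that register pressure tends to be reduced.)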
1423namespace {
1424class RegReductionPQBase;
1425
1426struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1427  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1428};
1429
1430#ifndef NDEBUG
1431template<class SF>
1432struct reverse_sort : public queue_sort {
1433  SF &SortFunc;
1434  reverse_sort(SF &sf) : SortFunc(sf) {}
1435  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
1436
1437  bool operator()(SUnit* left, SUnit* right) const {
1438    // reverse left/right rather than simply !SortFunc(left, right)
1439    // to expose different paths in the comparison logic.
1440    return SortFunc(right, left);
1441  }
1442};
1443#endif // NDEBUG
1444
1445/// bu_ls_rr_sort - Priority function for bottom up register pressure
1446/// reduction scheduler.
1447struct bu_ls_rr_sort : public queue_sort {
1448  enum {
1449    IsBottomUp = true,
1450    HasReadyFilter = false
1451  };
1452
1453  RegReductionPQBase *SPQ;
1454  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1455  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1456
1457  bool operator()(SUnit* left, SUnit* right) const;
1458};
1459
1460// src_ls_rr_sort - Priority function for source order scheduler.
1461struct src_ls_rr_sort : public queue_sort {
1462  enum {
1463    IsBottomUp = true,
1464    HasReadyFilter = false
1465  };
1466
1467  RegReductionPQBase *SPQ;
1468  src_ls_rr_sort(RegReductionPQBase *spq)
1469    : SPQ(spq) {}
1470  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1471    : SPQ(RHS.SPQ) {}
1472
1473  bool operator()(SUnit* left, SUnit* right) const;
1474};
1475
1476// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1477struct hybrid_ls_rr_sort : public queue_sort {
1478  enum {
1479    IsBottomUp = true,
1480    HasReadyFilter = false
1481  };
1482
1483  RegReductionPQBase *SPQ;
1484  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1485    : SPQ(spq) {}
1486  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1487    : SPQ(RHS.SPQ) {}
1488
1489  bool isReady(SUnit *SU, unsigned CurCycle) const;
1490
1491  bool operator()(SUnit* left, SUnit* right) const;
1492};
1493
1494// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1495// scheduler.
1496struct ilp_ls_rr_sort : public queue_sort {
1497  enum {
1498    IsBottomUp = true,
1499    HasReadyFilter = false
1500  };
1501
1502  RegReductionPQBase *SPQ;
1503  ilp_ls_rr_sort(RegReductionPQBase *spq)
1504    : SPQ(spq) {}
1505  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1506    : SPQ(RHS.SPQ) {}
1507
1508  bool isReady(SUnit *SU, unsigned CurCycle) const;
1509
1510  bool operator()(SUnit* left, SUnit* right) const;
1511};
1512
1513class RegReductionPQBase : public SchedulingPriorityQueue {
1514protected:
1515  std::vector<SUnit*> Queue;
1516  unsigned CurQueueId;
1517  bool TracksRegPressure;
1518
1519  // SUnits - The SUnits for the current graph.
1520  std::vector<SUnit> *SUnits;
1521
1522  MachineFunction &MF;
1523  const TargetInstrInfo *TII;
1524  const TargetRegisterInfo *TRI;
1525  const TargetLowering *TLI;
1526  ScheduleDAGRRList *scheduleDAG;
1527
1528  // SethiUllmanNumbers - The SethiUllman number for each node.
1529  std::vector<unsigned> SethiUllmanNumbers;
1530
1531  /// RegPressure - Tracking current reg pressure per register class.
1532  ///
1533  std::vector<unsigned> RegPressure;
1534
1535  /// RegLimit - Tracking the number of allocatable registers per register
1536  /// class.
1537  std::vector<unsigned> RegLimit;
1538
1539public:
1540  RegReductionPQBase(MachineFunction &mf,
1541                     bool hasReadyFilter,
1542                     bool tracksrp,
1543                     const TargetInstrInfo *tii,
1544                     const TargetRegisterInfo *tri,
1545                     const TargetLowering *tli)
1546    : SchedulingPriorityQueue(hasReadyFilter),
1547      CurQueueId(0), TracksRegPressure(tracksrp),
1548      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1549    if (TracksRegPressure) {
1550      unsigned NumRC = TRI->getNumRegClasses();
1551      RegLimit.resize(NumRC);
1552      RegPressure.resize(NumRC);
1553      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1554      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1555      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1556             E = TRI->regclass_end(); I != E; ++I)
1557        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1558    }
1559  }
1560
1561  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1562    scheduleDAG = scheduleDag;
1563  }
1564
1565  ScheduleHazardRecognizer* getHazardRec() {
1566    return scheduleDAG->getHazardRec();
1567  }
1568
1569  void initNodes(std::vector<SUnit> &sunits);
1570
1571  void addNode(const SUnit *SU);
1572
1573  void updateNode(const SUnit *SU);
1574
1575  void releaseState() {
1576    SUnits = 0;
1577    SethiUllmanNumbers.clear();
1578    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1579  }
1580
1581  unsigned getNodePriority(const SUnit *SU) const;
1582
1583  unsigned getNodeOrdering(const SUnit *SU) const {
1584    if (!SU->getNode()) return 0;
1585
1586    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1587  }
1588
1589  bool empty() const { return Queue.empty(); }
1590
1591  void push(SUnit *U) {
1592    assert(!U->NodeQueueId && "Node in the queue already");
1593    U->NodeQueueId = ++CurQueueId;
1594    Queue.push_back(U);
1595  }
1596
1597  void remove(SUnit *SU) {
1598    assert(!Queue.empty() && "Queue is empty!");
1599    assert(SU->NodeQueueId != 0 && "Not in queue!");
1600    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1601                                                 SU);
1602    if (I != prior(Queue.end()))
1603      std::swap(*I, Queue.back());
1604    Queue.pop_back();
1605    SU->NodeQueueId = 0;
1606  }
1607
1608  bool tracksRegPressure() const { return TracksRegPressure; }
1609
1610  void dumpRegPressure() const;
1611
1612  bool HighRegPressure(const SUnit *SU) const;
1613
1614  bool MayReduceRegPressure(SUnit *SU) const;
1615
1616  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1617
1618  void ScheduledNode(SUnit *SU);
1619
1620  void UnscheduledNode(SUnit *SU);
1621
1622protected:
1623  bool canClobber(const SUnit *SU, const SUnit *Op);
1624  void AddPseudoTwoAddrDeps();
1625  void PrescheduleNodesWithMultipleUses();
1626  void CalculateSethiUllmanNumbers();
1627};
1628
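// popFromQueueImpl - Linear-scan the unordered queue for the best candidate
// according to Picker (Picker(A, B) returning true means B is preferred), then
// remove it in O(1) by swapping it with the last element before the pop_back.
// Keeping the vector unordered makes push O(1) at the cost of an O(n) pop.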
1629template<class SF>
1630static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1631  std::vector<SUnit *>::iterator Best = Q.begin();
1632  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1633         E = Q.end(); I != E; ++I)
1634    if (Picker(*Best, *I))
1635      Best = I;
1636  SUnit *V = *Best;
1637  if (Best != prior(Q.end()))
1638    std::swap(*Best, Q.back());
1639  Q.pop_back();
1640  return V;
1641}
1642
1643template<class SF>
1644SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1645#ifndef NDEBUG
1646  if (DAG->StressSched) {
1647    reverse_sort<SF> RPicker(Picker);
1648    return popFromQueueImpl(Q, RPicker);
1649  }
1650#endif
1651  (void)DAG;
1652  return popFromQueueImpl(Q, Picker);
1653}
1654
1655template<class SF>
1656class RegReductionPriorityQueue : public RegReductionPQBase {
1657  SF Picker;
1658
1659public:
1660  RegReductionPriorityQueue(MachineFunction &mf,
1661                            bool tracksrp,
1662                            const TargetInstrInfo *tii,
1663                            const TargetRegisterInfo *tri,
1664                            const TargetLowering *tli)
1665    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1666      Picker(this) {}
1667
1668  bool isBottomUp() const { return SF::IsBottomUp; }
1669
1670  bool isReady(SUnit *U) const {
1671    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1672  }
1673
1674  SUnit *pop() {
1675    if (Queue.empty()) return NULL;
1676
1677    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1678    V->NodeQueueId = 0;
1679    return V;
1680  }
1681
1682  void dump(ScheduleDAG *DAG) const {
1683    // Emulate pop() without clobbering NodeQueueIds.
1684    std::vector<SUnit*> DumpQueue = Queue;
1685    SF DumpPicker = Picker;
1686    while (!DumpQueue.empty()) {
1687      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1688      dbgs() << "Height " << SU->getHeight() << ": ";
1689      SU->dump(DAG);
1690    }
1691  }
1692};
1693
1694typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1695BURegReductionPriorityQueue;
1696
1697typedef RegReductionPriorityQueue<src_ls_rr_sort>
1698SrcRegReductionPriorityQueue;
1699
1700typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1701HybridBURRPriorityQueue;
1702
1703typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1704ILPBURRPriorityQueue;
1705} // end anonymous namespace
1706
1707//===----------------------------------------------------------------------===//
1708//           Static Node Priority for Register Pressure Reduction
1709//===----------------------------------------------------------------------===//
1710
1711// Check for special nodes that bypass scheduling heuristics.
1712// Currently this pushes TokenFactor nodes down, but may be used for other
1713// pseudo-ops as well.
1714//
1715// Return -1 to schedule right above left, 1 for left above right.
1716// Return 0 if no bias exists.
1717static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1718  bool LSchedLow = left->isScheduleLow;
1719  bool RSchedLow = right->isScheduleLow;
1720  if (LSchedLow != RSchedLow)
1721    return LSchedLow < RSchedLow ? 1 : -1;
1722  return 0;
1723}
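// For example, if right->isScheduleLow is set and left's is not, this returns
// 1 ("left above right"); the comparators below convert that into picking
// right first, and since the bottom-up sequence is reversed at the end, the
// schedule-low node lands lower in the final instruction order.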
1724
1725/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
1726/// A smaller number means higher priority.
1727static unsigned
1728CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1729  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1730  if (SethiUllmanNumber != 0)
1731    return SethiUllmanNumber;
1732
1733  unsigned Extra = 0;
1734  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1735       I != E; ++I) {
1736    if (I->isCtrl()) continue;  // ignore chain preds
1737    SUnit *PredSU = I->getSUnit();
1738    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1739    if (PredSethiUllman > SethiUllmanNumber) {
1740      SethiUllmanNumber = PredSethiUllman;
1741      Extra = 0;
1742    } else if (PredSethiUllman == SethiUllmanNumber)
1743      ++Extra;
1744  }
1745
1746  SethiUllmanNumber += Extra;
1747
1748  if (SethiUllmanNumber == 0)
1749    SethiUllmanNumber = 1;
1750
1751  return SethiUllmanNumber;
1752}
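// A small worked example of the recurrence above: a leaf with no data preds
// gets the default value 1. A node whose data preds have numbers {1, 1} takes
// the max (1) plus one for the tie, giving 2, while a node whose preds have
// numbers {2, 1} simply inherits the max, 2. Only subtrees of equal "register
// need" grow the number, in the spirit of the classic Sethi-Ullman labeling.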
1753
1754/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1755/// scheduling units.
1756void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1757  SethiUllmanNumbers.assign(SUnits->size(), 0);
1758
1759  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1760    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1761}
1762
1763void RegReductionPQBase::addNode(const SUnit *SU) {
1764  unsigned SUSize = SethiUllmanNumbers.size();
1765  if (SUnits->size() > SUSize)
1766    SethiUllmanNumbers.resize(SUSize*2, 0);
1767  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1768}
1769
1770void RegReductionPQBase::updateNode(const SUnit *SU) {
1771  SethiUllmanNumbers[SU->NodeNum] = 0;
1772  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1773}
1774
1775// Lower priority means schedule further down. For bottom-up scheduling, lower
1776// priority SUs are scheduled before higher priority SUs.
1777unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1778  assert(SU->NodeNum < SethiUllmanNumbers.size());
1779  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1780  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1781    // CopyToReg should be close to its uses to facilitate coalescing and
1782    // avoid spilling.
1783    return 0;
1784  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1785      Opc == TargetOpcode::SUBREG_TO_REG ||
1786      Opc == TargetOpcode::INSERT_SUBREG)
1787    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1788    // close to their uses to facilitate coalescing.
1789    return 0;
1790  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1791    // If SU does not have a register use, i.e. it doesn't produce a value
1792    // that would be consumed (e.g. store), then it terminates a chain of
1793    // computation.  Give it a large SethiUllman number so it will be
1794    // scheduled right before its predecessors, so that it doesn't lengthen
1795    // their live ranges.
1796    return 0xffff;
1797  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1798    // If SU does not have a register def, schedule it close to its uses
1799    // because it does not lengthen any live ranges.
1800    return 0;
1801#if 1
1802  return SethiUllmanNumbers[SU->NodeNum];
1803#else
1804  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1805  if (SU->isCallOp) {
1806    // FIXME: This assumes all of the defs are used as call operands.
1807    int NP = (int)Priority - SU->getNode()->getNumValues();
1808    return (NP > 0) ? NP : 0;
1809  }
1810  return Priority;
1811#endif
1812}
1813
1814//===----------------------------------------------------------------------===//
1815//                     Register Pressure Tracking
1816//===----------------------------------------------------------------------===//
1817
1818void RegReductionPQBase::dumpRegPressure() const {
1819  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1820         E = TRI->regclass_end(); I != E; ++I) {
1821    const TargetRegisterClass *RC = *I;
1822    unsigned Id = RC->getID();
1823    unsigned RP = RegPressure[Id];
1824    if (!RP) continue;
1825    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1826          << '\n');
1827  }
1828}
1829
1830bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1831  if (!TLI)
1832    return false;
1833
1834  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1835       I != E; ++I) {
1836    if (I->isCtrl())
1837      continue;
1838    SUnit *PredSU = I->getSUnit();
1839    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1840    // to cover the number of registers defined (they are all live).
1841    if (PredSU->NumRegDefsLeft == 0) {
1842      continue;
1843    }
1844    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1845         RegDefPos.IsValid(); RegDefPos.Advance()) {
1846      unsigned RCId, Cost;
1847      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1848
1849      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1850        return true;
1851    }
1852  }
1853  return false;
1854}
1855
1856bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1857  const SDNode *N = SU->getNode();
1858
1859  if (!N->isMachineOpcode() || !SU->NumSuccs)
1860    return false;
1861
1862  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1863  for (unsigned i = 0; i != NumDefs; ++i) {
1864    EVT VT = N->getValueType(i);
1865    if (!N->hasAnyUseOfValue(i))
1866      continue;
1867    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1868    if (RegPressure[RCId] >= RegLimit[RCId])
1869      return true;
1870  }
1871  return false;
1872}
1873
1874// Compute the register pressure contribution of this instruction by counting up
1875// for uses that are not live and down for defs. Only count register classes
1876// that are already under high pressure. As a side effect, compute the number of
1877// uses of registers that are already live.
1878//
1879// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1880// so could probably be factored.
1881int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1882  LiveUses = 0;
1883  int PDiff = 0;
1884  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1885       I != E; ++I) {
1886    if (I->isCtrl())
1887      continue;
1888    SUnit *PredSU = I->getSUnit();
1889    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1890    // to cover the number of registers defined (they are all live).
1891    if (PredSU->NumRegDefsLeft == 0) {
1892      if (PredSU->getNode()->isMachineOpcode())
1893        ++LiveUses;
1894      continue;
1895    }
1896    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1897         RegDefPos.IsValid(); RegDefPos.Advance()) {
1898      EVT VT = RegDefPos.GetValue();
1899      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1900      if (RegPressure[RCId] >= RegLimit[RCId])
1901        ++PDiff;
1902    }
1903  }
1904  const SDNode *N = SU->getNode();
1905
1906  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1907    return PDiff;
1908
1909  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1910  for (unsigned i = 0; i != NumDefs; ++i) {
1911    EVT VT = N->getValueType(i);
1912    if (!N->hasAnyUseOfValue(i))
1913      continue;
1914    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1915    if (RegPressure[RCId] >= RegLimit[RCId])
1916      --PDiff;
1917  }
1918  return PDiff;
1919}
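// Sign convention: each use of a value that is not yet live counts +1 and each
// of SU's own defs that has a use counts -1, both only in register classes
// that are already at their limit. A positive PDiff therefore means scheduling
// SU would raise pressure in the saturated classes, a negative PDiff means it
// would lower it; ilp_ls_rr_sort consumes this below.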
1920
1921void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1922  if (!TracksRegPressure)
1923    return;
1924
1925  if (!SU->getNode())
1926    return;
1927
1928  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1929       I != E; ++I) {
1930    if (I->isCtrl())
1931      continue;
1932    SUnit *PredSU = I->getSUnit();
1933    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1934    // to cover the number of registers defined (they are all live).
1935    if (PredSU->NumRegDefsLeft == 0) {
1936      continue;
1937    }
1938    // FIXME: The ScheduleDAG currently loses information about which of a
1939    // node's values is consumed by each dependence. Consequently, if the node
1940    // defines multiple register classes, we don't know which to pressurize
1941    // here. Instead the following loop consumes the register defs in an
1942    // arbitrary order. At least it handles the common case of clustered loads
1943    // to the same class. For precise liveness, each SDep needs to indicate the
1944    // result number. But that tightly couples the ScheduleDAG with the
1945    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1946    // value type or register class to SDep.
1947    //
1948    // The most important aspect of register tracking is balancing the increase
1949    // here with the reduction further below. Note that this SU may use multiple
1950    // defs in PredSU. They can't be determined here, but we've already
1951    // compensated by reducing NumRegDefsLeft in PredSU during
1952    // ScheduleDAGSDNodes::AddSchedEdges.
1953    --PredSU->NumRegDefsLeft;
1954    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1955    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1956         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1957      if (SkipRegDefs)
1958        continue;
1959
1960      unsigned RCId, Cost;
1961      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1962      RegPressure[RCId] += Cost;
1963      break;
1964    }
1965  }
1966
1967  // We should have this assert, but there may be dead SDNodes that never
1968  // materialize as SUnits, so they don't appear to generate liveness.
1969  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1970  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1971  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1972       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1973    if (SkipRegDefs > 0)
1974      continue;
1975    unsigned RCId, Cost;
1976    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1977    if (RegPressure[RCId] < Cost) {
1978      // Register pressure tracking is imprecise. This can happen. But we try
1979      // hard not to let it happen because it likely results in poor scheduling.
1980      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1981      RegPressure[RCId] = 0;
1982    }
1983    else {
1984      RegPressure[RCId] -= Cost;
1985    }
1986  }
1987  dumpRegPressure();
1988}
1989
1990void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1991  if (!TracksRegPressure)
1992    return;
1993
1994  const SDNode *N = SU->getNode();
1995  if (!N) return;
1996
1997  if (!N->isMachineOpcode()) {
1998    if (N->getOpcode() != ISD::CopyToReg)
1999      return;
2000  } else {
2001    unsigned Opc = N->getMachineOpcode();
2002    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2003        Opc == TargetOpcode::INSERT_SUBREG ||
2004        Opc == TargetOpcode::SUBREG_TO_REG ||
2005        Opc == TargetOpcode::REG_SEQUENCE ||
2006        Opc == TargetOpcode::IMPLICIT_DEF)
2007      return;
2008  }
2009
2010  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2011       I != E; ++I) {
2012    if (I->isCtrl())
2013      continue;
2014    SUnit *PredSU = I->getSUnit();
2015    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2016    // counts data deps.
2017    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2018      continue;
2019    const SDNode *PN = PredSU->getNode();
2020    if (!PN->isMachineOpcode()) {
2021      if (PN->getOpcode() == ISD::CopyFromReg) {
2022        EVT VT = PN->getValueType(0);
2023        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2024        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2025      }
2026      continue;
2027    }
2028    unsigned POpc = PN->getMachineOpcode();
2029    if (POpc == TargetOpcode::IMPLICIT_DEF)
2030      continue;
2031    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2032        POpc == TargetOpcode::INSERT_SUBREG ||
2033        POpc == TargetOpcode::SUBREG_TO_REG) {
2034      EVT VT = PN->getValueType(0);
2035      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2036      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2037      continue;
2038    }
2039    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2040    for (unsigned i = 0; i != NumDefs; ++i) {
2041      EVT VT = PN->getValueType(i);
2042      if (!PN->hasAnyUseOfValue(i))
2043        continue;
2044      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2045      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2046        // Register pressure tracking is imprecise. This can happen.
2047        RegPressure[RCId] = 0;
2048      else
2049        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2050    }
2051  }
2052
2053  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2054  // may transfer data dependencies to CopyToReg.
2055  if (SU->NumSuccs && N->isMachineOpcode()) {
2056    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2057    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2058      EVT VT = N->getValueType(i);
2059      if (VT == MVT::Glue || VT == MVT::Other)
2060        continue;
2061      if (!N->hasAnyUseOfValue(i))
2062        continue;
2063      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2064      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2065    }
2066  }
2067
2068  dumpRegPressure();
2069}
2070
2071//===----------------------------------------------------------------------===//
2072//           Dynamic Node Priority for Register Pressure Reduction
2073//===----------------------------------------------------------------------===//
2074
2075/// closestSucc - Returns the scheduled cycle of the successor which is
2076/// closest to the current cycle.
2077static unsigned closestSucc(const SUnit *SU) {
2078  unsigned MaxHeight = 0;
2079  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2080       I != E; ++I) {
2081    if (I->isCtrl()) continue;  // ignore chain succs
2082    unsigned Height = I->getSUnit()->getHeight();
2083    // If there are a bunch of CopyToRegs stacked up, they should be considered
2084    // to be at the same position.
2085    if (I->getSUnit()->getNode() &&
2086        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2087      Height = closestSucc(I->getSUnit())+1;
2088    if (Height > MaxHeight)
2089      MaxHeight = Height;
2090  }
2091  return MaxHeight;
2092}
2093
2094/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2095/// for scratch registers, i.e. number of data dependencies.
2096static unsigned calcMaxScratches(const SUnit *SU) {
2097  unsigned Scratches = 0;
2098  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2099       I != E; ++I) {
2100    if (I->isCtrl()) continue;  // ignore chain preds
2101    Scratches++;
2102  }
2103  return Scratches;
2104}
2105
2106/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2107/// CopyFromReg from a virtual register.
2108static bool hasOnlyLiveInOpers(const SUnit *SU) {
2109  bool RetVal = false;
2110  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2111       I != E; ++I) {
2112    if (I->isCtrl()) continue;
2113    const SUnit *PredSU = I->getSUnit();
2114    if (PredSU->getNode() &&
2115        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2116      unsigned Reg =
2117        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2118      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2119        RetVal = true;
2120        continue;
2121      }
2122    }
2123    return false;
2124  }
2125  return RetVal;
2126}
2127
2128/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2129/// CopyToReg to a virtual register. This SU def is probably a liveout and
2130/// it has no other use. It should be scheduled closer to the terminator.
2131static bool hasOnlyLiveOutUses(const SUnit *SU) {
2132  bool RetVal = false;
2133  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2134       I != E; ++I) {
2135    if (I->isCtrl()) continue;
2136    const SUnit *SuccSU = I->getSUnit();
2137    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2138      unsigned Reg =
2139        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2140      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2141        RetVal = true;
2142        continue;
2143      }
2144    }
2145    return false;
2146  }
2147  return RetVal;
2148}
2149
2150// Set isVRegCycle for a node with only live in opers and live out uses. Also
2151// set isVRegCycle for its CopyFromReg operands.
2152//
2153// This is only relevant for single-block loops, where the VRegCycle
2154// node is likely an induction variable whose operand and target virtual
2155// registers should be coalesced (e.g. pre/post increment values). Setting the
2156// isVRegCycle flag helps the scheduler prioritize other uses of the same
2157// CopyFromReg so that this node becomes the virtual register "kill". This
2158// avoids interference between the values live in and out of the block and
2159// eliminates a copy inside the loop.
2160static void initVRegCycle(SUnit *SU) {
2161  if (DisableSchedVRegCycle)
2162    return;
2163
2164  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2165    return;
2166
2167  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2168
2169  SU->isVRegCycle = true;
2170
2171  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2172       I != E; ++I) {
2173    if (I->isCtrl()) continue;
2174    I->getSUnit()->isVRegCycle = true;
2175  }
2176}
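// For example, in a single-block loop an induction update such as
//   %iv.next = add %iv, 1
// typically reads %iv through a CopyFromReg and feeds a CopyToReg of the
// loop-carried value; it has only live-in operands and live-out uses, so both
// it and its CopyFromReg operands get the isVRegCycle flag here.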
2177
2178// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2179// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2180static void resetVRegCycle(SUnit *SU) {
2181  if (!SU->isVRegCycle)
2182    return;
2183
2184  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2185       I != E; ++I) {
2186    if (I->isCtrl()) continue;  // ignore chain preds
2187    SUnit *PredSU = I->getSUnit();
2188    if (PredSU->isVRegCycle) {
2189      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2190             "VRegCycle def must be CopyFromReg");
2191      I->getSUnit()->isVRegCycle = 0;
2192    }
2193  }
2194}
2195
2196// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2197// means a node that defines the VRegCycle has not been scheduled yet.
2198static bool hasVRegCycleUse(const SUnit *SU) {
2199  // If this SU also defines the VReg, don't hoist it as a "use".
2200  if (SU->isVRegCycle)
2201    return false;
2202
2203  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2204       I != E; ++I) {
2205    if (I->isCtrl()) continue;  // ignore chain preds
2206    if (I->getSUnit()->isVRegCycle &&
2207        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2208      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2209      return true;
2210    }
2211  }
2212  return false;
2213}
2214
2215// Check for either a dependence (latency) or resource (hazard) stall.
2216//
2217// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2218static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2219  if ((int)SPQ->getCurCycle() < Height) return true;
2220  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2221      != ScheduleHazardRecognizer::NoHazard)
2222    return true;
2223  return false;
2224}
2225
2226// Return -1 if left has higher priority, 1 if right has higher priority.
2227// Return 0 if latency-based priority is equivalent.
2228static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2229                            RegReductionPQBase *SPQ) {
2230  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2231  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2232  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2233  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2234  int LHeight = (int)left->getHeight() + LPenalty;
2235  int RHeight = (int)right->getHeight() + RPenalty;
2236
2237  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2238    BUHasStall(left, LHeight, SPQ);
2239  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2240    BUHasStall(right, RHeight, SPQ);
2241
2242  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2243  // If scheduling both of them will cause stalls, sort them according to
2244  // their height.
2245  if (LStall) {
2246    if (!RStall) {
2247      DEBUG(++FactorCount[FactStall]);
2248      return 1;
2249    }
2250    if (LHeight != RHeight) {
2251      DEBUG(++FactorCount[FactStall]);
2252      return LHeight > RHeight ? 1 : -1;
2253    }
2254  } else if (RStall) {
2255    DEBUG(++FactorCount[FactStall]);
2256    return -1;
2257  }
2258
2259  // If either node is scheduling for latency, sort them by height/depth
2260  // and latency.
2261  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2262                     right->SchedulingPref == Sched::ILP)) {
2263    if (DisableSchedCycles) {
2264      if (LHeight != RHeight) {
2265        DEBUG(++FactorCount[FactHeight]);
2266        return LHeight > RHeight ? 1 : -1;
2267      }
2268    }
2269    else {
2270      // If neither instruction stalls (!LStall && !RStall), their heights are
2271      // already covered, so only their depths matter. We also reach this if
2272      // both stall but have the same height.
2273      int LDepth = left->getDepth() - LPenalty;
2274      int RDepth = right->getDepth() - RPenalty;
2275      if (LDepth != RDepth) {
2276        DEBUG(++FactorCount[FactDepth]);
2277        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2278              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2279              << ") depth " << RDepth << "\n");
2280        return LDepth < RDepth ? 1 : -1;
2281      }
2282    }
2283    if (left->Latency != right->Latency) {
2284      DEBUG(++FactorCount[FactOther]);
2285      return left->Latency > right->Latency ? 1 : -1;
2286    }
2287  }
2288  return 0;
2289}
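// For instance, if neither candidate stalls, cycle-level scheduling is enabled,
// and no VRegCycle penalty applies, a left node of depth 2 compared with a
// right node of depth 5 returns 1: the deeper node is treated as the more
// latency-critical one and reported as the higher-priority side.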
2290
2291static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2292  // Schedule physical register definitions close to their use. This is
2293  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2294  // long as shortening physreg live ranges is generally good, we can defer
2295  // creating a subtarget hook.
2296  if (!DisableSchedPhysRegJoin) {
2297    bool LHasPhysReg = left->hasPhysRegDefs;
2298    bool RHasPhysReg = right->hasPhysRegDefs;
2299    if (LHasPhysReg != RHasPhysReg) {
2300      DEBUG(++FactorCount[FactRegUses]);
2301      #ifndef NDEBUG
2302      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2303      #endif
2304      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2305            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2306            << PhysRegMsg[RHasPhysReg] << "\n");
2307      return LHasPhysReg < RHasPhysReg;
2308    }
2309  }
2310
2311  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2312  unsigned LPriority = SPQ->getNodePriority(left);
2313  unsigned RPriority = SPQ->getNodePriority(right);
2314
2315  // Be really careful about hoisting call operands above previous calls.
2316  // Only allow it if it would reduce register pressure.
2317  if (left->isCall && right->isCallOp) {
2318    unsigned RNumVals = right->getNode()->getNumValues();
2319    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2320  }
2321  if (right->isCall && left->isCallOp) {
2322    unsigned LNumVals = left->getNode()->getNumValues();
2323    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2324  }
2325
2326  if (LPriority != RPriority) {
2327    DEBUG(++FactorCount[FactStatic]);
2328    return LPriority > RPriority;
2329  }
2330
2331  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2332  // the same, then keep source order.
2333  if (left->isCall || right->isCall) {
2334    unsigned LOrder = SPQ->getNodeOrdering(left);
2335    unsigned ROrder = SPQ->getNodeOrdering(right);
2336
2337    // Prefer an ordering where the node with the lower non-zero order number
2338    // gets the higher preference.
2339    if ((LOrder || ROrder) && LOrder != ROrder)
2340      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2341  }
2342
2343  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2344  // e.g.
2345  // t1 = op t2, c1
2346  // t3 = op t4, c2
2347  //
2348  // and the following instructions are both ready.
2349  // t2 = op c3
2350  // t4 = op c4
2351  //
2352  // Then schedule t2 = op first.
2353  // i.e.
2354  // t4 = op c4
2355  // t2 = op c3
2356  // t1 = op t2, c1
2357  // t3 = op t4, c2
2358  //
2359  // This creates more short live intervals.
2360  unsigned LDist = closestSucc(left);
2361  unsigned RDist = closestSucc(right);
2362  if (LDist != RDist) {
2363    DEBUG(++FactorCount[FactOther]);
2364    return LDist < RDist;
2365  }
2366
2367  // How many registers become live when the node is scheduled.
2368  unsigned LScratch = calcMaxScratches(left);
2369  unsigned RScratch = calcMaxScratches(right);
2370  if (LScratch != RScratch) {
2371    DEBUG(++FactorCount[FactOther]);
2372    return LScratch > RScratch;
2373  }
2374
2375  // Comparing latency against a call makes little sense unless the node
2376  // is register pressure-neutral.
2377  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2378    return (left->NodeQueueId > right->NodeQueueId);
2379
2380  // Do not compare latencies when one or both of the nodes are calls.
2381  if (!DisableSchedCycles &&
2382      !(left->isCall || right->isCall)) {
2383    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2384    if (result != 0)
2385      return result > 0;
2386  }
2387  else {
2388    if (left->getHeight() != right->getHeight()) {
2389      DEBUG(++FactorCount[FactHeight]);
2390      return left->getHeight() > right->getHeight();
2391    }
2392
2393    if (left->getDepth() != right->getDepth()) {
2394      DEBUG(++FactorCount[FactDepth]);
2395      return left->getDepth() < right->getDepth();
2396    }
2397  }
2398
2399  assert(left->NodeQueueId && right->NodeQueueId &&
2400         "NodeQueueId cannot be zero");
2401  DEBUG(++FactorCount[FactOther]);
2402  return (left->NodeQueueId > right->NodeQueueId);
2403}
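// To summarize, BURRSort breaks ties in this order: physical register defs
// (kept close to their uses), static Sethi-Ullman priority (adjusted down for
// call operands), source order when calls are involved, distance to the
// closest successor, scratch register count, latency (or raw height/depth when
// cycle-level data is unavailable), and finally NodeQueueId, i.e. the order in
// which the nodes became available.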
2404
2405// Bottom up
2406bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2407  if (int res = checkSpecialNodes(left, right))
2408    return res > 0;
2409
2410  return BURRSort(left, right, SPQ);
2411}
2412
2413// Source order, otherwise bottom up.
2414bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2415  if (int res = checkSpecialNodes(left, right))
2416    return res > 0;
2417
2418  unsigned LOrder = SPQ->getNodeOrdering(left);
2419  unsigned ROrder = SPQ->getNodeOrdering(right);
2420
2421  // Prefer an ordering where the node with the lower non-zero order number
2422  // gets the higher preference.
2423  if ((LOrder || ROrder) && LOrder != ROrder)
2424    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2425
2426  return BURRSort(left, right, SPQ);
2427}
2428
2429// If the time between now and when the instruction will be ready can cover
2430// the spill code, then avoid adding it to the ready queue. This gives long
2431// stalls highest priority and allows hoisting across calls. It should also
2432// speed up processing the available queue.
2433bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2434  static const unsigned ReadyDelay = 3;
2435
2436  if (SPQ->MayReduceRegPressure(SU)) return true;
2437
2438  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2439
2440  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2441      != ScheduleHazardRecognizer::NoHazard)
2442    return false;
2443
2444  return true;
2445}
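// E.g. with ReadyDelay == 3, an SU of height 10 stays pending until the
// current cycle reaches 7, unless scheduling it may reduce register pressure
// (MayReduceRegPressure), in which case it is made available immediately.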
2446
2447// Return true if right should be scheduled with higher priority than left.
2448bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2449  if (int res = checkSpecialNodes(left, right))
2450    return res > 0;
2451
2452  if (left->isCall || right->isCall)
2453    // No way to compute latency of calls.
2454    return BURRSort(left, right, SPQ);
2455
2456  bool LHigh = SPQ->HighRegPressure(left);
2457  bool RHigh = SPQ->HighRegPressure(right);
2458  // Avoid causing spills. If register pressure is high, schedule for
2459  // register pressure reduction.
2460  if (LHigh && !RHigh) {
2461    DEBUG(++FactorCount[FactPressureDiff]);
2462    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2463          << right->NodeNum << ")\n");
2464    return true;
2465  }
2466  else if (!LHigh && RHigh) {
2467    DEBUG(++FactorCount[FactPressureDiff]);
2468    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2469          << left->NodeNum << ")\n");
2470    return false;
2471  }
2472  if (!LHigh && !RHigh) {
2473    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2474    if (result != 0)
2475      return result > 0;
2476  }
2477  return BURRSort(left, right, SPQ);
2478}
2479
2480// Schedule as many instructions in each cycle as possible. So don't make an
2481// instruction available unless it is ready in the current cycle.
2482bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2483  if (SU->getHeight() > CurCycle) return false;
2484
2485  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2486      != ScheduleHazardRecognizer::NoHazard)
2487    return false;
2488
2489  return true;
2490}
2491
2492static bool canEnableCoalescing(SUnit *SU) {
2493  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2494  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2495    // CopyToReg should be close to its uses to facilitate coalescing and
2496    // avoid spilling.
2497    return true;
2498
2499  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2500      Opc == TargetOpcode::SUBREG_TO_REG ||
2501      Opc == TargetOpcode::INSERT_SUBREG)
2502    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2503    // close to their uses to facilitate coalescing.
2504    return true;
2505
2506  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2507    // If SU does not have a register def, schedule it close to its uses
2508    // because it does not lengthen any live ranges.
2509    return true;
2510
2511  return false;
2512}
2513
2514// list-ilp is currently an experimental scheduler that allows various
2515// heuristics to be enabled prior to the normal register reduction logic.
2516bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2517  if (int res = checkSpecialNodes(left, right))
2518    return res > 0;
2519
2520  if (left->isCall || right->isCall)
2521    // No way to compute latency of calls.
2522    return BURRSort(left, right, SPQ);
2523
2524  unsigned LLiveUses = 0, RLiveUses = 0;
2525  int LPDiff = 0, RPDiff = 0;
2526  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2527    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2528    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2529  }
2530  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2531    DEBUG(++FactorCount[FactPressureDiff]);
2532    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2533          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2534    return LPDiff > RPDiff;
2535  }
2536
2537  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2538    bool LReduce = canEnableCoalescing(left);
2539    bool RReduce = canEnableCoalescing(right);
2540    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2541    if (LReduce && !RReduce) return false;
2542    if (RReduce && !LReduce) return true;
2543  }
2544
2545  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2546    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2547          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2548    DEBUG(++FactorCount[FactRegUses]);
2549    return LLiveUses < RLiveUses;
2550  }
2551
2552  if (!DisableSchedStalls) {
2553    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2554    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2555    if (LStall != RStall) {
2556      DEBUG(++FactorCount[FactHeight]);
2557      return left->getHeight() > right->getHeight();
2558    }
2559  }
2560
2561  if (!DisableSchedCriticalPath) {
2562    int spread = (int)left->getDepth() - (int)right->getDepth();
2563    if (std::abs(spread) > MaxReorderWindow) {
2564      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2565            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2566            << right->getDepth() << "\n");
2567      DEBUG(++FactorCount[FactDepth]);
2568      return left->getDepth() < right->getDepth();
2569    }
2570  }
2571
2572  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2573    int spread = (int)left->getHeight() - (int)right->getHeight();
2574    if (std::abs(spread) > MaxReorderWindow) {
2575      DEBUG(++FactorCount[FactHeight]);
2576      return left->getHeight() > right->getHeight();
2577    }
2578  }
2579
2580  return BURRSort(left, right, SPQ);
2581}
2582
2583void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2584  SUnits = &sunits;
2585  // Add pseudo dependency edges for two-address nodes.
2586  AddPseudoTwoAddrDeps();
2587  // Reroute edges to nodes with multiple uses.
2588  if (!TracksRegPressure)
2589    PrescheduleNodesWithMultipleUses();
2590  // Calculate node priorities.
2591  CalculateSethiUllmanNumbers();
2592
2593  // For single block loops, mark nodes that look like canonical IV increments.
2594  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2595    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2596      initVRegCycle(&sunits[i]);
2597    }
2598  }
2599}
2600
2601//===----------------------------------------------------------------------===//
2602//                    Preschedule for Register Pressure
2603//===----------------------------------------------------------------------===//
2604
2605bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2606  if (SU->isTwoAddress) {
2607    unsigned Opc = SU->getNode()->getMachineOpcode();
2608    const MCInstrDesc &MCID = TII->get(Opc);
2609    unsigned NumRes = MCID.getNumDefs();
2610    unsigned NumOps = MCID.getNumOperands() - NumRes;
2611    for (unsigned i = 0; i != NumOps; ++i) {
2612      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2613        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2614        if (DU->getNodeId() != -1 &&
2615            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2616          return true;
2617      }
2618    }
2619  }
2620  return false;
2621}
2622
2623/// canClobberReachingPhysRegUse - True if SU would clobber one of its
2624/// successor's explicit physregs whose definition can reach DepSU.
2625/// i.e. DepSU should not be scheduled above SU.
2626static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2627                                         ScheduleDAGRRList *scheduleDAG,
2628                                         const TargetInstrInfo *TII,
2629                                         const TargetRegisterInfo *TRI) {
2630  const unsigned *ImpDefs
2631    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2632  if (!ImpDefs)
2633    return false;
2634
2635  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2636       SI != SE; ++SI) {
2637    SUnit *SuccSU = SI->getSUnit();
2638    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2639           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2640      if (!PI->isAssignedRegDep())
2641        continue;
2642
2643      for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
2644        // Return true if SU clobbers this physical register use and the
2645        // definition of the register reaches from DepSU. IsReachable queries a
2646        // topological forward sort of the DAG (following the successors).
2647        if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2648            scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2649          return true;
2650      }
2651    }
2652  }
2653  return false;
2654}
2655
2656/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2657/// physical register defs.
2658static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2659                                  const TargetInstrInfo *TII,
2660                                  const TargetRegisterInfo *TRI) {
2661  SDNode *N = SuccSU->getNode();
2662  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2663  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2664  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2665  for (const SDNode *SUNode = SU->getNode(); SUNode;
2666       SUNode = SUNode->getGluedNode()) {
2667    if (!SUNode->isMachineOpcode())
2668      continue;
2669    const unsigned *SUImpDefs =
2670      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2671    if (!SUImpDefs)
2672      return false;
2673    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2674      EVT VT = N->getValueType(i);
2675      if (VT == MVT::Glue || VT == MVT::Other)
2676        continue;
2677      if (!N->hasAnyUseOfValue(i))
2678        continue;
2679      unsigned Reg = ImpDefs[i - NumDefs];
2680      for (;*SUImpDefs; ++SUImpDefs) {
2681        unsigned SUReg = *SUImpDefs;
2682        if (TRI->regsOverlap(Reg, SUReg))
2683          return true;
2684      }
2685    }
2686  }
2687  return false;
2688}
2689
2690/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2691/// are not handled well by the general register pressure reduction
2692/// heuristics. When presented with code like this:
2693///
2694///      N
2695///    / |
2696///   /  |
2697///  U  store
2698///  |
2699/// ...
2700///
2701/// the heuristics tend to push the store up, but since the
2702/// operand of the store has another use (U), this would lengthen
2703/// the live range feeding that other use (the U->N edge).
2704///
2705/// This function transforms code like the above to route U's
2706/// dependence through the store when possible, like this:
2707///
2708///      N
2709///      ||
2710///      ||
2711///     store
2712///       |
2713///       U
2714///       |
2715///      ...
2716///
2717/// This results in the store being scheduled immediately
2718/// after N, which shortens the U->N live range, reducing
2719/// register pressure.
2720///
2721void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2722  // Visit all the nodes in topological order, working top-down.
2723  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2724    SUnit *SU = &(*SUnits)[i];
2725    // For now, only look at nodes with no data successors, such as stores.
2726    // These are especially important, due to the heuristics in
2727    // getNodePriority for nodes with no data successors.
2728    if (SU->NumSuccs != 0)
2729      continue;
2730    // For now, only look at nodes with exactly one data predecessor.
2731    if (SU->NumPreds != 1)
2732      continue;
2733    // Avoid prescheduling copies to virtual registers, which don't behave
2734    // like other nodes from the perspective of scheduling heuristics.
2735    if (SDNode *N = SU->getNode())
2736      if (N->getOpcode() == ISD::CopyToReg &&
2737          TargetRegisterInfo::isVirtualRegister
2738            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2739        continue;
2740
2741    // Locate the single data predecessor.
2742    SUnit *PredSU = 0;
2743    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2744         EE = SU->Preds.end(); II != EE; ++II)
2745      if (!II->isCtrl()) {
2746        PredSU = II->getSUnit();
2747        break;
2748      }
2749    assert(PredSU);
2750
2751    // Don't rewrite edges that carry physregs, because that requires additional
2752    // support infrastructure.
2753    if (PredSU->hasPhysRegDefs)
2754      continue;
2755    // Short-circuit the case where SU is PredSU's only data successor.
2756    if (PredSU->NumSuccs == 1)
2757      continue;
2758    // Avoid prescheduling to copies from virtual registers, which don't behave
2759    // like other nodes from the perspective of scheduling heuristics.
2760    if (SDNode *N = SU->getNode())
2761      if (N->getOpcode() == ISD::CopyFromReg &&
2762          TargetRegisterInfo::isVirtualRegister
2763            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2764        continue;
2765
2766    // Perform checks on the successors of PredSU.
2767    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2768         EE = PredSU->Succs.end(); II != EE; ++II) {
2769      SUnit *PredSuccSU = II->getSUnit();
2770      if (PredSuccSU == SU) continue;
2771      // If PredSU has another successor with no data successors, for
2772      // now don't attempt to choose either over the other.
2773      if (PredSuccSU->NumSuccs == 0)
2774        goto outer_loop_continue;
2775      // Don't break physical register dependencies.
2776      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2777        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2778          goto outer_loop_continue;
2779      // Don't introduce graph cycles.
2780      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2781        goto outer_loop_continue;
2782    }
2783
2784    // Ok, the transformation is safe and the heuristics suggest it is
2785    // profitable. Update the graph.
2786    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2787                 << " next to PredSU #" << PredSU->NodeNum
2788                 << " to guide scheduling in the presence of multiple uses\n");
2789    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2790      SDep Edge = PredSU->Succs[i];
2791      assert(!Edge.isAssignedRegDep());
2792      SUnit *SuccSU = Edge.getSUnit();
2793      if (SuccSU != SU) {
2794        Edge.setSUnit(PredSU);
2795        scheduleDAG->RemovePred(SuccSU, Edge);
2796        scheduleDAG->AddPred(SU, Edge);
2797        Edge.setSUnit(SU);
2798        scheduleDAG->AddPred(SuccSU, Edge);
2799        --i;
2800      }
2801    }
2802  outer_loop_continue:;
2803  }
2804}
2805
2806/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2807/// it as a def&use operand, add a pseudo control edge from it to the other
2808/// node (if it won't create a cycle) so the two-address one will be scheduled
2809/// first (lower in the schedule). If both nodes are two-address, favor the
2810/// one that has a CopyToReg use (more likely to be a loop induction update).
2811/// If both are two-address, but one is commutable while the other is not
2812/// commutable, favor the one that's not commutable.
2813void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2814  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2815    SUnit *SU = &(*SUnits)[i];
2816    if (!SU->isTwoAddress)
2817      continue;
2818
2819    SDNode *Node = SU->getNode();
2820    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2821      continue;
2822
2823    bool isLiveOut = hasOnlyLiveOutUses(SU);
2824    unsigned Opc = Node->getMachineOpcode();
2825    const MCInstrDesc &MCID = TII->get(Opc);
2826    unsigned NumRes = MCID.getNumDefs();
2827    unsigned NumOps = MCID.getNumOperands() - NumRes;
2828    for (unsigned j = 0; j != NumOps; ++j) {
2829      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2830        continue;
2831      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2832      if (DU->getNodeId() == -1)
2833        continue;
2834      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2835      if (!DUSU) continue;
2836      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2837           E = DUSU->Succs.end(); I != E; ++I) {
2838        if (I->isCtrl()) continue;
2839        SUnit *SuccSU = I->getSUnit();
2840        if (SuccSU == SU)
2841          continue;
2842        // Be conservative. Ignore if nodes aren't at roughly the same
2843        // depth and height.
2844        if (SuccSU->getHeight() < SU->getHeight() &&
2845            (SU->getHeight() - SuccSU->getHeight()) > 1)
2846          continue;
2847        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2848        // constrains whatever is using the copy, instead of the copy
2849        // itself. In the case that the copy is coalesced, this
2850        // preserves the intent of the pseudo two-address heuristics.
2851        while (SuccSU->Succs.size() == 1 &&
2852               SuccSU->getNode()->isMachineOpcode() &&
2853               SuccSU->getNode()->getMachineOpcode() ==
2854                 TargetOpcode::COPY_TO_REGCLASS)
2855          SuccSU = SuccSU->Succs.front().getSUnit();
2856        // Don't constrain non-instruction nodes.
2857        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2858          continue;
2859        // Don't constrain nodes with physical register defs if the
2860        // predecessor can clobber them.
2861        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2862          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2863            continue;
2864        }
2865        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2866        // these may be coalesced away. We want them close to their uses.
2867        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2868        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2869            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2870            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2871          continue;
2872        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
2873            (!canClobber(SuccSU, DUSU) ||
2874             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2875             (!SU->isCommutable && SuccSU->isCommutable)) &&
2876            !scheduleDAG->IsReachable(SuccSU, SU)) {
2877          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2878                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2879          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2880                                        /*Reg=*/0, /*isNormalMemory=*/false,
2881                                        /*isMustAlias=*/false,
2882                                        /*isArtificial=*/true));
2883        }
2884      }
2885    }
2886  }
2887}
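// As a concrete illustration: if SU is a two-address node "a = op a, b" and
// another node U also reads the old value of a, the artificial edge added
// above makes U a predecessor of SU, so U is forced above SU in the final
// schedule and still sees the unclobbered value of a.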
2888
2889//===----------------------------------------------------------------------===//
2890//                         Public Constructor Functions
2891//===----------------------------------------------------------------------===//
2892
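// Each factory below follows the same two-step wiring: the priority queue is
// created first, the ScheduleDAGRRList is constructed with it, and then
// setScheduleDAG() hands the DAG back to the queue so it can reach the hazard
// recognizer and reachability queries during comparisons.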
2893llvm::ScheduleDAGSDNodes *
2894llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2895                                 CodeGenOpt::Level OptLevel) {
2896  const TargetMachine &TM = IS->TM;
2897  const TargetInstrInfo *TII = TM.getInstrInfo();
2898  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2899
2900  BURegReductionPriorityQueue *PQ =
2901    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2902  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2903  PQ->setScheduleDAG(SD);
2904  return SD;
2905}
2906
2907llvm::ScheduleDAGSDNodes *
2908llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2909                                   CodeGenOpt::Level OptLevel) {
2910  const TargetMachine &TM = IS->TM;
2911  const TargetInstrInfo *TII = TM.getInstrInfo();
2912  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2913
2914  SrcRegReductionPriorityQueue *PQ =
2915    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2916  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2917  PQ->setScheduleDAG(SD);
2918  return SD;
2919}
2920
2921llvm::ScheduleDAGSDNodes *
2922llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2923                                   CodeGenOpt::Level OptLevel) {
2924  const TargetMachine &TM = IS->TM;
2925  const TargetInstrInfo *TII = TM.getInstrInfo();
2926  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2927  const TargetLowering *TLI = &IS->getTargetLowering();
2928
2929  HybridBURRPriorityQueue *PQ =
2930    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2931
2932  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2933  PQ->setScheduleDAG(SD);
2934  return SD;
2935}
2936
2937llvm::ScheduleDAGSDNodes *
2938llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2939                                CodeGenOpt::Level OptLevel) {
2940  const TargetMachine &TM = IS->TM;
2941  const TargetInstrInfo *TII = TM.getInstrInfo();
2942  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2943  const TargetLowering *TLI = &IS->getTargetLowering();
2944
2945  ILPBURRPriorityQueue *PQ =
2946    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2947  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2948  PQ->setScheduleDAG(SD);
2949  return SD;
2950}
2951