ScheduleDAGRRList.cpp revision 15993f83a419950f06d2879d6701530ae6449317
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> DisableSchedCycles(
70  "disable-sched-cycles", cl::Hidden, cl::init(false),
71  cl::desc("Disable cycle-level precision during preRA scheduling"));
72
73// Temporary sched=list-ilp flags until the heuristics are robust.
74// Some options are also available under sched=list-hybrid.
75static cl::opt<bool> DisableSchedRegPressure(
76  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
77  cl::desc("Disable regpressure priority in sched=list-ilp"));
78static cl::opt<bool> DisableSchedLiveUses(
79  "disable-sched-live-uses", cl::Hidden, cl::init(true),
80  cl::desc("Disable live use priority in sched=list-ilp"));
81static cl::opt<bool> DisableSchedVRegCycle(
82  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
83  cl::desc("Disable virtual register cycle interference checks"));
84static cl::opt<bool> DisableSchedPhysRegJoin(
85  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
86  cl::desc("Disable physreg def-use affinity"));
87static cl::opt<bool> DisableSchedStalls(
88  "disable-sched-stalls", cl::Hidden, cl::init(true),
89  cl::desc("Disable no-stall priority in sched=list-ilp"));
90static cl::opt<bool> DisableSchedCriticalPath(
91  "disable-sched-critical-path", cl::Hidden, cl::init(false),
92  cl::desc("Disable critical path priority in sched=list-ilp"));
93static cl::opt<bool> DisableSchedHeight(
94  "disable-sched-height", cl::Hidden, cl::init(false),
95  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
96
97static cl::opt<int> MaxReorderWindow(
98  "max-sched-reorder", cl::Hidden, cl::init(6),
99  cl::desc("Number of instructions to allow ahead of the critical path "
100           "in sched=list-ilp"));
101
102static cl::opt<unsigned> AvgIPC(
103  "sched-avg-ipc", cl::Hidden, cl::init(1),
104  cl::desc("Average inst/cycle when no target itinerary exists."));
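// These knobs work together with the scheduler registrations above. For
// example (hypothetical llc invocation, using the option names exactly as
// registered here):
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 -sched-avg-ipc=2 foo.ll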
105
106#ifndef NDEBUG
107namespace {
108  // For sched=list-ilp, count the number of times each factor comes into play.
109  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
110         FactStatic, FactOther, NumFactors };
111}
112static const char *FactorName[NumFactors] =
113{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
114static int FactorCount[NumFactors];
115#endif //!NDEBUG
116
117namespace {
118//===----------------------------------------------------------------------===//
119/// ScheduleDAGRRList - The actual register reduction list scheduler
120/// implementation.  This supports both top-down and bottom-up scheduling.
121///
122class ScheduleDAGRRList : public ScheduleDAGSDNodes {
123private:
124  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
125  /// it is top-down.
126  bool isBottomUp;
127
128  /// NeedLatency - True if the scheduler will make use of latency information.
129  ///
130  bool NeedLatency;
131
132  /// AvailableQueue - The priority queue to use for the available SUnits.
133  SchedulingPriorityQueue *AvailableQueue;
134
135  /// PendingQueue - This contains all of the instructions whose operands have
136  /// been issued, but their results are not ready yet (due to the latency of
137  /// the operation).  Once the operands become available, the instruction is
138  /// added to the AvailableQueue.
139  std::vector<SUnit*> PendingQueue;
140
141  /// HazardRec - The hazard recognizer to use.
142  ScheduleHazardRecognizer *HazardRec;
143
144  /// CurCycle - The current scheduler state corresponds to this cycle.
145  unsigned CurCycle;
146
147  /// MinAvailableCycle - Cycle of the soonest available instruction.
148  unsigned MinAvailableCycle;
149
150  /// IssueCount - Count of instructions issued in this cycle.
151  /// Currently valid only for bottom-up scheduling.
152  unsigned IssueCount;
153
154  /// LiveRegDefs - A map from each physical register to the SUnit that
155  /// defines it while it is "live". These defs must be scheduled before any
156  /// other node that modifies the register can be scheduled.
157  unsigned NumLiveRegs;
158  std::vector<SUnit*> LiveRegDefs;
159  std::vector<SUnit*> LiveRegGens;
160
161  /// Topo - A topological ordering for SUnits which permits fast IsReachable
162  /// and similar queries.
163  ScheduleDAGTopologicalSort Topo;
164
165public:
166  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
167                    SchedulingPriorityQueue *availqueue,
168                    CodeGenOpt::Level OptLevel)
169    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
170      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
171      Topo(SUnits) {
172
173    const TargetMachine &tm = mf.getTarget();
174    if (DisableSchedCycles || !NeedLatency)
175      HazardRec = new ScheduleHazardRecognizer();
176    else
177      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
178  }
179
180  ~ScheduleDAGRRList() {
181    delete HazardRec;
182    delete AvailableQueue;
183  }
184
185  void Schedule();
186
187  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
188
189  /// IsReachable - Checks if SU is reachable from TargetSU.
190  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
191    return Topo.IsReachable(SU, TargetSU);
192  }
193
194  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
195  /// create a cycle.
196  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
197    return Topo.WillCreateCycle(SU, TargetSU);
198  }
199
200  /// AddPred - adds a predecessor edge to SUnit SU.
201  /// This updates the topological ordering to account for the new edge
202  /// before adding the edge to SU itself.
203  void AddPred(SUnit *SU, const SDep &D) {
204    Topo.AddPred(SU, D.getSUnit());
205    SU->addPred(D);
206  }
207
208  /// RemovePred - removes a predecessor edge from SUnit SU.
209  /// This updates the topological ordering to account for the removed edge
210  /// before removing it from SU itself.
211  void RemovePred(SUnit *SU, const SDep &D) {
212    Topo.RemovePred(SU, D.getSUnit());
213    SU->removePred(D);
214  }
215
216private:
217  bool isReady(SUnit *SU) {
218    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
219      AvailableQueue->isReady(SU);
220  }
221
222  void ReleasePred(SUnit *SU, const SDep *PredEdge);
223  void ReleasePredecessors(SUnit *SU);
224  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
225  void ReleaseSuccessors(SUnit *SU);
226  void ReleasePending();
227  void AdvanceToCycle(unsigned NextCycle);
228  void AdvancePastStalls(SUnit *SU);
229  void EmitNode(SUnit *SU);
230  void ScheduleNodeBottomUp(SUnit*);
231  void CapturePred(SDep *PredEdge);
232  void UnscheduleNodeBottomUp(SUnit*);
233  void RestoreHazardCheckerBottomUp();
234  void BacktrackBottomUp(SUnit*, SUnit*);
235  SUnit *CopyAndMoveSuccessors(SUnit*);
236  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
237                                const TargetRegisterClass*,
238                                const TargetRegisterClass*,
239                                SmallVector<SUnit*, 2>&);
240  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
241
242  SUnit *PickNodeToScheduleBottomUp();
243  void ListScheduleBottomUp();
244
245  void ScheduleNodeTopDown(SUnit*);
246  void ListScheduleTopDown();
247
248
249  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
250  /// Updates the topological ordering if required.
251  SUnit *CreateNewSUnit(SDNode *N) {
252    unsigned NumSUnits = SUnits.size();
253    SUnit *NewNode = NewSUnit(N);
254    // Update the topological ordering.
255    if (NewNode->NodeNum >= NumSUnits)
256      Topo.InitDAGTopologicalSorting();
257    return NewNode;
258  }
259
260  /// CreateClone - Creates a new SUnit from an existing one.
261  /// Updates the topological ordering if required.
262  SUnit *CreateClone(SUnit *N) {
263    unsigned NumSUnits = SUnits.size();
264    SUnit *NewNode = Clone(N);
265    // Update the topological ordering.
266    if (NewNode->NodeNum >= NumSUnits)
267      Topo.InitDAGTopologicalSorting();
268    return NewNode;
269  }
270
271  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
272  /// need actual latency information but the hybrid scheduler does.
273  bool ForceUnitLatencies() const {
274    return !NeedLatency;
275  }
276};
277}  // end anonymous namespace
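// A factory such as createBURRListDAGScheduler (referenced in the scheduler
// registrations above) is expected to wire things up roughly as follows --
// a sketch only, assuming some concrete SchedulingPriorityQueue subclass:
//
//   SchedulingPriorityQueue *PQ = /* bottom-up reg-reduction queue */;
//   ScheduleDAGRRList *SD =
//     new ScheduleDAGRRList(MF, /*needlatency=*/false, PQ, OptLevel);
//
// Note that ~ScheduleDAGRRList deletes both the hazard recognizer and the
// priority queue, so the scheduler owns PQ once it is constructed.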
278
279/// GetCostForDef - Looks up the register class and cost for a given definition.
280/// Typically this just means looking up the representative register class,
281/// but for untyped values (MVT::untyped) it means inspecting the node's
282/// opcode to determine what register class is being generated.
283static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
284                          const TargetLowering *TLI,
285                          const TargetInstrInfo *TII,
286                          const TargetRegisterInfo *TRI,
287                          unsigned &RegClass, unsigned &Cost) {
288  EVT VT = RegDefPos.GetValue();
289
290  // Special handling for untyped values.  These values can only come from
291  // the expansion of custom DAG-to-DAG patterns.
292  if (VT == MVT::untyped) {
293    const SDNode *Node = RegDefPos.GetNode();
294    unsigned Opcode = Node->getMachineOpcode();
295
296    if (Opcode == TargetOpcode::REG_SEQUENCE) {
297      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
298      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
299      RegClass = RC->getID();
300      Cost = 1;
301      return;
302    }
303
304    unsigned Idx = RegDefPos.GetIdx();
305    const TargetInstrDesc &Desc = TII->get(Opcode);
306    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
307    RegClass = RC->getID();
308    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
309    // better way to determine it.
310    Cost = 1;
311  } else {
312    RegClass = TLI->getRepRegClassFor(VT)->getID();
313    Cost = TLI->getRepRegClassCostFor(VT);
314  }
315}
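// The (RegClass, Cost) pair computed above feeds the register pressure
// tracking below: RegReductionPQBase keeps its RegPressure and RegLimit
// vectors indexed by register class ID, and the cost is the amount charged
// against that class when tracking pressure for the node's defs.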
316
317/// Schedule - Schedule the DAG using list scheduling.
318void ScheduleDAGRRList::Schedule() {
319  DEBUG(dbgs()
320        << "********** List Scheduling BB#" << BB->getNumber()
321        << " '" << BB->getName() << "' **********\n");
322#ifndef NDEBUG
323  for (int i = 0; i < NumFactors; ++i) {
324    FactorCount[i] = 0;
325  }
326#endif //!NDEBUG
327
328  CurCycle = 0;
329  IssueCount = 0;
330  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
331  NumLiveRegs = 0;
332  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
333  LiveRegGens.resize(TRI->getNumRegs(), NULL);
334
335  // Build the scheduling graph.
336  BuildSchedGraph(NULL);
337
338  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
339          SUnits[su].dumpAll(this));
340  Topo.InitDAGTopologicalSorting();
341
342  AvailableQueue->initNodes(SUnits);
343
344  HazardRec->Reset();
345
346  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
347  if (isBottomUp)
348    ListScheduleBottomUp();
349  else
350    ListScheduleTopDown();
351
352#ifndef NDEBUG
353  for (int i = 0; i < NumFactors; ++i) {
354    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
355  }
356#endif // !NDEBUG
357  AvailableQueue->releaseState();
358}
359
360//===----------------------------------------------------------------------===//
361//  Bottom-Up Scheduling
362//===----------------------------------------------------------------------===//
363
364/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
365/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
366void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
367  SUnit *PredSU = PredEdge->getSUnit();
368
369#ifndef NDEBUG
370  if (PredSU->NumSuccsLeft == 0) {
371    dbgs() << "*** Scheduling failed! ***\n";
372    PredSU->dump(this);
373    dbgs() << " has been released too many times!\n";
374    llvm_unreachable(0);
375  }
376#endif
377  --PredSU->NumSuccsLeft;
378
379  if (!ForceUnitLatencies()) {
380    // Updating predecessor's height. This is now the cycle when the
381    // predecessor can be scheduled without causing a pipeline stall.
382    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
383  }
384
385  // If all the node's successors are scheduled, this node is ready
386  // to be scheduled. Ignore the special EntrySU node.
387  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
388    PredSU->isAvailable = true;
389
390    unsigned Height = PredSU->getHeight();
391    if (Height < MinAvailableCycle)
392      MinAvailableCycle = Height;
393
394    if (isReady(PredSU)) {
395      AvailableQueue->push(PredSU);
396    }
397    // CapturePred and others may have left the node in the pending queue;
398    // avoid adding it twice.
399    else if (!PredSU->isPending) {
400      PredSU->isPending = true;
401      PendingQueue.push_back(PredSU);
402    }
403  }
404}
405
406/// Call ReleasePred for each predecessor, then update register live def/gen.
407/// Always update LiveRegDefs for a register dependence even if the current SU
408/// also defines the register. This effectively creates one large live range
409/// across a sequence of two-address nodes. This is important because the
410/// entire chain must be scheduled together. Example:
411///
412/// flags = (3) add
413/// flags = (2) addc flags
414/// flags = (1) addc flags
415///
416/// results in
417///
418/// LiveRegDefs[flags] = 3
419/// LiveRegGens[flags] = 1
420///
421/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
422/// interference on flags.
423void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
424  // Bottom up: release predecessors
425  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
426       I != E; ++I) {
427    ReleasePred(SU, &*I);
428    if (I->isAssignedRegDep()) {
429      // This is a physical register dependency and it's impossible or
430      // expensive to copy the register. Make sure nothing that can
431      // clobber the register is scheduled between the predecessor and
432      // this node.
433      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
434      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
435             "interference on register dependence");
436      LiveRegDefs[I->getReg()] = I->getSUnit();
437      if (!LiveRegGens[I->getReg()]) {
438        ++NumLiveRegs;
439        LiveRegGens[I->getReg()] = SU;
440      }
441    }
442  }
443}
444
445/// Check to see if any of the pending instructions are ready to issue.  If
446/// so, add them to the available queue.
447void ScheduleDAGRRList::ReleasePending() {
448  if (DisableSchedCycles) {
449    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
450    return;
451  }
452
453  // If the available queue is empty, it is safe to reset MinAvailableCycle.
454  if (AvailableQueue->empty())
455    MinAvailableCycle = UINT_MAX;
456
457  // Check to see if any of the pending instructions are ready to issue.  If
458  // so, add them to the available queue.
459  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
460    unsigned ReadyCycle =
461      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
462    if (ReadyCycle < MinAvailableCycle)
463      MinAvailableCycle = ReadyCycle;
464
465    if (PendingQueue[i]->isAvailable) {
466      if (!isReady(PendingQueue[i]))
467          continue;
468      AvailableQueue->push(PendingQueue[i]);
469    }
470    PendingQueue[i]->isPending = false;
471    PendingQueue[i] = PendingQueue.back();
472    PendingQueue.pop_back();
473    --i; --e;
474  }
475}
476
477/// Move the scheduler state forward by the specified number of Cycles.
478void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
479  if (NextCycle <= CurCycle)
480    return;
481
482  IssueCount = 0;
483  AvailableQueue->setCurCycle(NextCycle);
484  if (!HazardRec->isEnabled()) {
485    // Bypass lots of virtual calls in case of long latency.
486    CurCycle = NextCycle;
487  }
488  else {
489    for (; CurCycle != NextCycle; ++CurCycle) {
490      if (isBottomUp)
491        HazardRec->RecedeCycle();
492      else
493        HazardRec->AdvanceCycle();
494    }
495  }
496  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
497  // available Q to release pending nodes at least once before popping.
498  ReleasePending();
499}
500
501/// Move the scheduler state forward until the specified node's dependents are
502/// ready and can be scheduled with no resource conflicts.
503void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
504  if (DisableSchedCycles)
505    return;
506
507  // FIXME: Nodes such as CopyFromReg probably should not advance the current
508  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
509  // has predecessors the cycle will be advanced when they are scheduled.
510  // But given the crude nature of modeling latency through such nodes, we
511  // currently need to treat these nodes like real instructions.
512  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
513
514  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
515
516  // Bump CurCycle to account for latency. We assume the latency of other
517  // available instructions may be hidden by the stall (not a full pipe stall).
518  // This updates the hazard recognizer's cycle before reserving resources for
519  // this instruction.
520  AdvanceToCycle(ReadyCycle);
521
522  // Calls are scheduled in their preceding cycle, so don't conflict with
523  // hazards from instructions after the call. EmitNode will reset the
524  // scoreboard state before emitting the call.
525  if (isBottomUp && SU->isCall)
526    return;
527
528  // FIXME: For resource conflicts in very long non-pipelined stages, we
529  // should probably skip ahead here to avoid useless scoreboard checks.
530  int Stalls = 0;
531  while (true) {
532    ScheduleHazardRecognizer::HazardType HT =
533      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
534
535    if (HT == ScheduleHazardRecognizer::NoHazard)
536      break;
537
538    ++Stalls;
539  }
540  AdvanceToCycle(CurCycle + Stalls);
541}
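// Worked example (bottom-up): if CurCycle is 3 and SU's height is 7, the code
// above first advances the scheduler state to cycle 7, then probes the hazard
// recognizer at stall offsets 0, -1, -2, ... until it reports NoHazard, and
// finally advances past that many additional stall cycles.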
542
543/// Record this SUnit in the HazardRecognizer.
544/// Does not update CurCycle.
545void ScheduleDAGRRList::EmitNode(SUnit *SU) {
546  if (!HazardRec->isEnabled())
547    return;
548
549  // Check for phys reg copy.
550  if (!SU->getNode())
551    return;
552
553  switch (SU->getNode()->getOpcode()) {
554  default:
555    assert(SU->getNode()->isMachineOpcode() &&
556           "This target-independent node should not be scheduled.");
557    break;
558  case ISD::MERGE_VALUES:
559  case ISD::TokenFactor:
560  case ISD::CopyToReg:
561  case ISD::CopyFromReg:
562  case ISD::EH_LABEL:
563    // Noops don't affect the scoreboard state. Copies are likely to be
564    // removed.
565    return;
566  case ISD::INLINEASM:
567    // For inline asm, clear the pipeline state.
568    HazardRec->Reset();
569    return;
570  }
571  if (isBottomUp && SU->isCall) {
572    // Calls are scheduled with their preceding instructions. For bottom-up
573    // scheduling, clear the pipeline state before emitting.
574    HazardRec->Reset();
575  }
576
577  HazardRec->EmitInstruction(SU);
578
579  if (!isBottomUp && SU->isCall) {
580    HazardRec->Reset();
581  }
582}
583
584static void resetVRegCycle(SUnit *SU);
585
586/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
587/// count of its predecessors. If a predecessor pending count is zero, add it to
588/// the Available queue.
589void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
590  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
591  DEBUG(SU->dump(this));
592
593#ifndef NDEBUG
594  if (CurCycle < SU->getHeight())
595    DEBUG(dbgs() << "   Height [" << SU->getHeight()
596          << "] pipeline stall!\n");
597#endif
598
599  // FIXME: Do not modify node height. It may interfere with
600  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
601  // node its ready cycle can aid heuristics, and after scheduling it can
602  // indicate the scheduled cycle.
603  SU->setHeightToAtLeast(CurCycle);
604
605  // Reserve resources for the scheduled instruction.
606  EmitNode(SU);
607
608  Sequence.push_back(SU);
609
610  AvailableQueue->ScheduledNode(SU);
611
612  // If HazardRec is disabled, and each inst counts as one cycle, then
613  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
614  // PendingQueue for schedulers that implement HasReadyFilter.
615  if (!HazardRec->isEnabled() && AvgIPC < 2)
616    AdvanceToCycle(CurCycle + 1);
617
618  // Update liveness of predecessors before successors to avoid treating a
619  // two-address node as a live range def.
620  ReleasePredecessors(SU);
621
622  // Release all the implicit physical register defs that are live.
623  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
624       I != E; ++I) {
625    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
626    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
627      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
628      --NumLiveRegs;
629      LiveRegDefs[I->getReg()] = NULL;
630      LiveRegGens[I->getReg()] = NULL;
631    }
632  }
633
634  resetVRegCycle(SU);
635
636  SU->isScheduled = true;
637
638  // Conditions under which the scheduler should eagerly advance the cycle:
639  // (1) No available instructions
640  // (2) All pipelines full, so available instructions must have hazards.
641  //
642  // If HazardRec is disabled, the cycle was pre-advanced before calling
643  // ReleasePredecessors. In that case, IssueCount should remain 0.
644  //
645  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
646  if (HazardRec->isEnabled() || AvgIPC > 1) {
647    if (SU->getNode() && SU->getNode()->isMachineOpcode())
648      ++IssueCount;
649    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
650        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
651      AdvanceToCycle(CurCycle + 1);
652  }
653}
654
655/// CapturePred - This does the opposite of ReleasePred. Since SU is being
656/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
657/// them from AvailableQueue if necessary.
658void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
659  SUnit *PredSU = PredEdge->getSUnit();
660  if (PredSU->isAvailable) {
661    PredSU->isAvailable = false;
662    if (!PredSU->isPending)
663      AvailableQueue->remove(PredSU);
664  }
665
666  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
667  ++PredSU->NumSuccsLeft;
668}
669
670/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update the
671/// state of the node and its predecessors to reflect the change.
672void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
673  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
674  DEBUG(SU->dump(this));
675
676  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
677       I != E; ++I) {
678    CapturePred(&*I);
679    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
680      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
681      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
682             "Physical register dependency violated?");
683      --NumLiveRegs;
684      LiveRegDefs[I->getReg()] = NULL;
685      LiveRegGens[I->getReg()] = NULL;
686    }
687  }
688
689  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
690       I != E; ++I) {
691    if (I->isAssignedRegDep()) {
692      if (!LiveRegDefs[I->getReg()]) {
693        ++NumLiveRegs;
694      }
695      // This becomes the nearest def. Note that an earlier def may still be
696      // pending if this is a two-address node.
697      LiveRegDefs[I->getReg()] = SU;
698      if (LiveRegGens[I->getReg()] == NULL ||
699          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
700        LiveRegGens[I->getReg()] = I->getSUnit();
701    }
702  }
703  if (SU->getHeight() < MinAvailableCycle)
704    MinAvailableCycle = SU->getHeight();
705
706  SU->setHeightDirty();
707  SU->isScheduled = false;
708  SU->isAvailable = true;
709  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
710    // Don't make available until backtracking is complete.
711    SU->isPending = true;
712    PendingQueue.push_back(SU);
713  }
714  else {
715    AvailableQueue->push(SU);
716  }
717  AvailableQueue->UnscheduledNode(SU);
718}
719
720/// After backtracking, the hazard checker needs to be restored to a state
721/// corresponding to the current cycle.
722void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
723  HazardRec->Reset();
724
725  unsigned LookAhead = std::min((unsigned)Sequence.size(),
726                                HazardRec->getMaxLookAhead());
727  if (LookAhead == 0)
728    return;
729
730  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
731  unsigned HazardCycle = (*I)->getHeight();
732  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
733    SUnit *SU = *I;
734    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
735      HazardRec->RecedeCycle();
736    }
737    EmitNode(SU);
738  }
739}
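// For example, with a max lookahead of 2 and Sequence ending in
// [..., A (height 5), B (height 7)], the loop above replays A and then B into
// the freshly reset scoreboard, calling RecedeCycle() twice between them so
// that the scoreboard again reflects the current bottom-up cycle.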
740
741/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
742/// BTCycle in order to schedule a specific node.
743void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
744  SUnit *OldSU = Sequence.back();
745  while (true) {
746    Sequence.pop_back();
747    if (SU->isSucc(OldSU))
748      // Don't try to remove SU from AvailableQueue.
749      SU->isAvailable = false;
750    // FIXME: use ready cycle instead of height
751    CurCycle = OldSU->getHeight();
752    UnscheduleNodeBottomUp(OldSU);
753    AvailableQueue->setCurCycle(CurCycle);
754    if (OldSU == BtSU)
755      break;
756    OldSU = Sequence.back();
757  }
758
759  assert(!SU->isSucc(OldSU) && "Something is wrong!");
760
761  RestoreHazardCheckerBottomUp();
762
763  ReleasePending();
764
765  ++NumBacktracks;
766}
767
768static bool isOperandOf(const SUnit *SU, SDNode *N) {
769  for (const SDNode *SUNode = SU->getNode(); SUNode;
770       SUNode = SUNode->getGluedNode()) {
771    if (SUNode->isOperandOf(N))
772      return true;
773  }
774  return false;
775}
776
777/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
778/// successors to the newly created node.
779SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
780  SDNode *N = SU->getNode();
781  if (!N)
782    return NULL;
783
784  if (SU->getNode()->getGluedNode())
785    return NULL;
786
787  SUnit *NewSU;
788  bool TryUnfold = false;
789  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
790    EVT VT = N->getValueType(i);
791    if (VT == MVT::Glue)
792      return NULL;
793    else if (VT == MVT::Other)
794      TryUnfold = true;
795  }
796  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
797    const SDValue &Op = N->getOperand(i);
798    EVT VT = Op.getNode()->getValueType(Op.getResNo());
799    if (VT == MVT::Glue)
800      return NULL;
801  }
802
803  if (TryUnfold) {
804    SmallVector<SDNode*, 2> NewNodes;
805    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
806      return NULL;
807
808    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
809    assert(NewNodes.size() == 2 && "Expected a load folding node!");
810
811    N = NewNodes[1];
812    SDNode *LoadNode = NewNodes[0];
813    unsigned NumVals = N->getNumValues();
814    unsigned OldNumVals = SU->getNode()->getNumValues();
815    for (unsigned i = 0; i != NumVals; ++i)
816      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
817    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
818                                   SDValue(LoadNode, 1));
819
820    // LoadNode may already exist. This can happen when there is another
821    // load from the same location that produces the same type of value
822    // but with different alignment or volatility.
823    bool isNewLoad = true;
824    SUnit *LoadSU;
825    if (LoadNode->getNodeId() != -1) {
826      LoadSU = &SUnits[LoadNode->getNodeId()];
827      isNewLoad = false;
828    } else {
829      LoadSU = CreateNewSUnit(LoadNode);
830      LoadNode->setNodeId(LoadSU->NodeNum);
831
832      InitNumRegDefsLeft(LoadSU);
833      ComputeLatency(LoadSU);
834    }
835
836    SUnit *NewSU = CreateNewSUnit(N);
837    assert(N->getNodeId() == -1 && "Node already inserted!");
838    N->setNodeId(NewSU->NodeNum);
839
840    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
841    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
842      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
843        NewSU->isTwoAddress = true;
844        break;
845      }
846    }
847    if (TID.isCommutable())
848      NewSU->isCommutable = true;
849
850    InitNumRegDefsLeft(NewSU);
851    ComputeLatency(NewSU);
852
853    // Record all the edges to and from the old SU, by category.
854    SmallVector<SDep, 4> ChainPreds;
855    SmallVector<SDep, 4> ChainSuccs;
856    SmallVector<SDep, 4> LoadPreds;
857    SmallVector<SDep, 4> NodePreds;
858    SmallVector<SDep, 4> NodeSuccs;
859    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
860         I != E; ++I) {
861      if (I->isCtrl())
862        ChainPreds.push_back(*I);
863      else if (isOperandOf(I->getSUnit(), LoadNode))
864        LoadPreds.push_back(*I);
865      else
866        NodePreds.push_back(*I);
867    }
868    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
869         I != E; ++I) {
870      if (I->isCtrl())
871        ChainSuccs.push_back(*I);
872      else
873        NodeSuccs.push_back(*I);
874    }
875
876    // Now assign edges to the newly-created nodes.
877    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
878      const SDep &Pred = ChainPreds[i];
879      RemovePred(SU, Pred);
880      if (isNewLoad)
881        AddPred(LoadSU, Pred);
882    }
883    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
884      const SDep &Pred = LoadPreds[i];
885      RemovePred(SU, Pred);
886      if (isNewLoad)
887        AddPred(LoadSU, Pred);
888    }
889    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
890      const SDep &Pred = NodePreds[i];
891      RemovePred(SU, Pred);
892      AddPred(NewSU, Pred);
893    }
894    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
895      SDep D = NodeSuccs[i];
896      SUnit *SuccDep = D.getSUnit();
897      D.setSUnit(SU);
898      RemovePred(SuccDep, D);
899      D.setSUnit(NewSU);
900      AddPred(SuccDep, D);
901      // Balance register pressure.
902      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
903          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
904        --NewSU->NumRegDefsLeft;
905    }
906    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
907      SDep D = ChainSuccs[i];
908      SUnit *SuccDep = D.getSUnit();
909      D.setSUnit(SU);
910      RemovePred(SuccDep, D);
911      if (isNewLoad) {
912        D.setSUnit(LoadSU);
913        AddPred(SuccDep, D);
914      }
915    }
916
917    // Add a data dependency to reflect that NewSU reads the value defined
918    // by LoadSU.
919    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
920
921    if (isNewLoad)
922      AvailableQueue->addNode(LoadSU);
923    AvailableQueue->addNode(NewSU);
924
925    ++NumUnfolds;
926
927    if (NewSU->NumSuccsLeft == 0) {
928      NewSU->isAvailable = true;
929      return NewSU;
930    }
931    SU = NewSU;
932  }
933
934  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
935  NewSU = CreateClone(SU);
936
937  // New SUnit has the exact same predecessors.
938  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
939       I != E; ++I)
940    if (!I->isArtificial())
941      AddPred(NewSU, *I);
942
943  // Only copy scheduled successors. Cut them from the old node's successor
944  // list and move them over.
945  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
946  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
947       I != E; ++I) {
948    if (I->isArtificial())
949      continue;
950    SUnit *SuccSU = I->getSUnit();
951    if (SuccSU->isScheduled) {
952      SDep D = *I;
953      D.setSUnit(NewSU);
954      AddPred(SuccSU, D);
955      D.setSUnit(SU);
956      DelDeps.push_back(std::make_pair(SuccSU, D));
957    }
958  }
959  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
960    RemovePred(DelDeps[i].first, DelDeps[i].second);
961
962  AvailableQueue->updateNode(SU);
963  AvailableQueue->addNode(NewSU);
964
965  ++NumDups;
966  return NewSU;
967}
968
969/// InsertCopiesAndMoveSuccs - Insert register copies and move all
970/// scheduled successors of the given SUnit to the last copy.
971void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
972                                               const TargetRegisterClass *DestRC,
973                                               const TargetRegisterClass *SrcRC,
974                                               SmallVector<SUnit*, 2> &Copies) {
975  SUnit *CopyFromSU = CreateNewSUnit(NULL);
976  CopyFromSU->CopySrcRC = SrcRC;
977  CopyFromSU->CopyDstRC = DestRC;
978
979  SUnit *CopyToSU = CreateNewSUnit(NULL);
980  CopyToSU->CopySrcRC = DestRC;
981  CopyToSU->CopyDstRC = SrcRC;
982
983  // Only copy scheduled successors. Cut them from the old node's successor
984  // list and move them over.
985  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
986  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
987       I != E; ++I) {
988    if (I->isArtificial())
989      continue;
990    SUnit *SuccSU = I->getSUnit();
991    if (SuccSU->isScheduled) {
992      SDep D = *I;
993      D.setSUnit(CopyToSU);
994      AddPred(SuccSU, D);
995      DelDeps.push_back(std::make_pair(SuccSU, *I));
996    }
997    else {
998      // Avoid scheduling the def-side copy before other successors. Otherwise
999      // we could introduce another physreg interference on the copy and
1000      // continue inserting copies indefinitely.
1001      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
1002             /*Reg=*/0, /*isNormalMemory=*/false,
1003             /*isMustAlias=*/false, /*isArtificial=*/true);
1004      AddPred(SuccSU, D);
1005    }
1006  }
1007  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1008    RemovePred(DelDeps[i].first, DelDeps[i].second);
1009
1010  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
1011  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
1012
1013  AvailableQueue->updateNode(SU);
1014  AvailableQueue->addNode(CopyFromSU);
1015  AvailableQueue->addNode(CopyToSU);
1016  Copies.push_back(CopyFromSU);
1017  Copies.push_back(CopyToSU);
1018
1019  ++NumPRCopies;
1020}
1021
1022/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1023/// definition of the specified node.
1024/// FIXME: Move to SelectionDAG?
1025static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1026                                 const TargetInstrInfo *TII) {
1027  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
1028  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1029  unsigned NumRes = TID.getNumDefs();
1030  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1031    if (Reg == *ImpDef)
1032      break;
1033    ++NumRes;
1034  }
1035  return N->getValueType(NumRes);
1036}
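// Example: for a machine node whose instruction descriptor has one explicit
// def and implicit defs {X, Y} (hypothetical names), a query for Y starts
// NumRes at 1, advances past X to 2, and returns the node's value type at
// index 2 (the function assumes explicit results come first, followed by one
// value per implicit def).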
1037
1038/// CheckForLiveRegDef - If the specified register def of the specified SUnit
1039/// clobbers any "live" registers, add the interfering registers to LRegs.
1040static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1041                               std::vector<SUnit*> &LiveRegDefs,
1042                               SmallSet<unsigned, 4> &RegAdded,
1043                               SmallVector<unsigned, 4> &LRegs,
1044                               const TargetRegisterInfo *TRI) {
1045  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
1046
1047    // Check if Reg, or any register aliased with it, is live.
1048    if (!LiveRegDefs[*AliasI]) continue;
1049
1050    // Allow multiple uses of the same def.
1051    if (LiveRegDefs[*AliasI] == SU) continue;
1052
1053    // Add Reg to the set of interfering live regs.
1054    if (RegAdded.insert(*AliasI)) {
1055      LRegs.push_back(*AliasI);
1056    }
1057  }
1058}
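// Note that the loop above walks TRI->getOverlaps(Reg), so a def interferes
// not only with a live def of Reg itself but also with live defs of any
// register that aliases Reg (for example a sub- or super-register).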
1059
1060/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1061/// scheduling of the given node to satisfy live physical register dependencies.
1062/// If the specific node is the last one that's available to schedule, do
1063/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1064bool ScheduleDAGRRList::
1065DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1066  if (NumLiveRegs == 0)
1067    return false;
1068
1069  SmallSet<unsigned, 4> RegAdded;
1070  // If this node would clobber any "live" register, then it's not ready.
1071  //
1072  // If SU is the currently live definition of the same register that it uses,
1073  // then we are free to schedule it.
1074  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1075       I != E; ++I) {
1076    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1077      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1078                         RegAdded, LRegs, TRI);
1079  }
1080
1081  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1082    if (Node->getOpcode() == ISD::INLINEASM) {
1083      // Inline asm can clobber physical defs.
1084      unsigned NumOps = Node->getNumOperands();
1085      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1086        --NumOps;  // Ignore the glue operand.
1087
1088      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1089        unsigned Flags =
1090          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1091        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1092
1093        ++i; // Skip the ID value.
1094        if (InlineAsm::isRegDefKind(Flags) ||
1095            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1096            InlineAsm::isClobberKind(Flags)) {
1097          // Check for def of register or earlyclobber register.
1098          for (; NumVals; --NumVals, ++i) {
1099            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1100            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1101              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1102          }
1103        } else
1104          i += NumVals;
1105      }
1106      continue;
1107    }
1108
1109    if (!Node->isMachineOpcode())
1110      continue;
1111    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
1112    if (!TID.ImplicitDefs)
1113      continue;
1114    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
1115      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1116  }
1117
1118  return !LRegs.empty();
1119}
1120
1121/// Return a node that can be scheduled in this cycle. Requirements:
1122/// (1) Ready: latency has been satisfied
1123/// (2) No Hazards: resources are available
1124/// (3) No Interferences: may unschedule to break register interferences.
1125SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1126  SmallVector<SUnit*, 4> Interferences;
1127  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1128
1129  SUnit *CurSU = AvailableQueue->pop();
1130  while (CurSU) {
1131    SmallVector<unsigned, 4> LRegs;
1132    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1133      break;
1134    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1135
1136    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1137    Interferences.push_back(CurSU);
1138    CurSU = AvailableQueue->pop();
1139  }
1140  if (CurSU) {
1141    // Add the nodes that aren't ready back onto the available list.
1142    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1143      Interferences[i]->isPending = false;
1144      assert(Interferences[i]->isAvailable && "must still be available");
1145      AvailableQueue->push(Interferences[i]);
1146    }
1147    return CurSU;
1148  }
1149
1150  // All candidates are delayed due to live physical reg dependencies.
1151  // Try backtracking, code duplication, or inserting cross class copies
1152  // to resolve it.
1153  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1154    SUnit *TrySU = Interferences[i];
1155    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1156
1157    // Try unscheduling up to the point where it's safe to schedule
1158    // this node.
1159    SUnit *BtSU = NULL;
1160    unsigned LiveCycle = UINT_MAX;
1161    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1162      unsigned Reg = LRegs[j];
1163      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1164        BtSU = LiveRegGens[Reg];
1165        LiveCycle = BtSU->getHeight();
1166      }
1167    }
1168    if (!WillCreateCycle(TrySU, BtSU))  {
1169      BacktrackBottomUp(TrySU, BtSU);
1170
1171      // Force the current node to be scheduled before the node that
1172      // requires the physical reg dep.
1173      if (BtSU->isAvailable) {
1174        BtSU->isAvailable = false;
1175        if (!BtSU->isPending)
1176          AvailableQueue->remove(BtSU);
1177      }
1178      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1179                          /*Reg=*/0, /*isNormalMemory=*/false,
1180                          /*isMustAlias=*/false, /*isArtificial=*/true));
1181
1182      // If one or more successors have been unscheduled, then the current
1183      // node is no longer available. Schedule a successor that's now
1184      // available instead.
1185      if (!TrySU->isAvailable) {
1186        CurSU = AvailableQueue->pop();
1187      }
1188      else {
1189        CurSU = TrySU;
1190        TrySU->isPending = false;
1191        Interferences.erase(Interferences.begin()+i);
1192      }
1193      break;
1194    }
1195  }
1196
1197  if (!CurSU) {
1198    // Can't backtrack. If it's too expensive to copy the value, then try to
1199    // duplicate the nodes that produce these "too expensive to copy"
1200    // values to break the dependency. In case even that doesn't work,
1201    // insert cross class copies.
1202    // If it's not too expensive, i.e. cost != -1, issue copies.
1203    SUnit *TrySU = Interferences[0];
1204    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1205    assert(LRegs.size() == 1 && "Can't handle this yet!");
1206    unsigned Reg = LRegs[0];
1207    SUnit *LRDef = LiveRegDefs[Reg];
1208    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1209    const TargetRegisterClass *RC =
1210      TRI->getMinimalPhysRegClass(Reg, VT);
1211    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1212
1213    // If the cross copy register class is the same as RC, then it must be
1214    // possible to copy the value directly. Do not try to duplicate the def.
1215    // If the cross copy register class is not the same as RC, then it's
1216    // possible to copy the value, but it requires cross register class copies
1217    // and is expensive.
1218    // If cross copy register class is null, then it's not possible to copy
1219    // the value at all.
1220    SUnit *NewDef = 0;
1221    if (DestRC != RC) {
1222      NewDef = CopyAndMoveSuccessors(LRDef);
1223      if (!DestRC && !NewDef)
1224        report_fatal_error("Can't handle live physical register dependency!");
1225    }
1226    if (!NewDef) {
1227      // Issue copies, these can be expensive cross register class copies.
1228      SmallVector<SUnit*, 2> Copies;
1229      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1230      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1231            << " to SU #" << Copies.front()->NodeNum << "\n");
1232      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1233                          /*Reg=*/0, /*isNormalMemory=*/false,
1234                          /*isMustAlias=*/false,
1235                          /*isArtificial=*/true));
1236      NewDef = Copies.back();
1237    }
1238
1239    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1240          << " to SU #" << TrySU->NodeNum << "\n");
1241    LiveRegDefs[Reg] = NewDef;
1242    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1243                         /*Reg=*/0, /*isNormalMemory=*/false,
1244                         /*isMustAlias=*/false,
1245                         /*isArtificial=*/true));
1246    TrySU->isAvailable = false;
1247    CurSU = NewDef;
1248  }
1249
1250  assert(CurSU && "Unable to resolve live physical register dependencies!");
1251
1252  // Add the nodes that aren't ready back onto the available list.
1253  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1254    Interferences[i]->isPending = false;
1255    // May no longer be available due to backtracking.
1256    if (Interferences[i]->isAvailable) {
1257      AvailableQueue->push(Interferences[i]);
1258    }
1259  }
1260  return CurSU;
1261}
1262
1263/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1264/// schedulers.
1265void ScheduleDAGRRList::ListScheduleBottomUp() {
1266  // Release any predecessors of the special Exit node.
1267  ReleasePredecessors(&ExitSU);
1268
1269  // Add root to Available queue.
1270  if (!SUnits.empty()) {
1271    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1272    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1273    RootSU->isAvailable = true;
1274    AvailableQueue->push(RootSU);
1275  }
1276
1277  // While Available queue is not empty, grab the node with the highest
1278  // priority. If it is not ready put it back.  Schedule the node.
1279  Sequence.reserve(SUnits.size());
1280  while (!AvailableQueue->empty()) {
1281    DEBUG(dbgs() << "\nExamining Available:\n";
1282          AvailableQueue->dump(this));
1283
1284    // Pick the best node to schedule taking all constraints into
1285    // consideration.
1286    SUnit *SU = PickNodeToScheduleBottomUp();
1287
1288    AdvancePastStalls(SU);
1289
1290    ScheduleNodeBottomUp(SU);
1291
1292    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1293      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1294      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1295      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1296    }
1297  }
1298
1299  // Reverse the order since we scheduled bottom-up.
1300  std::reverse(Sequence.begin(), Sequence.end());
1301
1302#ifndef NDEBUG
1303  VerifySchedule(isBottomUp);
1304#endif
1305}
1306
1307//===----------------------------------------------------------------------===//
1308//  Top-Down Scheduling
1309//===----------------------------------------------------------------------===//
1310
1311/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1312/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1313void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1314  SUnit *SuccSU = SuccEdge->getSUnit();
1315
1316#ifndef NDEBUG
1317  if (SuccSU->NumPredsLeft == 0) {
1318    dbgs() << "*** Scheduling failed! ***\n";
1319    SuccSU->dump(this);
1320    dbgs() << " has been released too many times!\n";
1321    llvm_unreachable(0);
1322  }
1323#endif
1324  --SuccSU->NumPredsLeft;
1325
1326  // If all the node's predecessors are scheduled, this node is ready
1327  // to be scheduled. Ignore the special ExitSU node.
1328  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1329    SuccSU->isAvailable = true;
1330    AvailableQueue->push(SuccSU);
1331  }
1332}
1333
1334void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1335  // Top down: release successors
1336  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1337       I != E; ++I) {
1338    assert(!I->isAssignedRegDep() &&
1339           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1340
1341    ReleaseSucc(SU, &*I);
1342  }
1343}
1344
1345/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1346/// count of its successors. If a successor pending count is zero, add it to
1347/// the Available queue.
1348void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1349  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1350  DEBUG(SU->dump(this));
1351
1352  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1353  SU->setDepthToAtLeast(CurCycle);
1354  Sequence.push_back(SU);
1355
1356  ReleaseSuccessors(SU);
1357  SU->isScheduled = true;
1358  AvailableQueue->ScheduledNode(SU);
1359}
1360
1361/// ListScheduleTopDown - The main loop of list scheduling for top-down
1362/// schedulers.
1363void ScheduleDAGRRList::ListScheduleTopDown() {
1364  AvailableQueue->setCurCycle(CurCycle);
1365
1366  // Release any successors of the special Entry node.
1367  ReleaseSuccessors(&EntrySU);
1368
1369  // All leaves to Available queue.
1370  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1371    // It is available if it has no predecessors.
1372    if (SUnits[i].Preds.empty()) {
1373      AvailableQueue->push(&SUnits[i]);
1374      SUnits[i].isAvailable = true;
1375    }
1376  }
1377
1378  // While Available queue is not empty, grab the node with the highest
1379  // priority. If it is not ready put it back.  Schedule the node.
1380  Sequence.reserve(SUnits.size());
1381  while (!AvailableQueue->empty()) {
1382    SUnit *CurSU = AvailableQueue->pop();
1383
1384    if (CurSU)
1385      ScheduleNodeTopDown(CurSU);
1386    ++CurCycle;
1387    AvailableQueue->setCurCycle(CurCycle);
1388  }
1389
1390#ifndef NDEBUG
1391  VerifySchedule(isBottomUp);
1392#endif
1393}
1394
1395
1396//===----------------------------------------------------------------------===//
1397//                RegReductionPriorityQueue Definition
1398//===----------------------------------------------------------------------===//
1399//
1400// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1401// to reduce register pressure.
1402//
1403namespace {
1404class RegReductionPQBase;
1405
1406struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1407  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1408};
1409
1410#ifndef NDEBUG
1411template<class SF>
1412struct reverse_sort : public queue_sort {
1413  SF &SortFunc;
1414  reverse_sort(SF &sf) : SortFunc(sf) {}
1415  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
1416
1417  bool operator()(SUnit* left, SUnit* right) const {
1418    // reverse left/right rather than simply !SortFunc(left, right)
1419    // to expose different paths in the comparison logic.
1420    return SortFunc(right, left);
1421  }
1422};
1423#endif // NDEBUG
1424
1425// bu_ls_rr_sort - Priority function for the bottom-up register pressure
1426// reduction scheduler.
1427struct bu_ls_rr_sort : public queue_sort {
1428  enum {
1429    IsBottomUp = true,
1430    HasReadyFilter = false
1431  };
1432
1433  RegReductionPQBase *SPQ;
1434  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1435  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1436
1437  bool operator()(SUnit* left, SUnit* right) const;
1438};
1439
1440// td_ls_rr_sort - Priority function for top down register pressure reduction
1441// scheduler.
1442struct td_ls_rr_sort : public queue_sort {
1443  enum {
1444    IsBottomUp = false,
1445    HasReadyFilter = false
1446  };
1447
1448  RegReductionPQBase *SPQ;
1449  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1450  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1451
1452  bool operator()(const SUnit* left, const SUnit* right) const;
1453};
1454
1455// src_ls_rr_sort - Priority function for source order scheduler.
1456struct src_ls_rr_sort : public queue_sort {
1457  enum {
1458    IsBottomUp = true,
1459    HasReadyFilter = false
1460  };
1461
1462  RegReductionPQBase *SPQ;
1463  src_ls_rr_sort(RegReductionPQBase *spq)
1464    : SPQ(spq) {}
1465  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1466    : SPQ(RHS.SPQ) {}
1467
1468  bool operator()(SUnit* left, SUnit* right) const;
1469};
1470
1471// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1472struct hybrid_ls_rr_sort : public queue_sort {
1473  enum {
1474    IsBottomUp = true,
1475    HasReadyFilter = false
1476  };
1477
1478  RegReductionPQBase *SPQ;
1479  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1480    : SPQ(spq) {}
1481  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1482    : SPQ(RHS.SPQ) {}
1483
1484  bool isReady(SUnit *SU, unsigned CurCycle) const;
1485
1486  bool operator()(SUnit* left, SUnit* right) const;
1487};
1488
1489// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1490// scheduler.
1491struct ilp_ls_rr_sort : public queue_sort {
1492  enum {
1493    IsBottomUp = true,
1494    HasReadyFilter = false
1495  };
1496
1497  RegReductionPQBase *SPQ;
1498  ilp_ls_rr_sort(RegReductionPQBase *spq)
1499    : SPQ(spq) {}
1500  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1501    : SPQ(RHS.SPQ) {}
1502
1503  bool isReady(SUnit *SU, unsigned CurCycle) const;
1504
1505  bool operator()(SUnit* left, SUnit* right) const;
1506};
1507
1508class RegReductionPQBase : public SchedulingPriorityQueue {
1509protected:
1510  std::vector<SUnit*> Queue;
1511  unsigned CurQueueId;
1512  bool TracksRegPressure;
1513
1514  // SUnits - The SUnits for the current graph.
1515  std::vector<SUnit> *SUnits;
1516
1517  MachineFunction &MF;
1518  const TargetInstrInfo *TII;
1519  const TargetRegisterInfo *TRI;
1520  const TargetLowering *TLI;
1521  ScheduleDAGRRList *scheduleDAG;
1522
1523  // SethiUllmanNumbers - The SethiUllman number for each node.
1524  std::vector<unsigned> SethiUllmanNumbers;
1525
1526  /// RegPressure - Tracking current reg pressure per register class.
1527  ///
1528  std::vector<unsigned> RegPressure;
1529
1530  /// RegLimit - Tracking the number of allocatable registers per register
1531  /// class.
1532  std::vector<unsigned> RegLimit;
1533
1534public:
1535  RegReductionPQBase(MachineFunction &mf,
1536                     bool hasReadyFilter,
1537                     bool tracksrp,
1538                     const TargetInstrInfo *tii,
1539                     const TargetRegisterInfo *tri,
1540                     const TargetLowering *tli)
1541    : SchedulingPriorityQueue(hasReadyFilter),
1542      CurQueueId(0), TracksRegPressure(tracksrp),
1543      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1544    if (TracksRegPressure) {
1545      unsigned NumRC = TRI->getNumRegClasses();
1546      RegLimit.resize(NumRC);
1547      RegPressure.resize(NumRC);
1548      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1549      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1550      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1551             E = TRI->regclass_end(); I != E; ++I)
1552        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1553    }
1554  }
1555
1556  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1557    scheduleDAG = scheduleDag;
1558  }
1559
1560  ScheduleHazardRecognizer* getHazardRec() {
1561    return scheduleDAG->getHazardRec();
1562  }
1563
1564  void initNodes(std::vector<SUnit> &sunits);
1565
1566  void addNode(const SUnit *SU);
1567
1568  void updateNode(const SUnit *SU);
1569
1570  void releaseState() {
1571    SUnits = 0;
1572    SethiUllmanNumbers.clear();
1573    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1574  }
1575
1576  unsigned getNodePriority(const SUnit *SU) const;
1577
1578  unsigned getNodeOrdering(const SUnit *SU) const {
1579    if (!SU->getNode()) return 0;
1580
1581    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1582  }
1583
1584  bool empty() const { return Queue.empty(); }
1585
1586  void push(SUnit *U) {
1587    assert(!U->NodeQueueId && "Node in the queue already");
1588    U->NodeQueueId = ++CurQueueId;
1589    Queue.push_back(U);
1590  }
1591
1592  void remove(SUnit *SU) {
1593    assert(!Queue.empty() && "Queue is empty!");
1594    assert(SU->NodeQueueId != 0 && "Not in queue!");
1595    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1596                                                 SU);
1597    if (I != prior(Queue.end()))
1598      std::swap(*I, Queue.back());
1599    Queue.pop_back();
1600    SU->NodeQueueId = 0;
1601  }
1602
1603  bool tracksRegPressure() const { return TracksRegPressure; }
1604
1605  void dumpRegPressure() const;
1606
1607  bool HighRegPressure(const SUnit *SU) const;
1608
1609  bool MayReduceRegPressure(SUnit *SU) const;
1610
1611  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1612
1613  void ScheduledNode(SUnit *SU);
1614
1615  void UnscheduledNode(SUnit *SU);
1616
1617protected:
1618  bool canClobber(const SUnit *SU, const SUnit *Op);
1619  void AddPseudoTwoAddrDeps();
1620  void PrescheduleNodesWithMultipleUses();
1621  void CalculateSethiUllmanNumbers();
1622};
1623
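// popFromQueueImpl - Linearly scan the queue for the best node to pop. Note
// that Picker(A, B) returning true means B is preferred over A (see the
// operator() comments below), so the loop keeps the preferred candidate in
// Best and swaps it to the back of the vector before popping.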
1624template<class SF>
1625static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1626  std::vector<SUnit *>::iterator Best = Q.begin();
1627  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1628         E = Q.end(); I != E; ++I)
1629    if (Picker(*Best, *I))
1630      Best = I;
1631  SUnit *V = *Best;
1632  if (Best != prior(Q.end()))
1633    std::swap(*Best, Q.back());
1634  Q.pop_back();
1635  return V;
1636}
1637
1638template<class SF>
1639SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1640#ifndef NDEBUG
1641  if (DAG->StressSched) {
1642    reverse_sort<SF> RPicker(Picker);
1643    return popFromQueueImpl(Q, RPicker);
1644  }
1645#endif
1646  (void)DAG;
1647  return popFromQueueImpl(Q, Picker);
1648}
1649
1650template<class SF>
1651class RegReductionPriorityQueue : public RegReductionPQBase {
1652  SF Picker;
1653
1654public:
1655  RegReductionPriorityQueue(MachineFunction &mf,
1656                            bool tracksrp,
1657                            const TargetInstrInfo *tii,
1658                            const TargetRegisterInfo *tri,
1659                            const TargetLowering *tli)
1660    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1661      Picker(this) {}
1662
1663  bool isBottomUp() const { return SF::IsBottomUp; }
1664
1665  bool isReady(SUnit *U) const {
1666    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1667  }
1668
1669  SUnit *pop() {
1670    if (Queue.empty()) return NULL;
1671
1672    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1673    V->NodeQueueId = 0;
1674    return V;
1675  }
1676
1677  void dump(ScheduleDAG *DAG) const {
1678    // Emulate pop() without clobbering NodeQueueIds.
1679    std::vector<SUnit*> DumpQueue = Queue;
1680    SF DumpPicker = Picker;
1681    while (!DumpQueue.empty()) {
1682      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1683      if (isBottomUp())
1684        dbgs() << "Height " << SU->getHeight() << ": ";
1685      else
1686        dbgs() << "Depth " << SU->getDepth() << ": ";
1687      SU->dump(DAG);
1688    }
1689  }
1690};
1691
1692typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1693BURegReductionPriorityQueue;
1694
1695typedef RegReductionPriorityQueue<td_ls_rr_sort>
1696TDRegReductionPriorityQueue;
1697
1698typedef RegReductionPriorityQueue<src_ls_rr_sort>
1699SrcRegReductionPriorityQueue;
1700
1701typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1702HybridBURRPriorityQueue;
1703
1704typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1705ILPBURRPriorityQueue;
1706} // end anonymous namespace
1707
1708//===----------------------------------------------------------------------===//
1709//           Static Node Priority for Register Pressure Reduction
1710//===----------------------------------------------------------------------===//
1711
1712// Check for special nodes that bypass scheduling heuristics.
1713// Currently this pushes TokenFactor nodes down, but may be used for other
1714// pseudo-ops as well.
1715//
1716// Return -1 to schedule right above left, 1 for left above right.
1717// Return 0 if no bias exists.
1718static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1719  bool LSchedLow = left->isScheduleLow;
1720  bool RSchedLow = right->isScheduleLow;
1721  if (LSchedLow != RSchedLow)
1722    return LSchedLow < RSchedLow ? 1 : -1;
1723  return 0;
1724}
1725
1726/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1727/// Smaller number is the higher priority.
1728static unsigned
1729CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1730  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1731  if (SethiUllmanNumber != 0)
1732    return SethiUllmanNumber;
1733
1734  unsigned Extra = 0;
1735  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1736       I != E; ++I) {
1737    if (I->isCtrl()) continue;  // ignore chain preds
1738    SUnit *PredSU = I->getSUnit();
1739    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1740    if (PredSethiUllman > SethiUllmanNumber) {
1741      SethiUllmanNumber = PredSethiUllman;
1742      Extra = 0;
1743    } else if (PredSethiUllman == SethiUllmanNumber)
1744      ++Extra;
1745  }
1746
1747  SethiUllmanNumber += Extra;
1748
1749  if (SethiUllmanNumber == 0)
1750    SethiUllmanNumber = 1;
1751
1752  return SethiUllmanNumber;
1753}
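
// A worked example of the recurrence above (illustrative only): a node with no
// data predecessors gets number 1. A node whose two data predecessors are both
// numbered 1 takes that maximum plus one Extra for the tie, giving 2, because
// whichever operand is evaluated first keeps one value live across the other.
// A node with predecessors numbered 2 and 1 simply takes the maximum, 2.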
1754
1755/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1756/// scheduling units.
1757void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1758  SethiUllmanNumbers.assign(SUnits->size(), 0);
1759
1760  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1761    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1762}
1763
1764void RegReductionPQBase::addNode(const SUnit *SU) {
1765  unsigned SUSize = SethiUllmanNumbers.size();
1766  if (SUnits->size() > SUSize)
1767    SethiUllmanNumbers.resize(SUSize*2, 0);
1768  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1769}
1770
1771void RegReductionPQBase::updateNode(const SUnit *SU) {
1772  SethiUllmanNumbers[SU->NodeNum] = 0;
1773  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1774}
1775
1776// Lower priority means schedule further down, closer to the end of the block.
1777// For bottom-up scheduling, lower priority SUs are scheduled first.
1778unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1779  assert(SU->NodeNum < SethiUllmanNumbers.size());
1780  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1781  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1782    // CopyToReg should be close to its uses to facilitate coalescing and
1783    // avoid spilling.
1784    return 0;
1785  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1786      Opc == TargetOpcode::SUBREG_TO_REG ||
1787      Opc == TargetOpcode::INSERT_SUBREG)
1788    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1789    // close to their uses to facilitate coalescing.
1790    return 0;
1791  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1792    // If SU does not have a register use, i.e. it doesn't produce a value
1793    // that would be consumed (e.g. store), then it terminates a chain of
1794    // computation.  Give it a large SethiUllman number so it will be
1795    // scheduled right before its predecessors, without lengthening
1796    // their live ranges.
1797    return 0xffff;
1798  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1799    // If SU does not have a register def, schedule it close to its uses
1800    // because it does not lengthen any live ranges.
1801    return 0;
1802#if 1
1803  return SethiUllmanNumbers[SU->NodeNum];
1804#else
1805  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1806  if (SU->isCallOp) {
1807    // FIXME: This assumes all of the defs are used as call operands.
1808    int NP = (int)Priority - SU->getNode()->getNumValues();
1809    return (NP > 0) ? NP : 0;
1810  }
1811  return Priority;
1812#endif
1813}
1814
1815//===----------------------------------------------------------------------===//
1816//                     Register Pressure Tracking
1817//===----------------------------------------------------------------------===//
1818
1819void RegReductionPQBase::dumpRegPressure() const {
1820  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1821         E = TRI->regclass_end(); I != E; ++I) {
1822    const TargetRegisterClass *RC = *I;
1823    unsigned Id = RC->getID();
1824    unsigned RP = RegPressure[Id];
1825    if (!RP) continue;
1826    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1827          << '\n');
1828  }
1829}
1830
1831bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1832  if (!TLI)
1833    return false;
1834
1835  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1836       I != E; ++I) {
1837    if (I->isCtrl())
1838      continue;
1839    SUnit *PredSU = I->getSUnit();
1840    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1841    // to cover the number of registers defined (they are all live).
1842    if (PredSU->NumRegDefsLeft == 0) {
1843      continue;
1844    }
1845    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1846         RegDefPos.IsValid(); RegDefPos.Advance()) {
1847      unsigned RCId, Cost;
1848      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1849
1850      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1851        return true;
1852    }
1853  }
1854  return false;
1855}
1856
1857bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1858  const SDNode *N = SU->getNode();
1859
1860  if (!N->isMachineOpcode() || !SU->NumSuccs)
1861    return false;
1862
1863  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1864  for (unsigned i = 0; i != NumDefs; ++i) {
1865    EVT VT = N->getValueType(i);
1866    if (!N->hasAnyUseOfValue(i))
1867      continue;
1868    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1869    if (RegPressure[RCId] >= RegLimit[RCId])
1870      return true;
1871  }
1872  return false;
1873}
1874
1875// Compute the register pressure contribution of this instruction by counting
1876// up for uses that are not yet live and down for defs. Only count register
1877// classes that are already under high pressure. As a side effect, compute the
1878// number of uses of registers that are already live.
1879//
1880// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1881// so it could probably be factored.
1882int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1883  LiveUses = 0;
1884  int PDiff = 0;
1885  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1886       I != E; ++I) {
1887    if (I->isCtrl())
1888      continue;
1889    SUnit *PredSU = I->getSUnit();
1890    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1891    // to cover the number of registers defined (they are all live).
1892    if (PredSU->NumRegDefsLeft == 0) {
1893      if (PredSU->getNode()->isMachineOpcode())
1894        ++LiveUses;
1895      continue;
1896    }
1897    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1898         RegDefPos.IsValid(); RegDefPos.Advance()) {
1899      EVT VT = RegDefPos.GetValue();
1900      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1901      if (RegPressure[RCId] >= RegLimit[RCId])
1902        ++PDiff;
1903    }
1904  }
1905  const SDNode *N = SU->getNode();
1906
1907  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1908    return PDiff;
1909
1910  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1911  for (unsigned i = 0; i != NumDefs; ++i) {
1912    EVT VT = N->getValueType(i);
1913    if (!N->hasAnyUseOfValue(i))
1914      continue;
1915    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1916    if (RegPressure[RCId] >= RegLimit[RCId])
1917      --PDiff;
1918  }
1919  return PDiff;
1920}
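
// An illustrative reading of RegPressureDiff above: if scheduling SU would make
// two not-yet-live operand values live in classes already at their pressure
// limit (+2) and would end the liveness of one of SU's own defs in a class at
// its limit (-1), PDiff is +1, i.e. SU is expected to make the excess pressure
// worse. The list-ilp comparator below prefers the node with the smaller diff.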
1921
1922void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1923  if (!TracksRegPressure)
1924    return;
1925
1926  if (!SU->getNode())
1927    return;
1928
1929  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1930       I != E; ++I) {
1931    if (I->isCtrl())
1932      continue;
1933    SUnit *PredSU = I->getSUnit();
1934    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1935    // to cover the number of registers defined (they are all live).
1936    if (PredSU->NumRegDefsLeft == 0) {
1937      continue;
1938    }
1939    // FIXME: The ScheduleDAG currently loses information about which of a
1940    // node's values is consumed by each dependence. Consequently, if the node
1941    // defines multiple register classes, we don't know which to pressurize
1942    // here. Instead the following loop consumes the register defs in an
1943    // arbitrary order. At least it handles the common case of clustered loads
1944    // to the same class. For precise liveness, each SDep needs to indicate the
1945    // result number. But that tightly couples the ScheduleDAG with the
1946    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1947    // value type or register class to SDep.
1948    //
1949    // The most important aspect of register tracking is balancing the increase
1950    // here with the reduction further below. Note that this SU may use multiple
1951    // defs in PredSU. They can't be determined here, but we've already
1952    // compensated by reducing NumRegDefsLeft in PredSU during
1953    // ScheduleDAGSDNodes::AddSchedEdges.
1954    --PredSU->NumRegDefsLeft;
1955    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1956    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1957         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1958      if (SkipRegDefs)
1959        continue;
1960
1961      unsigned RCId, Cost;
1962      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1963      RegPressure[RCId] += Cost;
1964      break;
1965    }
1966  }
1967
1968  // We should have this assert, but there may be dead SDNodes that never
1969  // materialize as SUnits, so they don't appear to generate liveness.
1970  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1971  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1972  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1973       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1974    if (SkipRegDefs > 0)
1975      continue;
1976    unsigned RCId, Cost;
1977    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1978    if (RegPressure[RCId] < Cost) {
1979      // Register pressure tracking is imprecise. This can happen. But we try
1980      // hard not to let it happen because it likely results in poor scheduling.
1981      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1982      RegPressure[RCId] = 0;
1983    }
1984    else {
1985      RegPressure[RCId] -= Cost;
1986    }
1987  }
1988  dumpRegPressure();
1989}
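
// A note on the bottom-up bookkeeping in ScheduledNode above: scheduling a node
// makes one register def of each data predecessor live in the not-yet-scheduled
// region (pressure rises in the pred loop), while the node's own defs, whose
// uses have all been scheduled already, stop being live above it (pressure
// falls in the second loop).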
1990
1991void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1992  if (!TracksRegPressure)
1993    return;
1994
1995  const SDNode *N = SU->getNode();
1996  if (!N) return;
1997
1998  if (!N->isMachineOpcode()) {
1999    if (N->getOpcode() != ISD::CopyToReg)
2000      return;
2001  } else {
2002    unsigned Opc = N->getMachineOpcode();
2003    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2004        Opc == TargetOpcode::INSERT_SUBREG ||
2005        Opc == TargetOpcode::SUBREG_TO_REG ||
2006        Opc == TargetOpcode::REG_SEQUENCE ||
2007        Opc == TargetOpcode::IMPLICIT_DEF)
2008      return;
2009  }
2010
2011  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2012       I != E; ++I) {
2013    if (I->isCtrl())
2014      continue;
2015    SUnit *PredSU = I->getSUnit();
2016    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2017    // counts data deps.
2018    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2019      continue;
2020    const SDNode *PN = PredSU->getNode();
2021    if (!PN->isMachineOpcode()) {
2022      if (PN->getOpcode() == ISD::CopyFromReg) {
2023        EVT VT = PN->getValueType(0);
2024        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2025        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2026      }
2027      continue;
2028    }
2029    unsigned POpc = PN->getMachineOpcode();
2030    if (POpc == TargetOpcode::IMPLICIT_DEF)
2031      continue;
2032    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2033        POpc == TargetOpcode::INSERT_SUBREG ||
2034        POpc == TargetOpcode::SUBREG_TO_REG) {
2035      EVT VT = PN->getValueType(0);
2036      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2037      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2038      continue;
2039    }
2040    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2041    for (unsigned i = 0; i != NumDefs; ++i) {
2042      EVT VT = PN->getValueType(i);
2043      if (!PN->hasAnyUseOfValue(i))
2044        continue;
2045      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2046      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2047        // Register pressure tracking is imprecise. This can happen.
2048        RegPressure[RCId] = 0;
2049      else
2050        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2051    }
2052  }
2053
2054  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2055  // may transfer data dependencies to CopyToReg.
2056  if (SU->NumSuccs && N->isMachineOpcode()) {
2057    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2058    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2059      EVT VT = N->getValueType(i);
2060      if (VT == MVT::Glue || VT == MVT::Other)
2061        continue;
2062      if (!N->hasAnyUseOfValue(i))
2063        continue;
2064      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2065      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2066    }
2067  }
2068
2069  dumpRegPressure();
2070}
2071
2072//===----------------------------------------------------------------------===//
2073//           Dynamic Node Priority for Register Pressure Reduction
2074//===----------------------------------------------------------------------===//
2075
2076/// closestSucc - Returns the scheduled cycle of the successor which is
2077/// closest to the current cycle.
2078static unsigned closestSucc(const SUnit *SU) {
2079  unsigned MaxHeight = 0;
2080  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2081       I != E; ++I) {
2082    if (I->isCtrl()) continue;  // ignore chain succs
2083    unsigned Height = I->getSUnit()->getHeight();
2084    // If there are a bunch of CopyToRegs stacked up, they should be considered
2085    // to be at the same position.
2086    if (I->getSUnit()->getNode() &&
2087        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2088      Height = closestSucc(I->getSUnit())+1;
2089    if (Height > MaxHeight)
2090      MaxHeight = Height;
2091  }
2092  return MaxHeight;
2093}
2094
2095/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2096/// for scratch registers, i.e. the number of data dependencies.
2097static unsigned calcMaxScratches(const SUnit *SU) {
2098  unsigned Scratches = 0;
2099  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2100       I != E; ++I) {
2101    if (I->isCtrl()) continue;  // ignore chain preds
2102    Scratches++;
2103  }
2104  return Scratches;
2105}
2106
2107/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2108/// CopyFromReg from a virtual register.
2109static bool hasOnlyLiveInOpers(const SUnit *SU) {
2110  bool RetVal = false;
2111  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2112       I != E; ++I) {
2113    if (I->isCtrl()) continue;
2114    const SUnit *PredSU = I->getSUnit();
2115    if (PredSU->getNode() &&
2116        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2117      unsigned Reg =
2118        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2119      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2120        RetVal = true;
2121        continue;
2122      }
2123    }
2124    return false;
2125  }
2126  return RetVal;
2127}
2128
2129/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2130/// CopyToReg to a virtual register. This SU def is probably a liveout and
2131/// it has no other use. It should be scheduled closer to the terminator.
2132static bool hasOnlyLiveOutUses(const SUnit *SU) {
2133  bool RetVal = false;
2134  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2135       I != E; ++I) {
2136    if (I->isCtrl()) continue;
2137    const SUnit *SuccSU = I->getSUnit();
2138    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2139      unsigned Reg =
2140        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2141      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2142        RetVal = true;
2143        continue;
2144      }
2145    }
2146    return false;
2147  }
2148  return RetVal;
2149}
2150
2151// Set isVRegCycle for a node with only live in opers and live out uses. Also
2152// set isVRegCycle for its CopyFromReg operands.
2153//
2154// This is only relevant for single-block loops, in which case the VRegCycle
2155// node is likely an induction variable whose operand and target virtual
2156// registers should be coalesced (e.g. pre/post increment values). Setting the
2157// isVRegCycle flag helps the scheduler prioritize other uses of the same
2158// CopyFromReg so that this node becomes the virtual register "kill". This
2159// avoids interference between the values live in and out of the block and
2160// eliminates a copy inside the loop.
2161static void initVRegCycle(SUnit *SU) {
2162  if (DisableSchedVRegCycle)
2163    return;
2164
2165  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2166    return;
2167
2168  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2169
2170  SU->isVRegCycle = true;
2171
2172  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2173       I != E; ++I) {
2174    if (I->isCtrl()) continue;
2175    I->getSUnit()->isVRegCycle = true;
2176  }
2177}
2178
2179// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2180// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2181static void resetVRegCycle(SUnit *SU) {
2182  if (!SU->isVRegCycle)
2183    return;
2184
2185  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2186       I != E; ++I) {
2187    if (I->isCtrl()) continue;  // ignore chain preds
2188    SUnit *PredSU = I->getSUnit();
2189    if (PredSU->isVRegCycle) {
2190      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2191             "VRegCycle def must be CopyFromReg");
2192      I->getSUnit()->isVRegCycle = 0;
2193    }
2194  }
2195}
2196
2197// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2198// means a node that defines the VRegCycle has not been scheduled yet.
2199static bool hasVRegCycleUse(const SUnit *SU) {
2200  // If this SU also defines the VReg, don't hoist it as a "use".
2201  if (SU->isVRegCycle)
2202    return false;
2203
2204  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2205       I != E; ++I) {
2206    if (I->isCtrl()) continue;  // ignore chain preds
2207    if (I->getSUnit()->isVRegCycle &&
2208        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2209      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2210      return true;
2211    }
2212  }
2213  return false;
2214}
2215
2216// Check for either a dependence (latency) or resource (hazard) stall.
2217//
2218// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2219static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2220  if ((int)SPQ->getCurCycle() < Height) return true;
2221  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2222      != ScheduleHazardRecognizer::NoHazard)
2223    return true;
2224  return false;
2225}
2226
2227// Return -1 if left has higher priority, 1 if right has higher priority.
2228// Return 0 if latency-based priority is equivalent.
2229static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2230                            RegReductionPQBase *SPQ) {
2231  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2232  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2233  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2234  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2235  int LHeight = (int)left->getHeight() + LPenalty;
2236  int RHeight = (int)right->getHeight() + RPenalty;
2237
2238  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
2239    BUHasStall(left, LHeight, SPQ);
2240  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
2241    BUHasStall(right, RHeight, SPQ);
2242
2243  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2244  // If scheduling both of them will cause pipeline stalls, sort them
2245  // according to their height.
2246  if (LStall) {
2247    if (!RStall) {
2248      DEBUG(++FactorCount[FactStall]);
2249      return 1;
2250    }
2251    if (LHeight != RHeight) {
2252      DEBUG(++FactorCount[FactStall]);
2253      return LHeight > RHeight ? 1 : -1;
2254    }
2255  } else if (RStall) {
2256    DEBUG(++FactorCount[FactStall]);
2257    return -1;
2258  }
2259
2260  // If either node is scheduling for latency, sort them by height/depth
2261  // and latency.
2262  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2263                     right->SchedulingPref == Sched::Latency)) {
2264    if (DisableSchedCycles) {
2265      if (LHeight != RHeight) {
2266        DEBUG(++FactorCount[FactHeight]);
2267        return LHeight > RHeight ? 1 : -1;
2268      }
2269    }
2270    else {
2271      // If neither instruction stalls (!LStall && !RStall) then their
2272      // heights are already covered, so only depth matters. We also reach
2273      // this if both stall but have the same height.
2274      int LDepth = left->getDepth() - LPenalty;
2275      int RDepth = right->getDepth() - RPenalty;
2276      if (LDepth != RDepth) {
2277        DEBUG(++FactorCount[FactDepth]);
2278        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2279              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2280              << ") depth " << RDepth << "\n");
2281        return LDepth < RDepth ? 1 : -1;
2282      }
2283    }
2284    if (left->Latency != right->Latency) {
2285      DEBUG(++FactorCount[FactOther]);
2286      return left->Latency > right->Latency ? 1 : -1;
2287    }
2288  }
2289  return 0;
2290}
2291
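// BURRSort - The common bottom-up tie-breaking chain. Roughly in order, it
// considers: physical register defs, (call-operand adjusted) Sethi-Ullman
// priority, source order when calls are involved, distance to the closest
// successor, the scratch-register estimate, latency (or height then depth when
// cycle-level scheduling is disabled or a call is involved), and finally queue
// insertion order.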
2292static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2293  // Schedule physical register definitions close to their use. This is
2294  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2295  // long as shortening physreg live ranges is generally good, we can defer
2296  // creating a subtarget hook.
2297  if (!DisableSchedPhysRegJoin) {
2298    bool LHasPhysReg = left->hasPhysRegDefs;
2299    bool RHasPhysReg = right->hasPhysRegDefs;
2300    if (LHasPhysReg != RHasPhysReg) {
2301      DEBUG(++FactorCount[FactRegUses]);
2302      #ifndef NDEBUG
2303      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2304      #endif
2305      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2306            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2307            << PhysRegMsg[RHasPhysReg] << "\n");
2308      return LHasPhysReg < RHasPhysReg;
2309    }
2310  }
2311
2312  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2313  unsigned LPriority = SPQ->getNodePriority(left);
2314  unsigned RPriority = SPQ->getNodePriority(right);
2315
2316  // Be really careful about hoisting call operands above previous calls.
2317  // Only allow it if it would reduce register pressure.
2318  if (left->isCall && right->isCallOp) {
2319    unsigned RNumVals = right->getNode()->getNumValues();
2320    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2321  }
2322  if (right->isCall && left->isCallOp) {
2323    unsigned LNumVals = left->getNode()->getNumValues();
2324    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2325  }
2326
2327  if (LPriority != RPriority) {
2328    DEBUG(++FactorCount[FactStatic]);
2329    return LPriority > RPriority;
2330  }
2331
2332  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2333  // the same, then keep source order.
2334  if (left->isCall || right->isCall) {
2335    unsigned LOrder = SPQ->getNodeOrdering(left);
2336    unsigned ROrder = SPQ->getNodeOrdering(right);
2337
2338    // Prefer an ordering where the lower the non-zero order number, the higher
2339    // the preference.
2340    if ((LOrder || ROrder) && LOrder != ROrder)
2341      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2342  }
2343
2344  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2345  // e.g.
2346  // t1 = op t2, c1
2347  // t3 = op t4, c2
2348  //
2349  // and the following instructions are both ready.
2350  // t2 = op c3
2351  // t4 = op c4
2352  //
2353  // Then schedule t2 = op first.
2354  // i.e.
2355  // t4 = op c4
2356  // t2 = op c3
2357  // t1 = op t2, c1
2358  // t3 = op t4, c2
2359  //
2360  // This creates more short live intervals.
2361  unsigned LDist = closestSucc(left);
2362  unsigned RDist = closestSucc(right);
2363  if (LDist != RDist) {
2364    DEBUG(++FactorCount[FactOther]);
2365    return LDist < RDist;
2366  }
2367
2368  // How many registers become live when the node is scheduled.
2369  unsigned LScratch = calcMaxScratches(left);
2370  unsigned RScratch = calcMaxScratches(right);
2371  if (LScratch != RScratch) {
2372    DEBUG(++FactorCount[FactOther]);
2373    return LScratch > RScratch;
2374  }
2375
2376  // Comparing latency against a call makes little sense unless the node
2377  // is register pressure-neutral.
2378  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2379    return (left->NodeQueueId > right->NodeQueueId);
2380
2381  // Do not compare latencies when one or both of the nodes are calls.
2382  if (!DisableSchedCycles &&
2383      !(left->isCall || right->isCall)) {
2384    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2385    if (result != 0)
2386      return result > 0;
2387  }
2388  else {
2389    if (left->getHeight() != right->getHeight()) {
2390      DEBUG(++FactorCount[FactHeight]);
2391      return left->getHeight() > right->getHeight();
2392    }
2393
2394    if (left->getDepth() != right->getDepth()) {
2395      DEBUG(++FactorCount[FactDepth]);
2396      return left->getDepth() < right->getDepth();
2397    }
2398  }
2399
2400  assert(left->NodeQueueId && right->NodeQueueId &&
2401         "NodeQueueId cannot be zero");
2402  DEBUG(++FactorCount[FactOther]);
2403  return (left->NodeQueueId > right->NodeQueueId);
2404}
2405
2406// Bottom up
2407bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2408  if (int res = checkSpecialNodes(left, right))
2409    return res > 0;
2410
2411  return BURRSort(left, right, SPQ);
2412}
2413
2414// Source order, otherwise bottom up.
2415bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2416  if (int res = checkSpecialNodes(left, right))
2417    return res > 0;
2418
2419  unsigned LOrder = SPQ->getNodeOrdering(left);
2420  unsigned ROrder = SPQ->getNodeOrdering(right);
2421
2422  // Prefer an ordering where the lower the non-zero order number, the higher
2423  // the preference.
2424  if ((LOrder || ROrder) && LOrder != ROrder)
2425    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2426
2427  return BURRSort(left, right, SPQ);
2428}
2429
2430// If the time between now and when the instruction will be ready can cover
2431// the spill code, then avoid adding it to the ready queue. This gives long
2432// stalls highest priority and allows hoisting across calls. It should also
2433// speed up processing the available queue.
2434bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2435  static const unsigned ReadyDelay = 3;
2436
2437  if (SPQ->MayReduceRegPressure(SU)) return true;
2438
2439  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2440
2441  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2442      != ScheduleHazardRecognizer::NoHazard)
2443    return false;
2444
2445  return true;
2446}
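
// For example, with the ReadyDelay above: an SU whose height is more than three
// cycles beyond the current cycle stays out of the available queue unless it
// may reduce register pressure; the hazard recognizer is also consulted with a
// -ReadyDelay offset so resource hazards can keep it out as well.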
2447
2448// Return true if right should be scheduled with higher priority than left.
2449bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2450  if (int res = checkSpecialNodes(left, right))
2451    return res > 0;
2452
2453  if (left->isCall || right->isCall)
2454    // No way to compute latency of calls.
2455    return BURRSort(left, right, SPQ);
2456
2457  bool LHigh = SPQ->HighRegPressure(left);
2458  bool RHigh = SPQ->HighRegPressure(right);
2459  // Avoid causing spills. If register pressure is high, schedule for
2460  // register pressure reduction.
2461  if (LHigh && !RHigh) {
2462    DEBUG(++FactorCount[FactPressureDiff]);
2463    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2464          << right->NodeNum << ")\n");
2465    return true;
2466  }
2467  else if (!LHigh && RHigh) {
2468    DEBUG(++FactorCount[FactPressureDiff]);
2469    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2470          << left->NodeNum << ")\n");
2471    return false;
2472  }
2473  if (!LHigh && !RHigh) {
2474    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2475    if (result != 0)
2476      return result > 0;
2477  }
2478  return BURRSort(left, right, SPQ);
2479}
2480
2481// Schedule as many instructions in each cycle as possible. So don't make an
2482// instruction available unless it is ready in the current cycle.
2483bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2484  if (SU->getHeight() > CurCycle) return false;
2485
2486  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2487      != ScheduleHazardRecognizer::NoHazard)
2488    return false;
2489
2490  return true;
2491}
2492
2493static bool canEnableCoalescing(SUnit *SU) {
2494  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2495  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2496    // CopyToReg should be close to its uses to facilitate coalescing and
2497    // avoid spilling.
2498    return true;
2499
2500  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2501      Opc == TargetOpcode::SUBREG_TO_REG ||
2502      Opc == TargetOpcode::INSERT_SUBREG)
2503    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2504    // close to their uses to facilitate coalescing.
2505    return true;
2506
2507  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2508    // If SU does not have a register def, schedule it close to its uses
2509    // because it does not lengthen any live ranges.
2510    return true;
2511
2512  return false;
2513}
2514
2515// list-ilp is currently an experimental scheduler that allows various
2516// heuristics to be enabled prior to the normal register reduction logic.
2517bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2518  if (int res = checkSpecialNodes(left, right))
2519    return res > 0;
2520
2521  if (left->isCall || right->isCall)
2522    // No way to compute latency of calls.
2523    return BURRSort(left, right, SPQ);
2524
2525  unsigned LLiveUses = 0, RLiveUses = 0;
2526  int LPDiff = 0, RPDiff = 0;
2527  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2528    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2529    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2530  }
2531  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2532    DEBUG(++FactorCount[FactPressureDiff]);
2533    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2534          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2535    return LPDiff > RPDiff;
2536  }
2537
2538  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2539    bool LReduce = canEnableCoalescing(left);
2540    bool RReduce = canEnableCoalescing(right);
2541    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2542    if (LReduce && !RReduce) return false;
2543    if (RReduce && !LReduce) return true;
2544  }
2545
2546  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2547    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2548          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2549    DEBUG(++FactorCount[FactRegUses]);
2550    return LLiveUses < RLiveUses;
2551  }
2552
2553  if (!DisableSchedStalls) {
2554    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2555    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2556    if (LStall != RStall) {
2557      DEBUG(++FactorCount[FactHeight]);
2558      return left->getHeight() > right->getHeight();
2559    }
2560  }
2561
2562  if (!DisableSchedCriticalPath) {
2563    int spread = (int)left->getDepth() - (int)right->getDepth();
2564    if (std::abs(spread) > MaxReorderWindow) {
2565      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2566            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2567            << right->getDepth() << "\n");
2568      DEBUG(++FactorCount[FactDepth]);
2569      return left->getDepth() < right->getDepth();
2570    }
2571  }
2572
2573  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2574    int spread = (int)left->getHeight() - (int)right->getHeight();
2575    if (std::abs(spread) > MaxReorderWindow) {
2576      DEBUG(++FactorCount[FactHeight]);
2577      return left->getHeight() > right->getHeight();
2578    }
2579  }
2580
2581  return BURRSort(left, right, SPQ);
2582}
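
// To summarize the list-ilp ordering above: register pressure diff first, then
// coalescing potential when the pressure diff is positive, live uses, stalls,
// critical path (depth) and height when they differ by more than
// MaxReorderWindow, and finally the standard BURRSort tie-breakers.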
2583
2584void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2585  SUnits = &sunits;
2586  // Add pseudo dependency edges for two-address nodes.
2587  AddPseudoTwoAddrDeps();
2588  // Reroute edges to nodes with multiple uses.
2589  if (!TracksRegPressure)
2590    PrescheduleNodesWithMultipleUses();
2591  // Calculate node priorities.
2592  CalculateSethiUllmanNumbers();
2593
2594  // For single block loops, mark nodes that look like canonical IV increments.
2595  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2596    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2597      initVRegCycle(&sunits[i]);
2598    }
2599  }
2600}
2601
2602//===----------------------------------------------------------------------===//
2603//                    Preschedule for Register Pressure
2604//===----------------------------------------------------------------------===//
2605
2606bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2607  if (SU->isTwoAddress) {
2608    unsigned Opc = SU->getNode()->getMachineOpcode();
2609    const TargetInstrDesc &TID = TII->get(Opc);
2610    unsigned NumRes = TID.getNumDefs();
2611    unsigned NumOps = TID.getNumOperands() - NumRes;
2612    for (unsigned i = 0; i != NumOps; ++i) {
2613      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2614        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2615        if (DU->getNodeId() != -1 &&
2616            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2617          return true;
2618      }
2619    }
2620  }
2621  return false;
2622}
2623
2624/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2625/// physical register defs.
2626static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2627                                  const TargetInstrInfo *TII,
2628                                  const TargetRegisterInfo *TRI) {
2629  SDNode *N = SuccSU->getNode();
2630  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2631  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2632  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2633  for (const SDNode *SUNode = SU->getNode(); SUNode;
2634       SUNode = SUNode->getGluedNode()) {
2635    if (!SUNode->isMachineOpcode())
2636      continue;
2637    const unsigned *SUImpDefs =
2638      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2639    if (!SUImpDefs)
2640      return false;
2641    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2642      EVT VT = N->getValueType(i);
2643      if (VT == MVT::Glue || VT == MVT::Other)
2644        continue;
2645      if (!N->hasAnyUseOfValue(i))
2646        continue;
2647      unsigned Reg = ImpDefs[i - NumDefs];
2648      for (;*SUImpDefs; ++SUImpDefs) {
2649        unsigned SUReg = *SUImpDefs;
2650        if (TRI->regsOverlap(Reg, SUReg))
2651          return true;
2652      }
2653    }
2654  }
2655  return false;
2656}
2657
2658/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2659/// are not handled well by the general register pressure reduction
2660/// heuristics. When presented with code like this:
2661///
2662///      N
2663///    / |
2664///   /  |
2665///  U  store
2666///  |
2667/// ...
2668///
2669/// the heuristics tend to push the store up, but since the
2670/// operand of the store has another use (U), this would increase
2671/// the length of that other use (the U->N edge).
2672///
2673/// This function transforms code like the above to route U's
2674/// dependence through the store when possible, like this:
2675///
2676///      N
2677///      ||
2678///      ||
2679///     store
2680///       |
2681///       U
2682///       |
2683///      ...
2684///
2685/// This results in the store being scheduled immediately
2686/// after N, which shortens the U->N live range, reducing
2687/// register pressure.
2688///
2689void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2690  // Visit all the nodes in topological order, working top-down.
2691  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2692    SUnit *SU = &(*SUnits)[i];
2693    // For now, only look at nodes with no data successors, such as stores.
2694    // These are especially important, due to the heuristics in
2695    // getNodePriority for nodes with no data successors.
2696    if (SU->NumSuccs != 0)
2697      continue;
2698    // For now, only look at nodes with exactly one data predecessor.
2699    if (SU->NumPreds != 1)
2700      continue;
2701    // Avoid prescheduling copies to virtual registers, which don't behave
2702    // like other nodes from the perspective of scheduling heuristics.
2703    if (SDNode *N = SU->getNode())
2704      if (N->getOpcode() == ISD::CopyToReg &&
2705          TargetRegisterInfo::isVirtualRegister
2706            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2707        continue;
2708
2709    // Locate the single data predecessor.
2710    SUnit *PredSU = 0;
2711    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2712         EE = SU->Preds.end(); II != EE; ++II)
2713      if (!II->isCtrl()) {
2714        PredSU = II->getSUnit();
2715        break;
2716      }
2717    assert(PredSU);
2718
2719    // Don't rewrite edges that carry physregs, because that requires additional
2720    // support infrastructure.
2721    if (PredSU->hasPhysRegDefs)
2722      continue;
2723    // Short-circuit the case where SU is PredSU's only data successor.
2724    if (PredSU->NumSuccs == 1)
2725      continue;
2726    // Avoid prescheduling to copies from virtual registers, which don't behave
2727    // like other nodes from the perspective of scheduling heuristics.
2728    if (SDNode *N = SU->getNode())
2729      if (N->getOpcode() == ISD::CopyFromReg &&
2730          TargetRegisterInfo::isVirtualRegister
2731            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2732        continue;
2733
2734    // Perform checks on the successors of PredSU.
2735    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2736         EE = PredSU->Succs.end(); II != EE; ++II) {
2737      SUnit *PredSuccSU = II->getSUnit();
2738      if (PredSuccSU == SU) continue;
2739      // If PredSU has another successor with no data successors, for
2740      // now don't attempt to choose either over the other.
2741      if (PredSuccSU->NumSuccs == 0)
2742        goto outer_loop_continue;
2743      // Don't break physical register dependencies.
2744      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2745        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2746          goto outer_loop_continue;
2747      // Don't introduce graph cycles.
2748      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2749        goto outer_loop_continue;
2750    }
2751
2752    // Ok, the transformation is safe and the heuristics suggest it is
2753    // profitable. Update the graph.
2754    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2755                 << " next to PredSU #" << PredSU->NodeNum
2756                 << " to guide scheduling in the presence of multiple uses\n");
2757    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2758      SDep Edge = PredSU->Succs[i];
2759      assert(!Edge.isAssignedRegDep());
2760      SUnit *SuccSU = Edge.getSUnit();
2761      if (SuccSU != SU) {
2762        Edge.setSUnit(PredSU);
2763        scheduleDAG->RemovePred(SuccSU, Edge);
2764        scheduleDAG->AddPred(SU, Edge);
2765        Edge.setSUnit(SU);
2766        scheduleDAG->AddPred(SuccSU, Edge);
2767        --i;
2768      }
2769    }
2770  outer_loop_continue:;
2771  }
2772}
2773
2774/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2775/// it as a def&use operand, add a pseudo control edge from it to the other
2776/// node (if it won't create a cycle) so the two-address one will be scheduled
2777/// first (lower in the schedule). If both nodes are two-address, favor the
2778/// one that has a CopyToReg use (more likely to be a loop induction update).
2779/// If both are two-address, but one is commutable while the other is not
2780/// commutable, favor the one that's not commutable.
2781void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2782  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2783    SUnit *SU = &(*SUnits)[i];
2784    if (!SU->isTwoAddress)
2785      continue;
2786
2787    SDNode *Node = SU->getNode();
2788    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2789      continue;
2790
2791    bool isLiveOut = hasOnlyLiveOutUses(SU);
2792    unsigned Opc = Node->getMachineOpcode();
2793    const TargetInstrDesc &TID = TII->get(Opc);
2794    unsigned NumRes = TID.getNumDefs();
2795    unsigned NumOps = TID.getNumOperands() - NumRes;
2796    for (unsigned j = 0; j != NumOps; ++j) {
2797      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2798        continue;
2799      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2800      if (DU->getNodeId() == -1)
2801        continue;
2802      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2803      if (!DUSU) continue;
2804      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2805           E = DUSU->Succs.end(); I != E; ++I) {
2806        if (I->isCtrl()) continue;
2807        SUnit *SuccSU = I->getSUnit();
2808        if (SuccSU == SU)
2809          continue;
2810        // Be conservative. Ignore if nodes aren't at roughly the same
2811        // depth and height.
2812        if (SuccSU->getHeight() < SU->getHeight() &&
2813            (SU->getHeight() - SuccSU->getHeight()) > 1)
2814          continue;
2815        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2816        // constrains whatever is using the copy, instead of the copy
2817        // itself. In the case that the copy is coalesced, this
2818        // preserves the intent of the pseudo two-address heuristics.
2819        while (SuccSU->Succs.size() == 1 &&
2820               SuccSU->getNode()->isMachineOpcode() &&
2821               SuccSU->getNode()->getMachineOpcode() ==
2822                 TargetOpcode::COPY_TO_REGCLASS)
2823          SuccSU = SuccSU->Succs.front().getSUnit();
2824        // Don't constrain non-instruction nodes.
2825        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2826          continue;
2827        // Don't constrain nodes with physical register defs if the
2828        // predecessor can clobber them.
2829        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2830          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2831            continue;
2832        }
2833        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2834        // these may be coalesced away. We want them close to their uses.
2835        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2836        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2837            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2838            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2839          continue;
2840        if ((!canClobber(SuccSU, DUSU) ||
2841             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2842             (!SU->isCommutable && SuccSU->isCommutable)) &&
2843            !scheduleDAG->IsReachable(SuccSU, SU)) {
2844          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2845                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2846          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2847                                        /*Reg=*/0, /*isNormalMemory=*/false,
2848                                        /*isMustAlias=*/false,
2849                                        /*isArtificial=*/true));
2850        }
2851      }
2852    }
2853  }
2854}
2855
2856/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors of
2857/// the successors of the SUnit SU. Stop counting once the provided limit is
2858/// exceeded.
2859static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2860                                                    unsigned Limit) {
2861  unsigned Sum = 0;
2862  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2863       I != E; ++I) {
2864    const SUnit *SuccSU = I->getSUnit();
2865    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2866         EE = SuccSU->Preds.end(); II != EE; ++II) {
2867      SUnit *PredSU = II->getSUnit();
2868      if (!PredSU->isScheduled)
2869        if (++Sum > Limit)
2870          return Sum;
2871    }
2872  }
2873  return Sum;
2874}
2875
2876
2877// Top down
2878bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2879  if (int res = checkSpecialNodes(left, right))
2880    return res < 0;
2881
2882  unsigned LPriority = SPQ->getNodePriority(left);
2883  unsigned RPriority = SPQ->getNodePriority(right);
2884  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2885  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2886  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2887  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2888  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2889  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2890
2891  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2892    return false;
2893  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2894    return true;
2895
2896  if (LIsFloater)
2897    LBonus -= 2;
2898  if (RIsFloater)
2899    RBonus -= 2;
2900  if (left->NumSuccs == 1)
2901    LBonus += 2;
2902  if (right->NumSuccs == 1)
2903    RBonus += 2;
2904
2905  if (LPriority+LBonus != RPriority+RBonus)
2906    return LPriority+LBonus < RPriority+RBonus;
2907
2908  if (left->getDepth() != right->getDepth())
2909    return left->getDepth() < right->getDepth();
2910
2911  if (left->NumSuccsLeft != right->NumSuccsLeft)
2912    return left->NumSuccsLeft > right->NumSuccsLeft;
2913
2914  assert(left->NodeQueueId && right->NodeQueueId &&
2915         "NodeQueueId cannot be zero");
2916  return (left->NodeQueueId > right->NodeQueueId);
2917}
2918
2919//===----------------------------------------------------------------------===//
2920//                         Public Constructor Functions
2921//===----------------------------------------------------------------------===//
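//
// These factory functions are invoked through the RegisterScheduler registry
// entries near the top of this file; the corresponding scheduler can typically
// be selected on the llc command line with -pre-RA-sched=<name> (for example
// -pre-RA-sched=source), overriding the target's default choice.
//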
2922
2923llvm::ScheduleDAGSDNodes *
2924llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2925                                 CodeGenOpt::Level OptLevel) {
2926  const TargetMachine &TM = IS->TM;
2927  const TargetInstrInfo *TII = TM.getInstrInfo();
2928  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2929
2930  BURegReductionPriorityQueue *PQ =
2931    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2932  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2933  PQ->setScheduleDAG(SD);
2934  return SD;
2935}
2936
2937llvm::ScheduleDAGSDNodes *
2938llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2939                                 CodeGenOpt::Level OptLevel) {
2940  const TargetMachine &TM = IS->TM;
2941  const TargetInstrInfo *TII = TM.getInstrInfo();
2942  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2943
2944  TDRegReductionPriorityQueue *PQ =
2945    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2946  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2947  PQ->setScheduleDAG(SD);
2948  return SD;
2949}
2950
2951llvm::ScheduleDAGSDNodes *
2952llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2953                                   CodeGenOpt::Level OptLevel) {
2954  const TargetMachine &TM = IS->TM;
2955  const TargetInstrInfo *TII = TM.getInstrInfo();
2956  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2957
2958  SrcRegReductionPriorityQueue *PQ =
2959    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2960  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2961  PQ->setScheduleDAG(SD);
2962  return SD;
2963}
2964
2965llvm::ScheduleDAGSDNodes *
2966llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2967                                   CodeGenOpt::Level OptLevel) {
2968  const TargetMachine &TM = IS->TM;
2969  const TargetInstrInfo *TII = TM.getInstrInfo();
2970  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2971  const TargetLowering *TLI = &IS->getTargetLowering();
2972
2973  HybridBURRPriorityQueue *PQ =
2974    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2975
2976  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2977  PQ->setScheduleDAG(SD);
2978  return SD;
2979}
2980
2981llvm::ScheduleDAGSDNodes *
2982llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2983                                CodeGenOpt::Level OptLevel) {
2984  const TargetMachine &TM = IS->TM;
2985  const TargetInstrInfo *TII = TM.getInstrInfo();
2986  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2987  const TargetLowering *TLI = &IS->getTargetLowering();
2988
2989  ILPBURRPriorityQueue *PQ =
2990    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2991  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2992  PQ->setScheduleDAG(SD);
2993  return SD;
2994}
2995