ScheduleDAGRRList.cpp revision 623a7e146bd86747dc46a6f8bb9993fc217d6b78
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up register pressure reduction list schedulers,
11// using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  sourceListDAGScheduler("source",
49                         "Similar to list-burr but schedules in source "
50                         "order when possible",
51                         createSourceListDAGScheduler);
52
53static RegisterScheduler
54  hybridListDAGScheduler("list-hybrid",
55                         "Bottom-up register pressure aware list scheduling "
56                         "which tries to balance latency and register pressure",
57                         createHybridListDAGScheduler);
58
59static RegisterScheduler
60  ILPListDAGScheduler("list-ilp",
61                      "Bottom-up register pressure aware list scheduling "
62                      "which tries to balance ILP and register pressure",
63                      createILPListDAGScheduler);
64
65static cl::opt<bool> DisableSchedCycles(
66  "disable-sched-cycles", cl::Hidden, cl::init(false),
67  cl::desc("Disable cycle-level precision during preRA scheduling"));
68
69// Temporary sched=list-ilp flags until the heuristics are robust.
70// Some options are also available under sched=list-hybrid.
71static cl::opt<bool> DisableSchedRegPressure(
72  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
73  cl::desc("Disable regpressure priority in sched=list-ilp"));
74static cl::opt<bool> DisableSchedLiveUses(
75  "disable-sched-live-uses", cl::Hidden, cl::init(true),
76  cl::desc("Disable live use priority in sched=list-ilp"));
77static cl::opt<bool> DisableSchedVRegCycle(
78  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
79  cl::desc("Disable virtual register cycle interference checks"));
80static cl::opt<bool> DisableSchedPhysRegJoin(
81  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
82  cl::desc("Disable physreg def-use affinity"));
83static cl::opt<bool> DisableSchedStalls(
84  "disable-sched-stalls", cl::Hidden, cl::init(true),
85  cl::desc("Disable no-stall priority in sched=list-ilp"));
86static cl::opt<bool> DisableSchedCriticalPath(
87  "disable-sched-critical-path", cl::Hidden, cl::init(false),
88  cl::desc("Disable critical path priority in sched=list-ilp"));
89static cl::opt<bool> DisableSchedHeight(
90  "disable-sched-height", cl::Hidden, cl::init(false),
91  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
92static cl::opt<bool> Disable2AddrHack(
93  "disable-2addr-hack", cl::Hidden, cl::init(true),
94  cl::desc("Disable scheduler's two-address hack"));
95
96static cl::opt<int> MaxReorderWindow(
97  "max-sched-reorder", cl::Hidden, cl::init(6),
98  cl::desc("Number of instructions to allow ahead of the critical path "
99           "in sched=list-ilp"));
100
101static cl::opt<unsigned> AvgIPC(
102  "sched-avg-ipc", cl::Hidden, cl::init(1),
103  cl::desc("Average inst/cycle whan no target itinerary exists."));
104
105#ifndef NDEBUG
106namespace {
107  // For sched=list-ilp, count the number of times each factor comes into play.
108  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
109         FactStatic, FactOther, NumFactors };
110}
111static const char *FactorName[NumFactors] =
112{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
113static int FactorCount[NumFactors];
114#endif //!NDEBUG
115
116namespace {
117//===----------------------------------------------------------------------===//
118/// ScheduleDAGRRList - The actual register reduction list scheduler
119/// implementation.  This schedules nodes bottom-up.
120///
121class ScheduleDAGRRList : public ScheduleDAGSDNodes {
122private:
123  /// NeedLatency - True if the scheduler will make use of latency information.
124  ///
125  bool NeedLatency;
126
127  /// AvailableQueue - The priority queue to use for the available SUnits.
128  SchedulingPriorityQueue *AvailableQueue;
129
130  /// PendingQueue - This contains all of the instructions whose operands have
131  /// been issued, but their results are not ready yet (due to the latency of
132  /// the operation).  Once the operands become available, the instruction is
133  /// added to the AvailableQueue.
134  std::vector<SUnit*> PendingQueue;
135
136  /// HazardRec - The hazard recognizer to use.
137  ScheduleHazardRecognizer *HazardRec;
138
139  /// CurCycle - The current scheduler state corresponds to this cycle.
140  unsigned CurCycle;
141
142  /// MinAvailableCycle - Cycle of the soonest available instruction.
143  unsigned MinAvailableCycle;
144
145  /// IssueCount - Count instructions issued in this cycle
146  /// Currently valid only for bottom-up scheduling.
147  unsigned IssueCount;
148
149  /// LiveRegDefs - A set of physical registers and their definitions
150  /// that are "live". These nodes must be scheduled before any other nodes
151  /// that modify the registers can be scheduled.
152  unsigned NumLiveRegs;
153  std::vector<SUnit*> LiveRegDefs;
154  std::vector<SUnit*> LiveRegGens;
155
156  /// Topo - A topological ordering for SUnits which permits fast IsReachable
157  /// and similar queries.
158  ScheduleDAGTopologicalSort Topo;
159
160public:
161  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
162                    SchedulingPriorityQueue *availqueue,
163                    CodeGenOpt::Level OptLevel)
164    : ScheduleDAGSDNodes(mf),
165      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
166      Topo(SUnits) {
167
168    const TargetMachine &tm = mf.getTarget();
169    if (DisableSchedCycles || !NeedLatency)
170      HazardRec = new ScheduleHazardRecognizer();
171    else
172      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
173  }
174
175  ~ScheduleDAGRRList() {
176    delete HazardRec;
177    delete AvailableQueue;
178  }
179
180  void Schedule();
181
182  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
183
184  /// IsReachable - Checks if SU is reachable from TargetSU.
185  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
186    return Topo.IsReachable(SU, TargetSU);
187  }
188
189  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
190  /// create a cycle.
191  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
192    return Topo.WillCreateCycle(SU, TargetSU);
193  }
194
195  /// AddPred - Adds a predecessor edge to SUnit SU.
196  /// Updates the topological ordering if required.
198  void AddPred(SUnit *SU, const SDep &D) {
199    Topo.AddPred(SU, D.getSUnit());
200    SU->addPred(D);
201  }
202
203  /// RemovePred - Removes a predecessor edge from SUnit SU.
204  /// Updates the topological ordering if required.
206  void RemovePred(SUnit *SU, const SDep &D) {
207    Topo.RemovePred(SU, D.getSUnit());
208    SU->removePred(D);
209  }
210
211private:
212  bool isReady(SUnit *SU) {
213    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
214      AvailableQueue->isReady(SU);
215  }
216
217  void ReleasePred(SUnit *SU, const SDep *PredEdge);
218  void ReleasePredecessors(SUnit *SU);
219  void ReleasePending();
220  void AdvanceToCycle(unsigned NextCycle);
221  void AdvancePastStalls(SUnit *SU);
222  void EmitNode(SUnit *SU);
223  void ScheduleNodeBottomUp(SUnit*);
224  void CapturePred(SDep *PredEdge);
225  void UnscheduleNodeBottomUp(SUnit*);
226  void RestoreHazardCheckerBottomUp();
227  void BacktrackBottomUp(SUnit*, SUnit*);
228  SUnit *CopyAndMoveSuccessors(SUnit*);
229  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
230                                const TargetRegisterClass*,
231                                const TargetRegisterClass*,
232                                SmallVector<SUnit*, 2>&);
233  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
234
235  SUnit *PickNodeToScheduleBottomUp();
236  void ListScheduleBottomUp();
237
238  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
239  /// Updates the topological ordering if required.
240  SUnit *CreateNewSUnit(SDNode *N) {
241    unsigned NumSUnits = SUnits.size();
242    SUnit *NewNode = NewSUnit(N);
243    // Update the topological ordering.
244    if (NewNode->NodeNum >= NumSUnits)
245      Topo.InitDAGTopologicalSorting();
246    return NewNode;
247  }
248
249  /// CreateClone - Creates a new SUnit from an existing one.
250  /// Updates the topological ordering if required.
251  SUnit *CreateClone(SUnit *N) {
252    unsigned NumSUnits = SUnits.size();
253    SUnit *NewNode = Clone(N);
254    // Update the topological ordering.
255    if (NewNode->NodeNum >= NumSUnits)
256      Topo.InitDAGTopologicalSorting();
257    return NewNode;
258  }
259
260  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
261  /// need actual latency information but the hybrid scheduler does.
262  bool ForceUnitLatencies() const {
263    return !NeedLatency;
264  }
265};
266}  // end anonymous namespace
267
268/// GetCostForDef - Looks up the register class and cost for a given definition.
269/// Typically this just means looking up the representative register class,
270/// but for untyped values (MVT::untyped) it means inspecting the node's
271/// opcode to determine what register class is being generated.
272static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
273                          const TargetLowering *TLI,
274                          const TargetInstrInfo *TII,
275                          const TargetRegisterInfo *TRI,
276                          unsigned &RegClass, unsigned &Cost) {
277  EVT VT = RegDefPos.GetValue();
278
279  // Special handling for untyped values.  These values can only come from
280  // the expansion of custom DAG-to-DAG patterns.
281  if (VT == MVT::untyped) {
282    const SDNode *Node = RegDefPos.GetNode();
283    unsigned Opcode = Node->getMachineOpcode();
284
285    if (Opcode == TargetOpcode::REG_SEQUENCE) {
286      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
287      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
288      RegClass = RC->getID();
289      Cost = 1;
290      return;
291    }
292
293    unsigned Idx = RegDefPos.GetIdx();
294    const MCInstrDesc Desc = TII->get(Opcode);
295    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
296    RegClass = RC->getID();
297    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
298    // better way to determine it.
299    Cost = 1;
300  } else {
301    RegClass = TLI->getRepRegClassFor(VT)->getID();
302    Cost = TLI->getRepRegClassCostFor(VT);
303  }
304}
305
306/// Schedule - Schedule the DAG using list scheduling.
307void ScheduleDAGRRList::Schedule() {
308  DEBUG(dbgs()
309        << "********** List Scheduling BB#" << BB->getNumber()
310        << " '" << BB->getName() << "' **********\n");
311#ifndef NDEBUG
312  for (int i = 0; i < NumFactors; ++i) {
313    FactorCount[i] = 0;
314  }
315#endif //!NDEBUG
316
317  CurCycle = 0;
318  IssueCount = 0;
319  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
320  NumLiveRegs = 0;
321  // Allocate slots for each physical register, plus one for a special register
322  // to track the virtual resource of a calling sequence.
323  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
324  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
325
326  // Build the scheduling graph.
327  BuildSchedGraph(NULL);
328
329  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
330          SUnits[su].dumpAll(this));
331  Topo.InitDAGTopologicalSorting();
332
333  AvailableQueue->initNodes(SUnits);
334
335  HazardRec->Reset();
336
337  // Execute the actual scheduling loop.
338  ListScheduleBottomUp();
339
340#ifndef NDEBUG
341  for (int i = 0; i < NumFactors; ++i) {
342    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
343  }
344#endif // !NDEBUG
345  AvailableQueue->releaseState();
346}
347
348//===----------------------------------------------------------------------===//
349//  Bottom-Up Scheduling
350//===----------------------------------------------------------------------===//
351
352/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
353/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
354void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
355  SUnit *PredSU = PredEdge->getSUnit();
356
357#ifndef NDEBUG
358  if (PredSU->NumSuccsLeft == 0) {
359    dbgs() << "*** Scheduling failed! ***\n";
360    PredSU->dump(this);
361    dbgs() << " has been released too many times!\n";
362    llvm_unreachable(0);
363  }
364#endif
365  --PredSU->NumSuccsLeft;
366
367  if (!ForceUnitLatencies()) {
368    // Updating predecessor's height. This is now the cycle when the
369    // predecessor can be scheduled without causing a pipeline stall.
370    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
371  }
372
373  // If all the node's successors are scheduled, this node is ready
374  // to be scheduled. Ignore the special EntrySU node.
375  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
376    PredSU->isAvailable = true;
377
378    unsigned Height = PredSU->getHeight();
379    if (Height < MinAvailableCycle)
380      MinAvailableCycle = Height;
381
382    if (isReady(PredSU)) {
383      AvailableQueue->push(PredSU);
384    }
385    // CapturePred and others may have left the node in the pending queue, avoid
386    // adding it twice.
387    else if (!PredSU->isPending) {
388      PredSU->isPending = true;
389      PendingQueue.push_back(PredSU);
390    }
391  }
392}
393
394/// IsChainDependent - Test if Outer is reachable from Inner through
395/// chain dependencies.
396static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
397                             unsigned NestLevel,
398                             const TargetInstrInfo *TII) {
399  SDNode *N = Outer;
400  for (;;) {
401    if (N == Inner)
402      return true;
403    // For a TokenFactor, examine each operand. There may be multiple ways
404    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
405    // most nesting in order to ensure that we find the corresponding match.
406    if (N->getOpcode() == ISD::TokenFactor) {
407      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
408        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
409          return true;
410      return false;
411    }
412    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
413    if (N->isMachineOpcode()) {
414      if (N->getMachineOpcode() ==
415          (unsigned)TII->getCallFrameDestroyOpcode()) {
416        ++NestLevel;
417      } else if (N->getMachineOpcode() ==
418                 (unsigned)TII->getCallFrameSetupOpcode()) {
419        if (NestLevel == 0)
420          return false;
421        --NestLevel;
422      }
423    }
424    // Otherwise, find the chain and continue climbing.
425    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
426      if (N->getOperand(i).getValueType() == MVT::Other) {
427        N = N->getOperand(i).getNode();
428        goto found_chain_operand;
429      }
430    return false;
431  found_chain_operand:;
432    if (N->getOpcode() == ISD::EntryToken)
433      return false;
434  }
435}
436
437/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
438/// the corresponding (lowered) CALLSEQ_BEGIN node.
439///
440/// NestLevel and MaxNest are used in recursion to indicate the current level
441/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
442/// level seen so far.
443///
444/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
445/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
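///
/// For example (an illustrative walk, climbing chain operands bottom-up from
/// an outer CALLSEQ_END whose argument computation contains a nested call):
///
///   outer CALLSEQ_END    visited first:  ++NestLevel -> 1, MaxNest = 1
///   inner CALLSEQ_END                    ++NestLevel -> 2, MaxNest = 2
///   inner CALLSEQ_BEGIN                  --NestLevel -> 1
///   outer CALLSEQ_BEGIN                  --NestLevel -> 0, match; return it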
446static SDNode *
447FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
448                 const TargetInstrInfo *TII) {
449  for (;;) {
450    // For a TokenFactor, examine each operand. There may be multiple ways
451    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
452    // most nesting in order to ensure that we find the corresponding match.
453    if (N->getOpcode() == ISD::TokenFactor) {
454      SDNode *Best = 0;
455      unsigned BestMaxNest = MaxNest;
456      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
457        unsigned MyNestLevel = NestLevel;
458        unsigned MyMaxNest = MaxNest;
459        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
460                                           MyNestLevel, MyMaxNest, TII))
461          if (!Best || (MyMaxNest > BestMaxNest)) {
462            Best = New;
463            BestMaxNest = MyMaxNest;
464          }
465      }
466      assert(Best);
467      MaxNest = BestMaxNest;
468      return Best;
469    }
470    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
471    if (N->isMachineOpcode()) {
472      if (N->getMachineOpcode() ==
473          (unsigned)TII->getCallFrameDestroyOpcode()) {
474        ++NestLevel;
475        MaxNest = std::max(MaxNest, NestLevel);
476      } else if (N->getMachineOpcode() ==
477                 (unsigned)TII->getCallFrameSetupOpcode()) {
478        assert(NestLevel != 0);
479        --NestLevel;
480        if (NestLevel == 0)
481          return N;
482      }
483    }
484    // Otherwise, find the chain and continue climbing.
485    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
486      if (N->getOperand(i).getValueType() == MVT::Other) {
487        N = N->getOperand(i).getNode();
488        goto found_chain_operand;
489      }
490    return 0;
491  found_chain_operand:;
492    if (N->getOpcode() == ISD::EntryToken)
493      return 0;
494  }
495}
496
497/// Call ReleasePred for each predecessor, then update register live def/gen.
498/// Always update LiveRegDefs for a register dependence even if the current SU
499/// also defines the register. This effectively creates one large live range
500/// across a sequence of two-address nodes. This is important because the
501/// entire chain must be scheduled together. Example:
502///
503/// flags = (3) add
504/// flags = (2) addc flags
505/// flags = (1) addc flags
506///
507/// results in
508///
509/// LiveRegDefs[flags] = 3
510/// LiveRegGens[flags] = 1
511///
512/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
513/// interference on flags.
514void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
515  // Bottom up: release predecessors
516  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
517       I != E; ++I) {
518    ReleasePred(SU, &*I);
519    if (I->isAssignedRegDep()) {
520      // This is a physical register dependency and it's impossible or
521      // expensive to copy the register. Make sure nothing that can
522      // clobber the register is scheduled between the predecessor and
523      // this node.
524      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
525      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
526             "interference on register dependence");
527      LiveRegDefs[I->getReg()] = I->getSUnit();
528      if (!LiveRegGens[I->getReg()]) {
529        ++NumLiveRegs;
530        LiveRegGens[I->getReg()] = SU;
531      }
532    }
533  }
534
535  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
536  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
537  // these nodes, to prevent other calls from being interscheduled with them.
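  // The call resource is modeled as a pseudo register in the slot just past
  // the last real register (index TRI->getNumRegs()); Schedule() allocates
  // this extra slot in LiveRegDefs and LiveRegGens.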
538  unsigned CallResource = TRI->getNumRegs();
539  if (!LiveRegDefs[CallResource])
540    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
541      if (Node->isMachineOpcode() &&
542          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
543        unsigned NestLevel = 0;
544        unsigned MaxNest = 0;
545        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
546
547        SUnit *Def = &SUnits[N->getNodeId()];
548        ++NumLiveRegs;
549        LiveRegDefs[CallResource] = Def;
550        LiveRegGens[CallResource] = SU;
551        break;
552      }
553}
554
555/// Check to see if any of the pending instructions are ready to issue.  If
556/// so, add them to the available queue.
557void ScheduleDAGRRList::ReleasePending() {
558  if (DisableSchedCycles) {
559    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
560    return;
561  }
562
563  // If the available queue is empty, it is safe to reset MinAvailableCycle.
564  if (AvailableQueue->empty())
565    MinAvailableCycle = UINT_MAX;
566
567  // Check to see if any of the pending instructions are ready to issue.  If
568  // so, add them to the available queue.
569  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
570    unsigned ReadyCycle = PendingQueue[i]->getHeight();
571    if (ReadyCycle < MinAvailableCycle)
572      MinAvailableCycle = ReadyCycle;
573
574    if (PendingQueue[i]->isAvailable) {
575      if (!isReady(PendingQueue[i]))
576          continue;
577      AvailableQueue->push(PendingQueue[i]);
578    }
579    PendingQueue[i]->isPending = false;
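    // Erase entry i with swap-and-pop: move the last element into slot i,
    // shrink the queue, and back up the index and bound so the moved element
    // is also examined.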
580    PendingQueue[i] = PendingQueue.back();
581    PendingQueue.pop_back();
582    --i; --e;
583  }
584}
585
586/// Move the scheduler state forward by the specified number of Cycles.
587void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
588  if (NextCycle <= CurCycle)
589    return;
590
591  IssueCount = 0;
592  AvailableQueue->setCurCycle(NextCycle);
593  if (!HazardRec->isEnabled()) {
594    // Bypass lots of virtual calls in case of long latency.
595    CurCycle = NextCycle;
596  }
597  else {
598    for (; CurCycle != NextCycle; ++CurCycle) {
599      HazardRec->RecedeCycle();
600    }
601  }
602  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
603  // available Q to release pending nodes at least once before popping.
604  ReleasePending();
605}
606
607/// Move the scheduler state forward until the specified node's dependents are
608/// ready and can be scheduled with no resource conflicts.
609void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
610  if (DisableSchedCycles)
611    return;
612
613  // FIXME: Nodes such as CopyFromReg probably should not advance the current
614  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
615  // has predecessors the cycle will be advanced when they are scheduled.
616  // But given the crude nature of modeling latency though such nodes, we
617  // currently need to treat these nodes like real instructions.
618  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
619
620  unsigned ReadyCycle = SU->getHeight();
621
622  // Bump CurCycle to account for latency. We assume the latency of other
623  // available instructions may be hidden by the stall (not a full pipe stall).
624  // This updates the hazard recognizer's cycle before reserving resources for
625  // this instruction.
626  AdvanceToCycle(ReadyCycle);
627
628  // Calls are scheduled in their preceding cycle, so don't conflict with
629  // hazards from instructions after the call. EmitNode will reset the
630  // scoreboard state before emitting the call.
631  if (SU->isCall)
632    return;
633
634  // FIXME: For resource conflicts in very long non-pipelined stages, we
635  // should probably skip ahead here to avoid useless scoreboard checks.
636  int Stalls = 0;
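  // Query the hazard recognizer with successively larger stall counts (the
  // offset is negated because this scheduler issues bottom-up) until it
  // reports no hazard for SU.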
637  while (true) {
638    ScheduleHazardRecognizer::HazardType HT =
639      HazardRec->getHazardType(SU, -Stalls);
640
641    if (HT == ScheduleHazardRecognizer::NoHazard)
642      break;
643
644    ++Stalls;
645  }
646  AdvanceToCycle(CurCycle + Stalls);
647}
648
649/// Record this SUnit in the HazardRecognizer.
650/// Does not update CurCycle.
651void ScheduleDAGRRList::EmitNode(SUnit *SU) {
652  if (!HazardRec->isEnabled())
653    return;
654
655  // Check for phys reg copy.
656  if (!SU->getNode())
657    return;
658
659  switch (SU->getNode()->getOpcode()) {
660  default:
661    assert(SU->getNode()->isMachineOpcode() &&
662           "This target-independent node should not be scheduled.");
663    break;
664  case ISD::MERGE_VALUES:
665  case ISD::TokenFactor:
666  case ISD::CopyToReg:
667  case ISD::CopyFromReg:
668  case ISD::EH_LABEL:
669    // Noops don't affect the scoreboard state. Copies are likely to be
670    // removed.
671    return;
672  case ISD::INLINEASM:
673    // For inline asm, clear the pipeline state.
674    HazardRec->Reset();
675    return;
676  }
677  if (SU->isCall) {
678    // Calls are scheduled with their preceding instructions. For bottom-up
679    // scheduling, clear the pipeline state before emitting.
680    HazardRec->Reset();
681  }
682
683  HazardRec->EmitInstruction(SU);
684}
685
686static void resetVRegCycle(SUnit *SU);
687
688/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
689/// count of its predecessors. If a predecessor pending count is zero, add it to
690/// the Available queue.
691void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
692  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
693  DEBUG(SU->dump(this));
694
695#ifndef NDEBUG
696  if (CurCycle < SU->getHeight())
697    DEBUG(dbgs() << "   Height [" << SU->getHeight()
698          << "] pipeline stall!\n");
699#endif
700
701  // FIXME: Do not modify node height. It may interfere with
702  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
703  // node its ready cycle can aid heuristics, and after scheduling it can
704  // indicate the scheduled cycle.
705  SU->setHeightToAtLeast(CurCycle);
706
707  // Reserve resources for the scheduled instruction.
708  EmitNode(SU);
709
710  Sequence.push_back(SU);
711
712  AvailableQueue->ScheduledNode(SU);
713
714  // If HazardRec is disabled, and each inst counts as one cycle, then
715  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
716  // PendingQueue for schedulers that implement HasReadyFilter.
717  if (!HazardRec->isEnabled() && AvgIPC < 2)
718    AdvanceToCycle(CurCycle + 1);
719
720  // Update liveness of predecessors before successors to avoid treating a
721  // two-address node as a live range def.
722  ReleasePredecessors(SU);
723
724  // Release all the implicit physical register defs that are live.
725  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
726       I != E; ++I) {
727    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
728    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
729      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
730      --NumLiveRegs;
731      LiveRegDefs[I->getReg()] = NULL;
732      LiveRegGens[I->getReg()] = NULL;
733    }
734  }
735  // Release the special call resource dependence, if this is the beginning
736  // of a call.
737  unsigned CallResource = TRI->getNumRegs();
738  if (LiveRegDefs[CallResource] == SU)
739    for (const SDNode *SUNode = SU->getNode(); SUNode;
740         SUNode = SUNode->getGluedNode()) {
741      if (SUNode->isMachineOpcode() &&
742          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
743        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
744        --NumLiveRegs;
745        LiveRegDefs[CallResource] = NULL;
746        LiveRegGens[CallResource] = NULL;
747      }
748    }
749
750  resetVRegCycle(SU);
751
752  SU->isScheduled = true;
753
754  // Conditions under which the scheduler should eagerly advance the cycle:
755  // (1) No available instructions
756  // (2) All pipelines full, so available instructions must have hazards.
757  //
758  // If HazardRec is disabled, the cycle was pre-advanced before calling
759  // ReleasePredecessors. In that case, IssueCount should remain 0.
760  //
761  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
762  if (HazardRec->isEnabled() || AvgIPC > 1) {
763    if (SU->getNode() && SU->getNode()->isMachineOpcode())
764      ++IssueCount;
765    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
766        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
767      AdvanceToCycle(CurCycle + 1);
768  }
769}
770
771/// CapturePred - This does the opposite of ReleasePred. Since SU is being
772/// unscheduled, increase the succ left count of its predecessors. Remove
773/// them from AvailableQueue if necessary.
774void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
775  SUnit *PredSU = PredEdge->getSUnit();
776  if (PredSU->isAvailable) {
777    PredSU->isAvailable = false;
778    if (!PredSU->isPending)
779      AvailableQueue->remove(PredSU);
780  }
781
782  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
783  ++PredSU->NumSuccsLeft;
784}
785
786/// UnscheduleNodeBottomUp - Remove the node from the schedule and update its
787/// own and its predecessors' state to reflect the change.
788void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
789  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
790  DEBUG(SU->dump(this));
791
792  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
793       I != E; ++I) {
794    CapturePred(&*I);
795    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
796      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
797      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
798             "Physical register dependency violated?");
799      --NumLiveRegs;
800      LiveRegDefs[I->getReg()] = NULL;
801      LiveRegGens[I->getReg()] = NULL;
802    }
803  }
804
805  // Reclaim the special call resource dependence, if this is the beginning
806  // of a call.
807  unsigned CallResource = TRI->getNumRegs();
808  for (const SDNode *SUNode = SU->getNode(); SUNode;
809       SUNode = SUNode->getGluedNode()) {
810    if (SUNode->isMachineOpcode() &&
811        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
812      ++NumLiveRegs;
813      LiveRegDefs[CallResource] = SU;
814      LiveRegGens[CallResource] = NULL;
815    }
816  }
817
818  // Release the special call resource dependence, if this is the end
819  // of a call.
820  if (LiveRegGens[CallResource] == SU)
821    for (const SDNode *SUNode = SU->getNode(); SUNode;
822         SUNode = SUNode->getGluedNode()) {
823      if (SUNode->isMachineOpcode() &&
824          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
825        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
826        --NumLiveRegs;
827        LiveRegDefs[CallResource] = NULL;
828        LiveRegGens[CallResource] = NULL;
829      }
830    }
831
832  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
833       I != E; ++I) {
834    if (I->isAssignedRegDep()) {
835      if (!LiveRegDefs[I->getReg()]) {
836        ++NumLiveRegs;
837      }
838      // This becomes the nearest def. Note that an earlier def may still be
839      // pending if this is a two-address node.
840      LiveRegDefs[I->getReg()] = SU;
841      if (LiveRegGens[I->getReg()] == NULL ||
842          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
843        LiveRegGens[I->getReg()] = I->getSUnit();
844    }
845  }
846  if (SU->getHeight() < MinAvailableCycle)
847    MinAvailableCycle = SU->getHeight();
848
849  SU->setHeightDirty();
850  SU->isScheduled = false;
851  SU->isAvailable = true;
852  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
853    // Don't make available until backtracking is complete.
854    SU->isPending = true;
855    PendingQueue.push_back(SU);
856  }
857  else {
858    AvailableQueue->push(SU);
859  }
860  AvailableQueue->UnscheduledNode(SU);
861}
862
863/// After backtracking, the hazard checker needs to be restored to a state
864/// corresponding to the current cycle.
865void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
866  HazardRec->Reset();
867
868  unsigned LookAhead = std::min((unsigned)Sequence.size(),
869                                HazardRec->getMaxLookAhead());
870  if (LookAhead == 0)
871    return;
872
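  // Replay the most recently scheduled nodes, up to the recognizer's maximum
  // look-ahead, into the freshly reset hazard recognizer so its state again
  // matches the current end of the schedule.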
873  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
874  unsigned HazardCycle = (*I)->getHeight();
875  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
876    SUnit *SU = *I;
877    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
878      HazardRec->RecedeCycle();
879    }
880    EmitNode(SU);
881  }
882}
883
884/// BacktrackBottomUp - Backtrack scheduling to the cycle in which BtSU was
885/// scheduled, unscheduling the nodes in between, so that SU can be scheduled.
886void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
887  SUnit *OldSU = Sequence.back();
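  // Unschedule nodes from the end of the current sequence, newest first,
  // until BtSU itself has been unscheduled.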
888  while (true) {
889    Sequence.pop_back();
890    if (SU->isSucc(OldSU))
891      // Don't try to remove SU from AvailableQueue.
892      SU->isAvailable = false;
893    // FIXME: use ready cycle instead of height
894    CurCycle = OldSU->getHeight();
895    UnscheduleNodeBottomUp(OldSU);
896    AvailableQueue->setCurCycle(CurCycle);
897    if (OldSU == BtSU)
898      break;
899    OldSU = Sequence.back();
900  }
901
902  assert(!SU->isSucc(OldSU) && "Something is wrong!");
903
904  RestoreHazardCheckerBottomUp();
905
906  ReleasePending();
907
908  ++NumBacktracks;
909}
910
911static bool isOperandOf(const SUnit *SU, SDNode *N) {
912  for (const SDNode *SUNode = SU->getNode(); SUNode;
913       SUNode = SUNode->getGluedNode()) {
914    if (SUNode->isOperandOf(N))
915      return true;
916  }
917  return false;
918}
919
920/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
921/// successors to the newly created node.
922SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
923  SDNode *N = SU->getNode();
924  if (!N)
925    return NULL;
926
927  if (SU->getNode()->getGluedNode())
928    return NULL;
929
930  SUnit *NewSU;
931  bool TryUnfold = false;
932  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
933    EVT VT = N->getValueType(i);
934    if (VT == MVT::Glue)
935      return NULL;
936    else if (VT == MVT::Other)
937      TryUnfold = true;
938  }
939  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
940    const SDValue &Op = N->getOperand(i);
941    EVT VT = Op.getNode()->getValueType(Op.getResNo());
942    if (VT == MVT::Glue)
943      return NULL;
944  }
945
946  if (TryUnfold) {
947    SmallVector<SDNode*, 2> NewNodes;
948    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
949      return NULL;
950
951    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
952    assert(NewNodes.size() == 2 && "Expected a load folding node!");
953
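    // unfoldMemoryOperand produces two nodes: NewNodes[0] is the stand-alone
    // load and NewNodes[1] is the remaining operation that consumes the
    // loaded value.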
954    N = NewNodes[1];
955    SDNode *LoadNode = NewNodes[0];
956    unsigned NumVals = N->getNumValues();
957    unsigned OldNumVals = SU->getNode()->getNumValues();
958    for (unsigned i = 0; i != NumVals; ++i)
959      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
960    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
961                                   SDValue(LoadNode, 1));
962
963    // LoadNode may already exist. This can happen when there is another
964    // load from the same location that produces the same type of value
965    // but with different alignment or volatility.
966    bool isNewLoad = true;
967    SUnit *LoadSU;
968    if (LoadNode->getNodeId() != -1) {
969      LoadSU = &SUnits[LoadNode->getNodeId()];
970      isNewLoad = false;
971    } else {
972      LoadSU = CreateNewSUnit(LoadNode);
973      LoadNode->setNodeId(LoadSU->NodeNum);
974
975      InitNumRegDefsLeft(LoadSU);
976      ComputeLatency(LoadSU);
977    }
978
979    SUnit *NewSU = CreateNewSUnit(N);
980    assert(N->getNodeId() == -1 && "Node already inserted!");
981    N->setNodeId(NewSU->NodeNum);
982
983    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
984    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
985      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
986        NewSU->isTwoAddress = true;
987        break;
988      }
989    }
990    if (MCID.isCommutable())
991      NewSU->isCommutable = true;
992
993    InitNumRegDefsLeft(NewSU);
994    ComputeLatency(NewSU);
995
996    // Record all the edges to and from the old SU, by category.
997    SmallVector<SDep, 4> ChainPreds;
998    SmallVector<SDep, 4> ChainSuccs;
999    SmallVector<SDep, 4> LoadPreds;
1000    SmallVector<SDep, 4> NodePreds;
1001    SmallVector<SDep, 4> NodeSuccs;
1002    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1003         I != E; ++I) {
1004      if (I->isCtrl())
1005        ChainPreds.push_back(*I);
1006      else if (isOperandOf(I->getSUnit(), LoadNode))
1007        LoadPreds.push_back(*I);
1008      else
1009        NodePreds.push_back(*I);
1010    }
1011    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1012         I != E; ++I) {
1013      if (I->isCtrl())
1014        ChainSuccs.push_back(*I);
1015      else
1016        NodeSuccs.push_back(*I);
1017    }
1018
1019    // Now assign edges to the newly-created nodes.
1020    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
1021      const SDep &Pred = ChainPreds[i];
1022      RemovePred(SU, Pred);
1023      if (isNewLoad)
1024        AddPred(LoadSU, Pred);
1025    }
1026    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
1027      const SDep &Pred = LoadPreds[i];
1028      RemovePred(SU, Pred);
1029      if (isNewLoad)
1030        AddPred(LoadSU, Pred);
1031    }
1032    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
1033      const SDep &Pred = NodePreds[i];
1034      RemovePred(SU, Pred);
1035      AddPred(NewSU, Pred);
1036    }
1037    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
1038      SDep D = NodeSuccs[i];
1039      SUnit *SuccDep = D.getSUnit();
1040      D.setSUnit(SU);
1041      RemovePred(SuccDep, D);
1042      D.setSUnit(NewSU);
1043      AddPred(SuccDep, D);
1044      // Balance register pressure.
1045      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
1046          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1047        --NewSU->NumRegDefsLeft;
1048    }
1049    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
1050      SDep D = ChainSuccs[i];
1051      SUnit *SuccDep = D.getSUnit();
1052      D.setSUnit(SU);
1053      RemovePred(SuccDep, D);
1054      if (isNewLoad) {
1055        D.setSUnit(LoadSU);
1056        AddPred(SuccDep, D);
1057      }
1058    }
1059
1060    // Add a data dependency to reflect that NewSU reads the value defined
1061    // by LoadSU.
1062    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
1063
1064    if (isNewLoad)
1065      AvailableQueue->addNode(LoadSU);
1066    AvailableQueue->addNode(NewSU);
1067
1068    ++NumUnfolds;
1069
1070    if (NewSU->NumSuccsLeft == 0) {
1071      NewSU->isAvailable = true;
1072      return NewSU;
1073    }
1074    SU = NewSU;
1075  }
1076
1077  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
1078  NewSU = CreateClone(SU);
1079
1080  // New SUnit has the exact same predecessors.
1081  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1082       I != E; ++I)
1083    if (!I->isArtificial())
1084      AddPred(NewSU, *I);
1085
1086  // Only copy scheduled successors. Cut them from old node's successor
1087  // list and move them over.
1088  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1089  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1090       I != E; ++I) {
1091    if (I->isArtificial())
1092      continue;
1093    SUnit *SuccSU = I->getSUnit();
1094    if (SuccSU->isScheduled) {
1095      SDep D = *I;
1096      D.setSUnit(NewSU);
1097      AddPred(SuccSU, D);
1098      D.setSUnit(SU);
1099      DelDeps.push_back(std::make_pair(SuccSU, D));
1100    }
1101  }
1102  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1103    RemovePred(DelDeps[i].first, DelDeps[i].second);
1104
1105  AvailableQueue->updateNode(SU);
1106  AvailableQueue->addNode(NewSU);
1107
1108  ++NumDups;
1109  return NewSU;
1110}
1111
1112/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1113/// scheduled successors of the given SUnit to the last copy.
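/// Two copy nodes are created: CopyFromSU copies the value out of the
/// physical register class (SrcRC) into DestRC, and CopyToSU copies it back;
/// already-scheduled successors are rewired to read the value from CopyToSU.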
1114void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1115                                               const TargetRegisterClass *DestRC,
1116                                               const TargetRegisterClass *SrcRC,
1117                                               SmallVector<SUnit*, 2> &Copies) {
1118  SUnit *CopyFromSU = CreateNewSUnit(NULL);
1119  CopyFromSU->CopySrcRC = SrcRC;
1120  CopyFromSU->CopyDstRC = DestRC;
1121
1122  SUnit *CopyToSU = CreateNewSUnit(NULL);
1123  CopyToSU->CopySrcRC = DestRC;
1124  CopyToSU->CopyDstRC = SrcRC;
1125
1126  // Only copy scheduled successors. Cut them from old node's successor
1127  // list and move them over.
1128  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1129  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1130       I != E; ++I) {
1131    if (I->isArtificial())
1132      continue;
1133    SUnit *SuccSU = I->getSUnit();
1134    if (SuccSU->isScheduled) {
1135      SDep D = *I;
1136      D.setSUnit(CopyToSU);
1137      AddPred(SuccSU, D);
1138      DelDeps.push_back(std::make_pair(SuccSU, *I));
1139    }
1140    else {
1141      // Avoid scheduling the def-side copy before other successors. Otherwise
1142      // we could introduce another physreg interference on the copy and
1143      // continue inserting copies indefinitely.
1144      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
1145             /*Reg=*/0, /*isNormalMemory=*/false,
1146             /*isMustAlias=*/false, /*isArtificial=*/true);
1147      AddPred(SuccSU, D);
1148    }
1149  }
1150  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
1151    RemovePred(DelDeps[i].first, DelDeps[i].second);
1152
1153  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
1154  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
1155
1156  AvailableQueue->updateNode(SU);
1157  AvailableQueue->addNode(CopyFromSU);
1158  AvailableQueue->addNode(CopyToSU);
1159  Copies.push_back(CopyFromSU);
1160  Copies.push_back(CopyToSU);
1161
1162  ++NumPRCopies;
1163}
1164
1165/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1166/// definition of the specified node.
1167/// FIXME: Move to SelectionDAG?
1168static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1169                                 const TargetInstrInfo *TII) {
1170  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1171  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1172  unsigned NumRes = MCID.getNumDefs();
1173  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1174    if (Reg == *ImpDef)
1175      break;
1176    ++NumRes;
1177  }
1178  return N->getValueType(NumRes);
1179}
1180
1181/// CheckForLiveRegDef - Record in RegAdded/LRegs any "live" registers that the
1182/// specified register def of the specified SUnit would clobber.
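/// Overlapping aliases are checked as well, so a def of a super-register
/// interferes with a live sub-register and vice versa.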
1183static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1184                               std::vector<SUnit*> &LiveRegDefs,
1185                               SmallSet<unsigned, 4> &RegAdded,
1186                               SmallVector<unsigned, 4> &LRegs,
1187                               const TargetRegisterInfo *TRI) {
1188  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
1189
1190    // Check if Ref is live.
1191    if (!LiveRegDefs[*AliasI]) continue;
1192
1193    // Allow multiple uses of the same def.
1194    if (LiveRegDefs[*AliasI] == SU) continue;
1195
1196    // Add Reg to the set of interfering live regs.
1197    if (RegAdded.insert(*AliasI)) {
1198      LRegs.push_back(*AliasI);
1199    }
1200  }
1201}
1202
1203/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1204/// scheduling of the given node to satisfy live physical register dependencies.
1205/// If the specific node is the last one that's available to schedule, do
1206/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1207bool ScheduleDAGRRList::
1208DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1209  if (NumLiveRegs == 0)
1210    return false;
1211
1212  SmallSet<unsigned, 4> RegAdded;
1213  // If this node would clobber any "live" register, then it's not ready.
1214  //
1215  // If SU is the currently live definition of the same register that it uses,
1216  // then we are free to schedule it.
1217  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1218       I != E; ++I) {
1219    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1220      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1221                         RegAdded, LRegs, TRI);
1222  }
1223
1224  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1225    if (Node->getOpcode() == ISD::INLINEASM) {
1226      // Inline asm can clobber physical defs.
1227      unsigned NumOps = Node->getNumOperands();
1228      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1229        --NumOps;  // Ignore the glue operand.
1230
1231      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1232        unsigned Flags =
1233          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1234        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1235
1236        ++i; // Skip the ID value.
1237        if (InlineAsm::isRegDefKind(Flags) ||
1238            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1239            InlineAsm::isClobberKind(Flags)) {
1240          // Check for def of register or earlyclobber register.
1241          for (; NumVals; --NumVals, ++i) {
1242            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1243            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1244              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1245          }
1246        } else
1247          i += NumVals;
1248      }
1249      continue;
1250    }
1251
1252    if (!Node->isMachineOpcode())
1253      continue;
1254    // If we're in the middle of scheduling a call, don't begin scheduling
1255    // another call. Also, don't allow any physical registers to be live across
1256    // the call.
1257    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
1258      // Check the special calling-sequence resource.
1259      unsigned CallResource = TRI->getNumRegs();
1260      if (LiveRegDefs[CallResource]) {
1261        SDNode *Gen = LiveRegGens[CallResource]->getNode();
1262        while (SDNode *Glued = Gen->getGluedNode())
1263          Gen = Glued;
1264        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
1265          LRegs.push_back(CallResource);
1266      }
1267    }
1268    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1269    if (!MCID.ImplicitDefs)
1270      continue;
1271    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
1272      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1273  }
1274
1275  return !LRegs.empty();
1276}
1277
1278/// Return a node that can be scheduled in this cycle. Requirements:
1279/// (1) Ready: latency has been satisfied
1280/// (2) No Hazards: resources are available
1281/// (3) No Interferences: may unschedule to break register interferences.
1282SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1283  SmallVector<SUnit*, 4> Interferences;
1284  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1285
1286  SUnit *CurSU = AvailableQueue->pop();
1287  while (CurSU) {
1288    SmallVector<unsigned, 4> LRegs;
1289    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1290      break;
1291    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1292
1293    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1294    Interferences.push_back(CurSU);
1295    CurSU = AvailableQueue->pop();
1296  }
1297  if (CurSU) {
1298    // Add the nodes that aren't ready back onto the available list.
1299    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1300      Interferences[i]->isPending = false;
1301      assert(Interferences[i]->isAvailable && "must still be available");
1302      AvailableQueue->push(Interferences[i]);
1303    }
1304    return CurSU;
1305  }
1306
1307  // All candidates are delayed due to live physical reg dependencies.
1308  // Try backtracking, code duplication, or inserting cross class copies
1309  // to resolve it.
1310  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1311    SUnit *TrySU = Interferences[i];
1312    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1313
1314    // Try unscheduling up to the point where it's safe to schedule
1315    // this node.
1316    SUnit *BtSU = NULL;
1317    unsigned LiveCycle = UINT_MAX;
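    // Among the interfering registers, choose the LiveRegGens node with the
    // smallest height as the backtracking target (BtSU).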
1318    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1319      unsigned Reg = LRegs[j];
1320      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1321        BtSU = LiveRegGens[Reg];
1322        LiveCycle = BtSU->getHeight();
1323      }
1324    }
1325    if (!WillCreateCycle(TrySU, BtSU))  {
1326      BacktrackBottomUp(TrySU, BtSU);
1327
1328      // Force the current node to be scheduled before the node that
1329      // requires the physical reg dep.
1330      if (BtSU->isAvailable) {
1331        BtSU->isAvailable = false;
1332        if (!BtSU->isPending)
1333          AvailableQueue->remove(BtSU);
1334      }
1335      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1336                          /*Reg=*/0, /*isNormalMemory=*/false,
1337                          /*isMustAlias=*/false, /*isArtificial=*/true));
1338
1339      // If one or more successors have been unscheduled, then the current
1340      // node is no longer available. Schedule a successor that's now
1341      // available instead.
1342      if (!TrySU->isAvailable) {
1343        CurSU = AvailableQueue->pop();
1344      }
1345      else {
1346        CurSU = TrySU;
1347        TrySU->isPending = false;
1348        Interferences.erase(Interferences.begin()+i);
1349      }
1350      break;
1351    }
1352  }
1353
1354  if (!CurSU) {
1355    // Can't backtrack. If it's too expensive to copy the value, then try to
1356    // duplicate the nodes that produce these "too expensive to copy"
1357    // values to break the dependency. In case even that doesn't work,
1358    // insert cross class copies.
1359    // If it's not too expensive, i.e. cost != -1, issue copies.
1360    SUnit *TrySU = Interferences[0];
1361    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1362    assert(LRegs.size() == 1 && "Can't handle this yet!");
1363    unsigned Reg = LRegs[0];
1364    SUnit *LRDef = LiveRegDefs[Reg];
1365    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1366    const TargetRegisterClass *RC =
1367      TRI->getMinimalPhysRegClass(Reg, VT);
1368    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1369
1370    // If the cross copy register class is the same as RC, then it must be
1371    // possible to copy the value directly. Do not try to duplicate the def.
1372    // If the cross copy register class is not the same as RC, then it's
1373    // possible to copy the value but it requires cross register class copies
1374    // and it is expensive.
1375    // If cross copy register class is null, then it's not possible to copy
1376    // the value at all.
1377    SUnit *NewDef = 0;
1378    if (DestRC != RC) {
1379      NewDef = CopyAndMoveSuccessors(LRDef);
1380      if (!DestRC && !NewDef)
1381        report_fatal_error("Can't handle live physical register dependency!");
1382    }
1383    if (!NewDef) {
1384      // Issue copies, these can be expensive cross register class copies.
1385      SmallVector<SUnit*, 2> Copies;
1386      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1387      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1388            << " to SU #" << Copies.front()->NodeNum << "\n");
1389      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1390                          /*Reg=*/0, /*isNormalMemory=*/false,
1391                          /*isMustAlias=*/false,
1392                          /*isArtificial=*/true));
1393      NewDef = Copies.back();
1394    }
1395
1396    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1397          << " to SU #" << TrySU->NodeNum << "\n");
1398    LiveRegDefs[Reg] = NewDef;
1399    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1400                         /*Reg=*/0, /*isNormalMemory=*/false,
1401                         /*isMustAlias=*/false,
1402                         /*isArtificial=*/true));
1403    TrySU->isAvailable = false;
1404    CurSU = NewDef;
1405  }
1406
1407  assert(CurSU && "Unable to resolve live physical register dependencies!");
1408
1409  // Add the nodes that aren't ready back onto the available list.
1410  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1411    Interferences[i]->isPending = false;
1412    // May no longer be available due to backtracking.
1413    if (Interferences[i]->isAvailable) {
1414      AvailableQueue->push(Interferences[i]);
1415    }
1416  }
1417  return CurSU;
1418}
1419
1420/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1421/// schedulers.
1422void ScheduleDAGRRList::ListScheduleBottomUp() {
1423  // Release any predecessors of the special Exit node.
1424  ReleasePredecessors(&ExitSU);
1425
1426  // Add root to Available queue.
1427  if (!SUnits.empty()) {
1428    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1429    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1430    RootSU->isAvailable = true;
1431    AvailableQueue->push(RootSU);
1432  }
1433
1434  // While Available queue is not empty, grab the node with the highest
1435  // priority. If it is not ready put it back.  Schedule the node.
1436  Sequence.reserve(SUnits.size());
1437  while (!AvailableQueue->empty()) {
1438    DEBUG(dbgs() << "\nExamining Available:\n";
1439          AvailableQueue->dump(this));
1440
1441    // Pick the best node to schedule taking all constraints into
1442    // consideration.
1443    SUnit *SU = PickNodeToScheduleBottomUp();
1444
1445    AdvancePastStalls(SU);
1446
1447    ScheduleNodeBottomUp(SU);
1448
1449    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1450      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1451      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1452      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1453    }
1454  }
1455
1456  // Reverse the order since this is a bottom-up scheduler.
1457  std::reverse(Sequence.begin(), Sequence.end());
1458
1459#ifndef NDEBUG
1460  VerifySchedule(/*isBottomUp=*/true);
1461#endif
1462}
1463
1464//===----------------------------------------------------------------------===//
1465//                RegReductionPriorityQueue Definition
1466//===----------------------------------------------------------------------===//
1467//
1468// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1469// to reduce register pressure.
1470//
1471namespace {
1472class RegReductionPQBase;
1473
1474struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1475  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1476};
1477
1478#ifndef NDEBUG
1479template<class SF>
1480struct reverse_sort : public queue_sort {
1481  SF &SortFunc;
1482  reverse_sort(SF &sf) : SortFunc(sf) {}
1483  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
1484
1485  bool operator()(SUnit* left, SUnit* right) const {
1486    // reverse left/right rather than simply !SortFunc(left, right)
1487    // to expose different paths in the comparison logic.
1488    return SortFunc(right, left);
1489  }
1490};
1491#endif // NDEBUG
1492
1493/// bu_ls_rr_sort - Priority function for bottom-up register pressure
1494/// reduction scheduler.
1495struct bu_ls_rr_sort : public queue_sort {
1496  enum {
1497    IsBottomUp = true,
1498    HasReadyFilter = false
1499  };
1500
1501  RegReductionPQBase *SPQ;
1502  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1503  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1504
1505  bool operator()(SUnit* left, SUnit* right) const;
1506};
1507
1508// src_ls_rr_sort - Priority function for source order scheduler.
1509struct src_ls_rr_sort : public queue_sort {
1510  enum {
1511    IsBottomUp = true,
1512    HasReadyFilter = false
1513  };
1514
1515  RegReductionPQBase *SPQ;
1516  src_ls_rr_sort(RegReductionPQBase *spq)
1517    : SPQ(spq) {}
1518  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1519    : SPQ(RHS.SPQ) {}
1520
1521  bool operator()(SUnit* left, SUnit* right) const;
1522};
1523
1524// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1525struct hybrid_ls_rr_sort : public queue_sort {
1526  enum {
1527    IsBottomUp = true,
1528    HasReadyFilter = false
1529  };
1530
1531  RegReductionPQBase *SPQ;
1532  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1533    : SPQ(spq) {}
1534  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1535    : SPQ(RHS.SPQ) {}
1536
1537  bool isReady(SUnit *SU, unsigned CurCycle) const;
1538
1539  bool operator()(SUnit* left, SUnit* right) const;
1540};
1541
1542// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1543// scheduler.
1544struct ilp_ls_rr_sort : public queue_sort {
1545  enum {
1546    IsBottomUp = true,
1547    HasReadyFilter = false
1548  };
1549
1550  RegReductionPQBase *SPQ;
1551  ilp_ls_rr_sort(RegReductionPQBase *spq)
1552    : SPQ(spq) {}
1553  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1554    : SPQ(RHS.SPQ) {}
1555
1556  bool isReady(SUnit *SU, unsigned CurCycle) const;
1557
1558  bool operator()(SUnit* left, SUnit* right) const;
1559};
1560
1561class RegReductionPQBase : public SchedulingPriorityQueue {
1562protected:
1563  std::vector<SUnit*> Queue;
1564  unsigned CurQueueId;
1565  bool TracksRegPressure;
1566
1567  // SUnits - The SUnits for the current graph.
1568  std::vector<SUnit> *SUnits;
1569
1570  MachineFunction &MF;
1571  const TargetInstrInfo *TII;
1572  const TargetRegisterInfo *TRI;
1573  const TargetLowering *TLI;
1574  ScheduleDAGRRList *scheduleDAG;
1575
1576  // SethiUllmanNumbers - The SethiUllman number for each node.
1577  std::vector<unsigned> SethiUllmanNumbers;
1578
1579  /// RegPressure - Tracking current reg pressure per register class.
1580  ///
1581  std::vector<unsigned> RegPressure;
1582
1583  /// RegLimit - Tracking the number of allocatable registers per register
1584  /// class.
1585  std::vector<unsigned> RegLimit;
1586
1587public:
1588  RegReductionPQBase(MachineFunction &mf,
1589                     bool hasReadyFilter,
1590                     bool tracksrp,
1591                     const TargetInstrInfo *tii,
1592                     const TargetRegisterInfo *tri,
1593                     const TargetLowering *tli)
1594    : SchedulingPriorityQueue(hasReadyFilter),
1595      CurQueueId(0), TracksRegPressure(tracksrp),
1596      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1597    if (TracksRegPressure) {
1598      unsigned NumRC = TRI->getNumRegClasses();
1599      RegLimit.resize(NumRC);
1600      RegPressure.resize(NumRC);
1601      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1602      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1603      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1604             E = TRI->regclass_end(); I != E; ++I)
1605        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1606    }
1607  }
1608
1609  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1610    scheduleDAG = scheduleDag;
1611  }
1612
1613  ScheduleHazardRecognizer* getHazardRec() {
1614    return scheduleDAG->getHazardRec();
1615  }
1616
1617  void initNodes(std::vector<SUnit> &sunits);
1618
1619  void addNode(const SUnit *SU);
1620
1621  void updateNode(const SUnit *SU);
1622
1623  void releaseState() {
1624    SUnits = 0;
1625    SethiUllmanNumbers.clear();
1626    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1627  }
1628
1629  unsigned getNodePriority(const SUnit *SU) const;
1630
1631  unsigned getNodeOrdering(const SUnit *SU) const {
1632    if (!SU->getNode()) return 0;
1633
1634    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1635  }
1636
1637  bool empty() const { return Queue.empty(); }
1638
1639  void push(SUnit *U) {
1640    assert(!U->NodeQueueId && "Node in the queue already");
1641    U->NodeQueueId = ++CurQueueId;
1642    Queue.push_back(U);
1643  }
1644
1645  void remove(SUnit *SU) {
1646    assert(!Queue.empty() && "Queue is empty!");
1647    assert(SU->NodeQueueId != 0 && "Not in queue!");
1648    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1649                                                 SU);
1650    if (I != prior(Queue.end()))
1651      std::swap(*I, Queue.back());
1652    Queue.pop_back();
1653    SU->NodeQueueId = 0;
1654  }
1655
1656  bool tracksRegPressure() const { return TracksRegPressure; }
1657
1658  void dumpRegPressure() const;
1659
1660  bool HighRegPressure(const SUnit *SU) const;
1661
1662  bool MayReduceRegPressure(SUnit *SU) const;
1663
1664  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1665
1666  void ScheduledNode(SUnit *SU);
1667
1668  void UnscheduledNode(SUnit *SU);
1669
1670protected:
1671  bool canClobber(const SUnit *SU, const SUnit *Op);
1672  void AddPseudoTwoAddrDeps();
1673  void PrescheduleNodesWithMultipleUses();
1674  void CalculateSethiUllmanNumbers();
1675};
1676
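// Pop the best candidate with a linear scan over the queue: keep whichever
// node the Picker prefers (Picker(A, B) returning true means B has higher
// priority than A), then remove the winner in O(1) by swapping it with the
// back of the vector before popping.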
1677template<class SF>
1678static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1679  std::vector<SUnit *>::iterator Best = Q.begin();
1680  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1681         E = Q.end(); I != E; ++I)
1682    if (Picker(*Best, *I))
1683      Best = I;
1684  SUnit *V = *Best;
1685  if (Best != prior(Q.end()))
1686    std::swap(*Best, Q.back());
1687  Q.pop_back();
1688  return V;
1689}
1690
1691template<class SF>
1692SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1693#ifndef NDEBUG
1694  if (DAG->StressSched) {
1695    reverse_sort<SF> RPicker(Picker);
1696    return popFromQueueImpl(Q, RPicker);
1697  }
1698#endif
1699  (void)DAG;
1700  return popFromQueueImpl(Q, Picker);
1701}
1702
1703template<class SF>
1704class RegReductionPriorityQueue : public RegReductionPQBase {
1705  SF Picker;
1706
1707public:
1708  RegReductionPriorityQueue(MachineFunction &mf,
1709                            bool tracksrp,
1710                            const TargetInstrInfo *tii,
1711                            const TargetRegisterInfo *tri,
1712                            const TargetLowering *tli)
1713    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1714      Picker(this) {}
1715
1716  bool isBottomUp() const { return SF::IsBottomUp; }
1717
1718  bool isReady(SUnit *U) const {
1719    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1720  }
1721
1722  SUnit *pop() {
1723    if (Queue.empty()) return NULL;
1724
1725    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1726    V->NodeQueueId = 0;
1727    return V;
1728  }
1729
1730  void dump(ScheduleDAG *DAG) const {
1731    // Emulate pop() without clobbering NodeQueueIds.
1732    std::vector<SUnit*> DumpQueue = Queue;
1733    SF DumpPicker = Picker;
1734    while (!DumpQueue.empty()) {
1735      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1736      dbgs() << "Height " << SU->getHeight() << ": ";
1737      SU->dump(DAG);
1738    }
1739  }
1740};
1741
1742typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1743BURegReductionPriorityQueue;
1744
1745typedef RegReductionPriorityQueue<src_ls_rr_sort>
1746SrcRegReductionPriorityQueue;
1747
1748typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1749HybridBURRPriorityQueue;
1750
1751typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1752ILPBURRPriorityQueue;
1753} // end anonymous namespace
1754
1755//===----------------------------------------------------------------------===//
1756//           Static Node Priority for Register Pressure Reduction
1757//===----------------------------------------------------------------------===//
1758
1759// Check for special nodes that bypass scheduling heuristics.
1760// Currently this pushes TokenFactor nodes down, but may be used for other
1761// pseudo-ops as well.
1762//
1763// Return -1 to schedule right above left, 1 for left above right.
1764// Return 0 if no bias exists.
1765static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1766  bool LSchedLow = left->isScheduleLow;
1767  bool RSchedLow = right->isScheduleLow;
1768  if (LSchedLow != RSchedLow)
1769    return LSchedLow < RSchedLow ? 1 : -1;
1770  return 0;
1771}
1772
1773/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1774/// Smaller number is the higher priority.
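/// As a small illustration of the recurrence below: a node with no non-chain
/// operands gets 1; a node whose operands both carry the number 1 gets
/// 1 + 1 = 2 (equal operand numbers add an extra); a node whose operands carry
/// 2 and 1 just keeps the maximum, 2. So the number grows where operand
/// subtrees tie, not simply with expression depth.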
1775static unsigned
1776CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1777  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1778  if (SethiUllmanNumber != 0)
1779    return SethiUllmanNumber;
1780
1781  unsigned Extra = 0;
1782  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1783       I != E; ++I) {
1784    if (I->isCtrl()) continue;  // ignore chain preds
1785    SUnit *PredSU = I->getSUnit();
1786    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1787    if (PredSethiUllman > SethiUllmanNumber) {
1788      SethiUllmanNumber = PredSethiUllman;
1789      Extra = 0;
1790    } else if (PredSethiUllman == SethiUllmanNumber)
1791      ++Extra;
1792  }
1793
1794  SethiUllmanNumber += Extra;
1795
1796  if (SethiUllmanNumber == 0)
1797    SethiUllmanNumber = 1;
1798
1799  return SethiUllmanNumber;
1800}
1801
1802/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1803/// scheduling units.
1804void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1805  SethiUllmanNumbers.assign(SUnits->size(), 0);
1806
1807  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1808    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1809}
1810
1811void RegReductionPQBase::addNode(const SUnit *SU) {
1812  unsigned SUSize = SethiUllmanNumbers.size();
1813  if (SUnits->size() > SUSize)
1814    SethiUllmanNumbers.resize(SUSize*2, 0);
1815  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1816}
1817
1818void RegReductionPQBase::updateNode(const SUnit *SU) {
1819  SethiUllmanNumbers[SU->NodeNum] = 0;
1820  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1821}
1822
1823// Lower priority means schedule further down. For bottom-up scheduling, lower
1824// priority SUs are scheduled before higher priority SUs.
1825unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1826  assert(SU->NodeNum < SethiUllmanNumbers.size());
1827  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1828  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1829    // CopyToReg should be close to its uses to facilitate coalescing and
1830    // avoid spilling.
1831    return 0;
1832  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1833      Opc == TargetOpcode::SUBREG_TO_REG ||
1834      Opc == TargetOpcode::INSERT_SUBREG)
1835    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1836    // close to their uses to facilitate coalescing.
1837    return 0;
1838  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1839    // If SU does not have a register use, i.e. it doesn't produce a value
1840    // that would be consumed (e.g. store), then it terminates a chain of
1841    // computation.  Give it a large SethiUllman number so it will be
1842    // scheduled right before its predecessors, so that it doesn't lengthen
1843    // their live ranges.
1844    return 0xffff;
1845  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1846    // If SU does not have a register def, schedule it close to its uses
1847    // because it does not lengthen any live ranges.
1848    return 0;
1849#if 1
1850  return SethiUllmanNumbers[SU->NodeNum];
1851#else
1852  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1853  if (SU->isCallOp) {
1854    // FIXME: This assumes all of the defs are used as call operands.
1855    int NP = (int)Priority - SU->getNode()->getNumValues();
1856    return (NP > 0) ? NP : 0;
1857  }
1858  return Priority;
1859#endif
1860}
1861
1862//===----------------------------------------------------------------------===//
1863//                     Register Pressure Tracking
1864//===----------------------------------------------------------------------===//
1865
1866void RegReductionPQBase::dumpRegPressure() const {
1867  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1868         E = TRI->regclass_end(); I != E; ++I) {
1869    const TargetRegisterClass *RC = *I;
1870    unsigned Id = RC->getID();
1871    unsigned RP = RegPressure[Id];
1872    if (!RP) continue;
1873    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1874          << '\n');
1875  }
1876}
1877
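// HighRegPressure - Return true if scheduling SU (bottom-up) would push the
// pressure of one of its operands' register classes to or past the limit
// returned by getRegPressureLimit, i.e. it would likely force a spill.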
1878bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1879  if (!TLI)
1880    return false;
1881
1882  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1883       I != E; ++I) {
1884    if (I->isCtrl())
1885      continue;
1886    SUnit *PredSU = I->getSUnit();
1887    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1888    // to cover the number of registers defined (they are all live).
1889    if (PredSU->NumRegDefsLeft == 0) {
1890      continue;
1891    }
1892    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1893         RegDefPos.IsValid(); RegDefPos.Advance()) {
1894      unsigned RCId, Cost;
1895      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1896
1897      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1898        return true;
1899    }
1900  }
1901  return false;
1902}
1903
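// MayReduceRegPressure - Return true if SU defines a used value in a register
// class that is currently at or above its pressure limit; scheduling SU
// bottom-up ends that value's live range and so may relieve the pressure.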
1904bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1905  const SDNode *N = SU->getNode();
1906
1907  if (!N->isMachineOpcode() || !SU->NumSuccs)
1908    return false;
1909
1910  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1911  for (unsigned i = 0; i != NumDefs; ++i) {
1912    EVT VT = N->getValueType(i);
1913    if (!N->hasAnyUseOfValue(i))
1914      continue;
1915    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1916    if (RegPressure[RCId] >= RegLimit[RCId])
1917      return true;
1918  }
1919  return false;
1920}
1921
1922// Compute the register pressure contribution by this instruction by count up
1923// for uses that are not live and down for defs. Only count register classes
1924// that are already under high pressure. As a side effect, compute the number of
1925// uses of registers that are already live.
1926//
1927// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1928// so could probably be factored.
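//
// The result is signed: each not-yet-live operand def in a class already at or
// over its limit counts +1 (scheduling SU would make it live), and each of
// SU's own used defs in such a class counts -1 (scheduling SU bottom-up ends
// that live range). A positive result therefore suggests SU would raise
// pressure where it already hurts; a negative one suggests it would relieve it.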
1929int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1930  LiveUses = 0;
1931  int PDiff = 0;
1932  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1933       I != E; ++I) {
1934    if (I->isCtrl())
1935      continue;
1936    SUnit *PredSU = I->getSUnit();
1937    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1938    // to cover the number of registers defined (they are all live).
1939    if (PredSU->NumRegDefsLeft == 0) {
1940      if (PredSU->getNode()->isMachineOpcode())
1941        ++LiveUses;
1942      continue;
1943    }
1944    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1945         RegDefPos.IsValid(); RegDefPos.Advance()) {
1946      EVT VT = RegDefPos.GetValue();
1947      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1948      if (RegPressure[RCId] >= RegLimit[RCId])
1949        ++PDiff;
1950    }
1951  }
1952  const SDNode *N = SU->getNode();
1953
1954  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1955    return PDiff;
1956
1957  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1958  for (unsigned i = 0; i != NumDefs; ++i) {
1959    EVT VT = N->getValueType(i);
1960    if (!N->hasAnyUseOfValue(i))
1961      continue;
1962    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1963    if (RegPressure[RCId] >= RegLimit[RCId])
1964      --PDiff;
1965  }
1966  return PDiff;
1967}
1968
1969void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1970  if (!TracksRegPressure)
1971    return;
1972
1973  if (!SU->getNode())
1974    return;
1975
1976  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1977       I != E; ++I) {
1978    if (I->isCtrl())
1979      continue;
1980    SUnit *PredSU = I->getSUnit();
1981    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1982    // to cover the number of registers defined (they are all live).
1983    if (PredSU->NumRegDefsLeft == 0) {
1984      continue;
1985    }
1986    // FIXME: The ScheduleDAG currently loses information about which of a
1987    // node's values is consumed by each dependence. Consequently, if the node
1988    // defines multiple register classes, we don't know which to pressurize
1989    // here. Instead the following loop consumes the register defs in an
1990    // arbitrary order. At least it handles the common case of clustered loads
1991    // to the same class. For precise liveness, each SDep needs to indicate the
1992    // result number. But that tightly couples the ScheduleDAG with the
1993    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1994    // value type or register class to SDep.
1995    //
1996    // The most important aspect of register tracking is balancing the increase
1997    // here with the reduction further below. Note that this SU may use multiple
1998    // defs in PredSU. They can't be determined here, but we've already
1999    // compensated by reducing NumRegDefsLeft in PredSU during
2000    // ScheduleDAGSDNodes::AddSchedEdges.
2001    --PredSU->NumRegDefsLeft;
2002    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2003    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2004         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2005      if (SkipRegDefs)
2006        continue;
2007
2008      unsigned RCId, Cost;
2009      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
2010      RegPressure[RCId] += Cost;
2011      break;
2012    }
2013  }
2014
2015  // We should have this assert, but there may be dead SDNodes that never
2016  // materialize as SUnits, so they don't appear to generate liveness.
2017  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2018  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2019  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2020       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2021    if (SkipRegDefs > 0)
2022      continue;
2023    unsigned RCId, Cost;
2024    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
2025    if (RegPressure[RCId] < Cost) {
2026      // Register pressure tracking is imprecise. This can happen. But we try
2027      // hard not to let it happen because it likely results in poor scheduling.
2028      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
2029      RegPressure[RCId] = 0;
2030    }
2031    else {
2032      RegPressure[RCId] -= Cost;
2033    }
2034  }
2035  dumpRegPressure();
2036}
2037
2038void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
2039  if (!TracksRegPressure)
2040    return;
2041
2042  const SDNode *N = SU->getNode();
2043  if (!N) return;
2044
2045  if (!N->isMachineOpcode()) {
2046    if (N->getOpcode() != ISD::CopyToReg)
2047      return;
2048  } else {
2049    unsigned Opc = N->getMachineOpcode();
2050    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2051        Opc == TargetOpcode::INSERT_SUBREG ||
2052        Opc == TargetOpcode::SUBREG_TO_REG ||
2053        Opc == TargetOpcode::REG_SEQUENCE ||
2054        Opc == TargetOpcode::IMPLICIT_DEF)
2055      return;
2056  }
2057
2058  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2059       I != E; ++I) {
2060    if (I->isCtrl())
2061      continue;
2062    SUnit *PredSU = I->getSUnit();
2063    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2064    // counts data deps.
2065    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2066      continue;
2067    const SDNode *PN = PredSU->getNode();
2068    if (!PN->isMachineOpcode()) {
2069      if (PN->getOpcode() == ISD::CopyFromReg) {
2070        EVT VT = PN->getValueType(0);
2071        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2072        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2073      }
2074      continue;
2075    }
2076    unsigned POpc = PN->getMachineOpcode();
2077    if (POpc == TargetOpcode::IMPLICIT_DEF)
2078      continue;
2079    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2080        POpc == TargetOpcode::INSERT_SUBREG ||
2081        POpc == TargetOpcode::SUBREG_TO_REG) {
2082      EVT VT = PN->getValueType(0);
2083      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2084      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2085      continue;
2086    }
2087    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2088    for (unsigned i = 0; i != NumDefs; ++i) {
2089      EVT VT = PN->getValueType(i);
2090      if (!PN->hasAnyUseOfValue(i))
2091        continue;
2092      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2093      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2094        // Register pressure tracking is imprecise. This can happen.
2095        RegPressure[RCId] = 0;
2096      else
2097        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2098    }
2099  }
2100
2101  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2102  // may transfer data dependencies to CopyToReg.
2103  if (SU->NumSuccs && N->isMachineOpcode()) {
2104    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2105    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2106      EVT VT = N->getValueType(i);
2107      if (VT == MVT::Glue || VT == MVT::Other)
2108        continue;
2109      if (!N->hasAnyUseOfValue(i))
2110        continue;
2111      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2112      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2113    }
2114  }
2115
2116  dumpRegPressure();
2117}
2118
2119//===----------------------------------------------------------------------===//
2120//           Dynamic Node Priority for Register Pressure Reduction
2121//===----------------------------------------------------------------------===//
2122
2123/// closestSucc - Returns the scheduled cycle of the successor which is
2124/// closest to the current cycle.
2125static unsigned closestSucc(const SUnit *SU) {
2126  unsigned MaxHeight = 0;
2127  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2128       I != E; ++I) {
2129    if (I->isCtrl()) continue;  // ignore chain succs
2130    unsigned Height = I->getSUnit()->getHeight();
2131    // If there are a bunch of CopyToRegs stacked up, they should be considered
2132    // to be at the same position.
2133    if (I->getSUnit()->getNode() &&
2134        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2135      Height = closestSucc(I->getSUnit())+1;
2136    if (Height > MaxHeight)
2137      MaxHeight = Height;
2138  }
2139  return MaxHeight;
2140}
2141
2142/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2143/// for scratch registers, i.e. number of data dependencies.
2144static unsigned calcMaxScratches(const SUnit *SU) {
2145  unsigned Scratches = 0;
2146  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2147       I != E; ++I) {
2148    if (I->isCtrl()) continue;  // ignore chain preds
2149    Scratches++;
2150  }
2151  return Scratches;
2152}
2153
2154/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2155/// CopyFromReg from a virtual register.
2156static bool hasOnlyLiveInOpers(const SUnit *SU) {
2157  bool RetVal = false;
2158  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2159       I != E; ++I) {
2160    if (I->isCtrl()) continue;
2161    const SUnit *PredSU = I->getSUnit();
2162    if (PredSU->getNode() &&
2163        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2164      unsigned Reg =
2165        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2166      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2167        RetVal = true;
2168        continue;
2169      }
2170    }
2171    return false;
2172  }
2173  return RetVal;
2174}
2175
2176/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2177/// CopyToReg to a virtual register. This SU def is probably a liveout and
2178/// it has no other use. It should be scheduled closer to the terminator.
2179static bool hasOnlyLiveOutUses(const SUnit *SU) {
2180  bool RetVal = false;
2181  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2182       I != E; ++I) {
2183    if (I->isCtrl()) continue;
2184    const SUnit *SuccSU = I->getSUnit();
2185    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2186      unsigned Reg =
2187        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2188      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2189        RetVal = true;
2190        continue;
2191      }
2192    }
2193    return false;
2194  }
2195  return RetVal;
2196}
2197
2198// Set isVRegCycle for a node with only live-in operands and live-out uses. Also
2199// set isVRegCycle for its CopyFromReg operands.
2200//
2201// This is only relevant for single-block loops, in which case the VRegCycle
2202// node is likely an induction variable in which the operand and target virtual
2203// registers should be coalesced (e.g. pre/post increment values). Setting the
2204// isVRegCycle flag helps the scheduler prioritize other uses of the same
2205// CopyFromReg so that this node becomes the virtual register "kill". This
2206// avoids interference between the values live in and out of the block and
2207// eliminates a copy inside the loop.
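//
// A typical example is a post-increment such as i.next = add i, 1 in a loop
// body: the old value arrives via CopyFromReg of one virtual register and the
// incremented value leaves via CopyToReg of another. Making this node the last
// use (the kill) of the incoming value lets the coalescer tie the two
// registers together and drop a copy inside the loop.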
2208static void initVRegCycle(SUnit *SU) {
2209  if (DisableSchedVRegCycle)
2210    return;
2211
2212  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2213    return;
2214
2215  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2216
2217  SU->isVRegCycle = true;
2218
2219  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2220       I != E; ++I) {
2221    if (I->isCtrl()) continue;
2222    I->getSUnit()->isVRegCycle = true;
2223  }
2224}
2225
2226// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2227// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2228static void resetVRegCycle(SUnit *SU) {
2229  if (!SU->isVRegCycle)
2230    return;
2231
2232  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2233       I != E; ++I) {
2234    if (I->isCtrl()) continue;  // ignore chain preds
2235    SUnit *PredSU = I->getSUnit();
2236    if (PredSU->isVRegCycle) {
2237      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2238             "VRegCycle def must be CopyFromReg");
2239      I->getSUnit()->isVRegCycle = 0;
2240    }
2241  }
2242}
2243
2244// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2245// means a node that defines the VRegCycle has not been scheduled yet.
2246static bool hasVRegCycleUse(const SUnit *SU) {
2247  // If this SU also defines the VReg, don't hoist it as a "use".
2248  if (SU->isVRegCycle)
2249    return false;
2250
2251  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2252       I != E; ++I) {
2253    if (I->isCtrl()) continue;  // ignore chain preds
2254    if (I->getSUnit()->isVRegCycle &&
2255        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2256      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2257      return true;
2258    }
2259  }
2260  return false;
2261}
2262
2263// Check for either a dependence (latency) or resource (hazard) stall.
2264//
2265// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2266static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2267  if ((int)SPQ->getCurCycle() < Height) return true;
2268  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2269      != ScheduleHazardRecognizer::NoHazard)
2270    return true;
2271  return false;
2272}
2273
2274// Return -1 if left has higher priority, 1 if right has higher priority.
2275// Return 0 if latency-based priority is equivalent.
2276static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2277                            RegReductionPQBase *SPQ) {
2278  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2279  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2280  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2281  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2282  int LHeight = (int)left->getHeight() + LPenalty;
2283  int RHeight = (int)right->getHeight() + RPenalty;
2284
2285  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2286    BUHasStall(left, LHeight, SPQ);
2287  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2288    BUHasStall(right, RHeight, SPQ);
2289
2290  // If scheduling only one of the nodes will cause a pipeline stall, delay
2291  // that one. If scheduling both of them will cause a pipeline stall, sort
2292  // them according to their height.
2293  if (LStall) {
2294    if (!RStall) {
2295      DEBUG(++FactorCount[FactStall]);
2296      return 1;
2297    }
2298    if (LHeight != RHeight) {
2299      DEBUG(++FactorCount[FactStall]);
2300      return LHeight > RHeight ? 1 : -1;
2301    }
2302  } else if (RStall) {
2303    DEBUG(++FactorCount[FactStall]);
2304    return -1;
2305  }
2306
2307  // If either node is scheduling for latency, sort them by height/depth
2308  // and latency.
2309  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2310                     right->SchedulingPref == Sched::ILP)) {
2311    if (DisableSchedCycles) {
2312      if (LHeight != RHeight) {
2313        DEBUG(++FactorCount[FactHeight]);
2314        return LHeight > RHeight ? 1 : -1;
2315      }
2316    }
2317    else {
2318      // If neither instruction stalls (!LStall && !RStall) then their heights
2319      // are already covered, so only depth matters. We also reach this if both
2320      // stall but have the same height.
2321      int LDepth = left->getDepth() - LPenalty;
2322      int RDepth = right->getDepth() - RPenalty;
2323      if (LDepth != RDepth) {
2324        DEBUG(++FactorCount[FactDepth]);
2325        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2326              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2327              << ") depth " << RDepth << "\n");
2328        return LDepth < RDepth ? 1 : -1;
2329      }
2330    }
2331    if (left->Latency != right->Latency) {
2332      DEBUG(++FactorCount[FactOther]);
2333      return left->Latency > right->Latency ? 1 : -1;
2334    }
2335  }
2336  return 0;
2337}
2338
2339static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2340  // Schedule physical register definitions close to their use. This is
2341  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2342  // long as shortening physreg live ranges is generally good, we can defer
2343  // creating a subtarget hook.
2344  if (!DisableSchedPhysRegJoin) {
2345    bool LHasPhysReg = left->hasPhysRegDefs;
2346    bool RHasPhysReg = right->hasPhysRegDefs;
2347    if (LHasPhysReg != RHasPhysReg) {
2348      DEBUG(++FactorCount[FactRegUses]);
2349      #ifndef NDEBUG
2350      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2351      #endif
2352      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2353            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2354            << PhysRegMsg[RHasPhysReg] << "\n");
2355      return LHasPhysReg < RHasPhysReg;
2356    }
2357  }
2358
2359  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2360  unsigned LPriority = SPQ->getNodePriority(left);
2361  unsigned RPriority = SPQ->getNodePriority(right);
2362
2363  // Be really careful about hoisting call operands above previous calls.
2364  // Only allow it if it would reduce register pressure.
2365  if (left->isCall && right->isCallOp) {
2366    unsigned RNumVals = right->getNode()->getNumValues();
2367    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2368  }
2369  if (right->isCall && left->isCallOp) {
2370    unsigned LNumVals = left->getNode()->getNumValues();
2371    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2372  }
2373
2374  if (LPriority != RPriority) {
2375    DEBUG(++FactorCount[FactStatic]);
2376    return LPriority > RPriority;
2377  }
2378
2379  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2380  // the same, then keep source order.
2381  if (left->isCall || right->isCall) {
2382    unsigned LOrder = SPQ->getNodeOrdering(left);
2383    unsigned ROrder = SPQ->getNodeOrdering(right);
2384
2385    // Prefer an ordering where the node with the lower non-zero order number
2386    // gets the higher preference.
2387    if ((LOrder || ROrder) && LOrder != ROrder)
2388      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2389  }
2390
2391  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2392  // e.g.
2393  // t1 = op t2, c1
2394  // t3 = op t4, c2
2395  //
2396  // and the following instructions are both ready.
2397  // t2 = op c3
2398  // t4 = op c4
2399  //
2400  // Then schedule t2 = op first.
2401  // i.e.
2402  // t4 = op c4
2403  // t2 = op c3
2404  // t1 = op t2, c1
2405  // t3 = op t4, c2
2406  //
2407  // This creates more short live intervals.
2408  unsigned LDist = closestSucc(left);
2409  unsigned RDist = closestSucc(right);
2410  if (LDist != RDist) {
2411    DEBUG(++FactorCount[FactOther]);
2412    return LDist < RDist;
2413  }
2414
2415  // How many registers become live when the node is scheduled.
2416  unsigned LScratch = calcMaxScratches(left);
2417  unsigned RScratch = calcMaxScratches(right);
2418  if (LScratch != RScratch) {
2419    DEBUG(++FactorCount[FactOther]);
2420    return LScratch > RScratch;
2421  }
2422
2423  // Comparing latency against a call makes little sense unless the node
2424  // is register pressure-neutral.
2425  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2426    return (left->NodeQueueId > right->NodeQueueId);
2427
2428  // Do not compare latencies when one or both of the nodes are calls.
2429  if (!DisableSchedCycles &&
2430      !(left->isCall || right->isCall)) {
2431    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2432    if (result != 0)
2433      return result > 0;
2434  }
2435  else {
2436    if (left->getHeight() != right->getHeight()) {
2437      DEBUG(++FactorCount[FactHeight]);
2438      return left->getHeight() > right->getHeight();
2439    }
2440
2441    if (left->getDepth() != right->getDepth()) {
2442      DEBUG(++FactorCount[FactDepth]);
2443      return left->getDepth() < right->getDepth();
2444    }
2445  }
2446
2447  assert(left->NodeQueueId && right->NodeQueueId &&
2448         "NodeQueueId cannot be zero");
2449  DEBUG(++FactorCount[FactOther]);
2450  return (left->NodeQueueId > right->NodeQueueId);
2451}
2452
2453// Bottom up
2454bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2455  if (int res = checkSpecialNodes(left, right))
2456    return res > 0;
2457
2458  return BURRSort(left, right, SPQ);
2459}
2460
2461// Source order, otherwise bottom up.
2462bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2463  if (int res = checkSpecialNodes(left, right))
2464    return res > 0;
2465
2466  unsigned LOrder = SPQ->getNodeOrdering(left);
2467  unsigned ROrder = SPQ->getNodeOrdering(right);
2468
2469  // Prefer an ordering where the node with the lower non-zero order number
2470  // gets the higher preference.
2471  if ((LOrder || ROrder) && LOrder != ROrder)
2472    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2473
2474  return BURRSort(left, right, SPQ);
2475}
2476
2477// If the time between now and when the instruction will be ready can cover
2478// the spill code, then avoid adding it to the ready queue. This gives long
2479// stalls highest priority and allows hoisting across calls. It should also
2480// speed up processing the available queue.
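//
// For example, with the ReadyDelay of 3 below, an SU whose height is more than
// 3 cycles above the current cycle stays in the pending queue (unless it may
// reduce register pressure), on the theory that the wait is long enough to
// cover any spill code that scheduling it now might cause.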
2481bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2482  static const unsigned ReadyDelay = 3;
2483
2484  if (SPQ->MayReduceRegPressure(SU)) return true;
2485
2486  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2487
2488  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2489      != ScheduleHazardRecognizer::NoHazard)
2490    return false;
2491
2492  return true;
2493}
2494
2495// Return true if right should be scheduled with higher priority than left.
2496bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2497  if (int res = checkSpecialNodes(left, right))
2498    return res > 0;
2499
2500  if (left->isCall || right->isCall)
2501    // No way to compute latency of calls.
2502    return BURRSort(left, right, SPQ);
2503
2504  bool LHigh = SPQ->HighRegPressure(left);
2505  bool RHigh = SPQ->HighRegPressure(right);
2506  // Avoid causing spills. If register pressure is high, schedule for
2507  // register pressure reduction.
2508  if (LHigh && !RHigh) {
2509    DEBUG(++FactorCount[FactPressureDiff]);
2510    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2511          << right->NodeNum << ")\n");
2512    return true;
2513  }
2514  else if (!LHigh && RHigh) {
2515    DEBUG(++FactorCount[FactPressureDiff]);
2516    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2517          << left->NodeNum << ")\n");
2518    return false;
2519  }
2520  if (!LHigh && !RHigh) {
2521    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2522    if (result != 0)
2523      return result > 0;
2524  }
2525  return BURRSort(left, right, SPQ);
2526}
2527
2528// Schedule as many instructions in each cycle as possible. So don't make an
2529// instruction available unless it is ready in the current cycle.
2530bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2531  if (SU->getHeight() > CurCycle) return false;
2532
2533  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2534      != ScheduleHazardRecognizer::NoHazard)
2535    return false;
2536
2537  return true;
2538}
2539
2540static bool canEnableCoalescing(SUnit *SU) {
2541  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2542  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2543    // CopyToReg should be close to its uses to facilitate coalescing and
2544    // avoid spilling.
2545    return true;
2546
2547  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2548      Opc == TargetOpcode::SUBREG_TO_REG ||
2549      Opc == TargetOpcode::INSERT_SUBREG)
2550    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2551    // close to their uses to facilitate coalescing.
2552    return true;
2553
2554  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2555    // If SU does not have a register def, schedule it close to its uses
2556    // because it does not lengthen any live ranges.
2557    return true;
2558
2559  return false;
2560}
2561
2562// list-ilp is currently an experimental scheduler that allows various
2563// heuristics to be enabled prior to the normal register reduction logic.
2564bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2565  if (int res = checkSpecialNodes(left, right))
2566    return res > 0;
2567
2568  if (left->isCall || right->isCall)
2569    // No way to compute latency of calls.
2570    return BURRSort(left, right, SPQ);
2571
2572  unsigned LLiveUses = 0, RLiveUses = 0;
2573  int LPDiff = 0, RPDiff = 0;
2574  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2575    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2576    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2577  }
2578  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2579    DEBUG(++FactorCount[FactPressureDiff]);
2580    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2581          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2582    return LPDiff > RPDiff;
2583  }
2584
2585  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2586    bool LReduce = canEnableCoalescing(left);
2587    bool RReduce = canEnableCoalescing(right);
2588    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2589    if (LReduce && !RReduce) return false;
2590    if (RReduce && !LReduce) return true;
2591  }
2592
2593  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2594    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2595          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2596    DEBUG(++FactorCount[FactRegUses]);
2597    return LLiveUses < RLiveUses;
2598  }
2599
2600  if (!DisableSchedStalls) {
2601    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2602    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2603    if (LStall != RStall) {
2604      DEBUG(++FactorCount[FactHeight]);
2605      return left->getHeight() > right->getHeight();
2606    }
2607  }
2608
2609  if (!DisableSchedCriticalPath) {
2610    int spread = (int)left->getDepth() - (int)right->getDepth();
2611    if (std::abs(spread) > MaxReorderWindow) {
2612      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2613            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2614            << right->getDepth() << "\n");
2615      DEBUG(++FactorCount[FactDepth]);
2616      return left->getDepth() < right->getDepth();
2617    }
2618  }
2619
2620  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2621    int spread = (int)left->getHeight() - (int)right->getHeight();
2622    if (std::abs(spread) > MaxReorderWindow) {
2623      DEBUG(++FactorCount[FactHeight]);
2624      return left->getHeight() > right->getHeight();
2625    }
2626  }
2627
2628  return BURRSort(left, right, SPQ);
2629}
2630
2631void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2632  SUnits = &sunits;
2633  // Add pseudo dependency edges for two-address nodes.
2634  if (!Disable2AddrHack)
2635    AddPseudoTwoAddrDeps();
2636  // Reroute edges to nodes with multiple uses.
2637  if (!TracksRegPressure)
2638    PrescheduleNodesWithMultipleUses();
2639  // Calculate node priorities.
2640  CalculateSethiUllmanNumbers();
2641
2642  // For single block loops, mark nodes that look like canonical IV increments.
2643  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2644    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2645      initVRegCycle(&sunits[i]);
2646    }
2647  }
2648}
2649
2650//===----------------------------------------------------------------------===//
2651//                    Preschedule for Register Pressure
2652//===----------------------------------------------------------------------===//
2653
2654bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2655  if (SU->isTwoAddress) {
2656    unsigned Opc = SU->getNode()->getMachineOpcode();
2657    const MCInstrDesc &MCID = TII->get(Opc);
2658    unsigned NumRes = MCID.getNumDefs();
2659    unsigned NumOps = MCID.getNumOperands() - NumRes;
2660    for (unsigned i = 0; i != NumOps; ++i) {
2661      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2662        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2663        if (DU->getNodeId() != -1 &&
2664            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2665          return true;
2666      }
2667    }
2668  }
2669  return false;
2670}
2671
2672/// canClobberReachingPhysRegUse - True if SU would clobber one of its
2673/// successors' explicit physregs whose definition can reach DepSU.
2674/// i.e. DepSU should not be scheduled above SU.
2675static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2676                                         ScheduleDAGRRList *scheduleDAG,
2677                                         const TargetInstrInfo *TII,
2678                                         const TargetRegisterInfo *TRI) {
2679  const unsigned *ImpDefs
2680    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2681  if (!ImpDefs)
2682    return false;
2683
2684  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2685       SI != SE; ++SI) {
2686    SUnit *SuccSU = SI->getSUnit();
2687    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2688           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2689      if (!PI->isAssignedRegDep())
2690        continue;
2691
2692      for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
2693        // Return true if SU clobbers this physical register use and the
2694        // definition of the register can reach DepSU. IsReachable queries a
2695        // topological forward sort of the DAG (following the successors).
2696        if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2697            scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2698          return true;
2699      }
2700    }
2701  }
2702  return false;
2703}
2704
2705/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2706/// physical register defs.
2707static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2708                                  const TargetInstrInfo *TII,
2709                                  const TargetRegisterInfo *TRI) {
2710  SDNode *N = SuccSU->getNode();
2711  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2712  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2713  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2714  for (const SDNode *SUNode = SU->getNode(); SUNode;
2715       SUNode = SUNode->getGluedNode()) {
2716    if (!SUNode->isMachineOpcode())
2717      continue;
2718    const unsigned *SUImpDefs =
2719      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2720    if (!SUImpDefs)
2721      return false;
2722    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2723      EVT VT = N->getValueType(i);
2724      if (VT == MVT::Glue || VT == MVT::Other)
2725        continue;
2726      if (!N->hasAnyUseOfValue(i))
2727        continue;
2728      unsigned Reg = ImpDefs[i - NumDefs];
2729      for (;*SUImpDefs; ++SUImpDefs) {
2730        unsigned SUReg = *SUImpDefs;
2731        if (TRI->regsOverlap(Reg, SUReg))
2732          return true;
2733      }
2734    }
2735  }
2736  return false;
2737}
2738
2739/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2740/// are not handled well by the general register pressure reduction
2741/// heuristics. When presented with code like this:
2742///
2743///      N
2744///    / |
2745///   /  |
2746///  U  store
2747///  |
2748/// ...
2749///
2750/// the heuristics tend to push the store up, but since the
2751/// operand of the store has another use (U), this would lengthen
2752/// the live range of that other use (the U->N edge).
2753///
2754/// This function transforms code like the above to route U's
2755/// dependence through the store when possible, like this:
2756///
2757///      N
2758///      ||
2759///      ||
2760///     store
2761///       |
2762///       U
2763///       |
2764///      ...
2765///
2766/// This results in the store being scheduled immediately
2767/// after N, which shortens the U->N live range, reducing
2768/// register pressure.
2769///
2770void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2771  // Visit all the nodes in topological order, working top-down.
2772  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2773    SUnit *SU = &(*SUnits)[i];
2774    // For now, only look at nodes with no data successors, such as stores.
2775    // These are especially important, due to the heuristics in
2776    // getNodePriority for nodes with no data successors.
2777    if (SU->NumSuccs != 0)
2778      continue;
2779    // For now, only look at nodes with exactly one data predecessor.
2780    if (SU->NumPreds != 1)
2781      continue;
2782    // Avoid prescheduling copies to virtual registers, which don't behave
2783    // like other nodes from the perspective of scheduling heuristics.
2784    if (SDNode *N = SU->getNode())
2785      if (N->getOpcode() == ISD::CopyToReg &&
2786          TargetRegisterInfo::isVirtualRegister
2787            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2788        continue;
2789
2790    // Locate the single data predecessor.
2791    SUnit *PredSU = 0;
2792    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2793         EE = SU->Preds.end(); II != EE; ++II)
2794      if (!II->isCtrl()) {
2795        PredSU = II->getSUnit();
2796        break;
2797      }
2798    assert(PredSU);
2799
2800    // Don't rewrite edges that carry physregs, because that requires additional
2801    // support infrastructure.
2802    if (PredSU->hasPhysRegDefs)
2803      continue;
2804    // Short-circuit the case where SU is PredSU's only data successor.
2805    if (PredSU->NumSuccs == 1)
2806      continue;
2807    // Avoid prescheduling to copies from virtual registers, which don't behave
2808    // like other nodes from the perspective of scheduling heuristics.
2809    if (SDNode *N = SU->getNode())
2810      if (N->getOpcode() == ISD::CopyFromReg &&
2811          TargetRegisterInfo::isVirtualRegister
2812            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2813        continue;
2814
2815    // Perform checks on the successors of PredSU.
2816    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2817         EE = PredSU->Succs.end(); II != EE; ++II) {
2818      SUnit *PredSuccSU = II->getSUnit();
2819      if (PredSuccSU == SU) continue;
2820      // If PredSU has another successor with no data successors, for
2821      // now don't attempt to choose either over the other.
2822      if (PredSuccSU->NumSuccs == 0)
2823        goto outer_loop_continue;
2824      // Don't break physical register dependencies.
2825      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2826        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2827          goto outer_loop_continue;
2828      // Don't introduce graph cycles.
2829      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2830        goto outer_loop_continue;
2831    }
2832
2833    // Ok, the transformation is safe and the heuristics suggest it is
2834    // profitable. Update the graph.
2835    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2836                 << " next to PredSU #" << PredSU->NodeNum
2837                 << " to guide scheduling in the presence of multiple uses\n");
2838    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2839      SDep Edge = PredSU->Succs[i];
2840      assert(!Edge.isAssignedRegDep());
2841      SUnit *SuccSU = Edge.getSUnit();
2842      if (SuccSU != SU) {
2843        Edge.setSUnit(PredSU);
2844        scheduleDAG->RemovePred(SuccSU, Edge);
2845        scheduleDAG->AddPred(SU, Edge);
2846        Edge.setSUnit(SU);
2847        scheduleDAG->AddPred(SuccSU, Edge);
2848        --i;
2849      }
2850    }
2851  outer_loop_continue:;
2852  }
2853}
2854
2855/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2856/// it as a def&use operand, add a pseudo control edge from it to the other
2857/// node (if it won't create a cycle) so the two-address one will be scheduled
2858/// first (lower in the schedule). If both nodes are two-address, favor the
2859/// one that has a CopyToReg use (more likely to be a loop induction update).
2860/// If both are two-address, but one is commutable while the other is not
2861/// commutable, favor the one that's not commutable.
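///
/// For example, if t2 = op t1, c is two-address (t1 is tied to the result) and
/// some other node also reads t1, the artificial edge forces that other reader
/// to end up above the two-address node in the final schedule, so the
/// two-address node can safely clobber t1 and the tied operand can be
/// coalesced without an extra copy.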
2862void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2863  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2864    SUnit *SU = &(*SUnits)[i];
2865    if (!SU->isTwoAddress)
2866      continue;
2867
2868    SDNode *Node = SU->getNode();
2869    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2870      continue;
2871
2872    bool isLiveOut = hasOnlyLiveOutUses(SU);
2873    unsigned Opc = Node->getMachineOpcode();
2874    const MCInstrDesc &MCID = TII->get(Opc);
2875    unsigned NumRes = MCID.getNumDefs();
2876    unsigned NumOps = MCID.getNumOperands() - NumRes;
2877    for (unsigned j = 0; j != NumOps; ++j) {
2878      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2879        continue;
2880      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2881      if (DU->getNodeId() == -1)
2882        continue;
2883      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2884      if (!DUSU) continue;
2885      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2886           E = DUSU->Succs.end(); I != E; ++I) {
2887        if (I->isCtrl()) continue;
2888        SUnit *SuccSU = I->getSUnit();
2889        if (SuccSU == SU)
2890          continue;
2891        // Be conservative. Ignore if nodes aren't at roughly the same
2892        // depth and height.
2893        if (SuccSU->getHeight() < SU->getHeight() &&
2894            (SU->getHeight() - SuccSU->getHeight()) > 1)
2895          continue;
2896        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2897        // constrains whatever is using the copy, instead of the copy
2898        // itself. In the case that the copy is coalesced, this
2899        // preserves the intent of the pseudo two-address heuristics.
2900        while (SuccSU->Succs.size() == 1 &&
2901               SuccSU->getNode()->isMachineOpcode() &&
2902               SuccSU->getNode()->getMachineOpcode() ==
2903                 TargetOpcode::COPY_TO_REGCLASS)
2904          SuccSU = SuccSU->Succs.front().getSUnit();
2905        // Don't constrain non-instruction nodes.
2906        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2907          continue;
2908        // Don't constrain nodes with physical register defs if the
2909        // predecessor can clobber them.
2910        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2911          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2912            continue;
2913        }
2914        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2915        // these may be coalesced away. We want them close to their uses.
2916        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2917        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2918            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2919            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2920          continue;
2921        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
2922            (!canClobber(SuccSU, DUSU) ||
2923             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2924             (!SU->isCommutable && SuccSU->isCommutable)) &&
2925            !scheduleDAG->IsReachable(SuccSU, SU)) {
2926          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2927                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2928          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2929                                        /*Reg=*/0, /*isNormalMemory=*/false,
2930                                        /*isMustAlias=*/false,
2931                                        /*isArtificial=*/true));
2932        }
2933      }
2934    }
2935  }
2936}
2937
2938//===----------------------------------------------------------------------===//
2939//                         Public Constructor Functions
2940//===----------------------------------------------------------------------===//
2941
2942llvm::ScheduleDAGSDNodes *
2943llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2944                                 CodeGenOpt::Level OptLevel) {
2945  const TargetMachine &TM = IS->TM;
2946  const TargetInstrInfo *TII = TM.getInstrInfo();
2947  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2948
2949  BURegReductionPriorityQueue *PQ =
2950    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2951  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2952  PQ->setScheduleDAG(SD);
2953  return SD;
2954}
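
// A minimal usage sketch, assuming a live SelectionDAGISel *IS (in practice
// these factories are reached through the scheduler registry rather than
// being called directly):
//
//   ScheduleDAGSDNodes *Sched =
//     createBURRListDAGScheduler(IS, CodeGenOpt::Default);
//   // ... schedule the current DAG ...
//   delete Sched;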
2955
2956llvm::ScheduleDAGSDNodes *
2957llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2958                                   CodeGenOpt::Level OptLevel) {
2959  const TargetMachine &TM = IS->TM;
2960  const TargetInstrInfo *TII = TM.getInstrInfo();
2961  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2962
2963  SrcRegReductionPriorityQueue *PQ =
2964    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2965  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2966  PQ->setScheduleDAG(SD);
2967  return SD;
2968}
2969
2970llvm::ScheduleDAGSDNodes *
2971llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2972                                   CodeGenOpt::Level OptLevel) {
2973  const TargetMachine &TM = IS->TM;
2974  const TargetInstrInfo *TII = TM.getInstrInfo();
2975  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2976  const TargetLowering *TLI = &IS->getTargetLowering();
2977
2978  HybridBURRPriorityQueue *PQ =
2979    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2980
2981  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2982  PQ->setScheduleDAG(SD);
2983  return SD;
2984}
2985
2986llvm::ScheduleDAGSDNodes *
2987llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2988                                CodeGenOpt::Level OptLevel) {
2989  const TargetMachine &TM = IS->TM;
2990  const TargetInstrInfo *TII = TM.getInstrInfo();
2991  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2992  const TargetLowering *TLI = &IS->getTargetLowering();
2993
2994  ILPBURRPriorityQueue *PQ =
2995    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2996  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2997  PQ->setScheduleDAG(SD);
2998  return SD;
2999}
3000
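// Note: in ordinary use these factory functions are not invoked directly; the
// pre-RA scheduling pass selects one through the scheduler registry (for
// example, via llc's -pre-RA-sched option), and the chosen factory then
// builds the matching priority queue and ScheduleDAGRRList pair shown above.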