ScheduleDAGRRList.cpp revision 2ad3a89681d15d30b76c36bbd6355ddd78be7e45
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  sourceListDAGScheduler("source",
49                         "Similar to list-burr but schedules in source "
50                         "order when possible",
51                         createSourceListDAGScheduler);
52
53static RegisterScheduler
54  hybridListDAGScheduler("list-hybrid",
55                         "Bottom-up register pressure aware list scheduling "
56                         "which tries to balance latency and register pressure",
57                         createHybridListDAGScheduler);
58
59static RegisterScheduler
60  ILPListDAGScheduler("list-ilp",
61                      "Bottom-up register pressure aware list scheduling "
62                      "which tries to balance ILP and register pressure",
63                      createILPListDAGScheduler);
64
65static cl::opt<bool> DisableSchedCycles(
66  "disable-sched-cycles", cl::Hidden, cl::init(false),
67  cl::desc("Disable cycle-level precision during preRA scheduling"));
68
69// Temporary sched=list-ilp flags until the heuristics are robust.
70// Some options are also available under sched=list-hybrid.
71static cl::opt<bool> DisableSchedRegPressure(
72  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
73  cl::desc("Disable regpressure priority in sched=list-ilp"));
74static cl::opt<bool> DisableSchedLiveUses(
75  "disable-sched-live-uses", cl::Hidden, cl::init(true),
76  cl::desc("Disable live use priority in sched=list-ilp"));
77static cl::opt<bool> DisableSchedVRegCycle(
78  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
79  cl::desc("Disable virtual register cycle interference checks"));
80static cl::opt<bool> DisableSchedPhysRegJoin(
81  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
82  cl::desc("Disable physreg def-use affinity"));
83static cl::opt<bool> DisableSchedStalls(
84  "disable-sched-stalls", cl::Hidden, cl::init(true),
85  cl::desc("Disable no-stall priority in sched=list-ilp"));
86static cl::opt<bool> DisableSchedCriticalPath(
87  "disable-sched-critical-path", cl::Hidden, cl::init(false),
88  cl::desc("Disable critical path priority in sched=list-ilp"));
89static cl::opt<bool> DisableSchedHeight(
90  "disable-sched-height", cl::Hidden, cl::init(false),
91  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
92
93static cl::opt<int> MaxReorderWindow(
94  "max-sched-reorder", cl::Hidden, cl::init(6),
95  cl::desc("Number of instructions to allow ahead of the critical path "
96           "in sched=list-ilp"));
97
98static cl::opt<unsigned> AvgIPC(
99  "sched-avg-ipc", cl::Hidden, cl::init(1),
100  cl::desc("Average inst/cycle when no target itinerary exists."));
101
102#ifndef NDEBUG
103namespace {
104  // For sched=list-ilp, count the number of times each factor comes into play.
105  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
106         FactStatic, FactOther, NumFactors };
107}
108static const char *FactorName[NumFactors] =
109{"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
110static int FactorCount[NumFactors];
111#endif //!NDEBUG
112
113namespace {
114//===----------------------------------------------------------------------===//
115/// ScheduleDAGRRList - The actual register reduction list scheduler
116/// implementation.  This supports both top-down and bottom-up scheduling.
117///
118class ScheduleDAGRRList : public ScheduleDAGSDNodes {
119private:
120  /// NeedLatency - True if the scheduler will make use of latency information.
121  ///
122  bool NeedLatency;
123
124  /// AvailableQueue - The priority queue to use for the available SUnits.
125  SchedulingPriorityQueue *AvailableQueue;
126
127  /// PendingQueue - This contains all of the instructions whose operands have
128  /// been issued, but their results are not ready yet (due to the latency of
129/// the operation).  Once the operands become available, the instruction is
130  /// added to the AvailableQueue.
131  std::vector<SUnit*> PendingQueue;
132
133  /// HazardRec - The hazard recognizer to use.
134  ScheduleHazardRecognizer *HazardRec;
135
136  /// CurCycle - The current scheduler state corresponds to this cycle.
137  unsigned CurCycle;
138
139  /// MinAvailableCycle - Cycle of the soonest available instruction.
140  unsigned MinAvailableCycle;
141
142  /// IssueCount - Count of instructions issued in this cycle.
143  /// Currently valid only for bottom-up scheduling.
144  unsigned IssueCount;
145
146  /// LiveRegDefs - A set of physical registers and their definitions
147  /// that are "live". These nodes must be scheduled before any other nodes that
148  /// modify the registers can be scheduled.
149  unsigned NumLiveRegs;
150  std::vector<SUnit*> LiveRegDefs;
151  std::vector<SUnit*> LiveRegGens;
152
153  /// Topo - A topological ordering for SUnits which permits fast IsReachable
154  /// and similar queries.
155  ScheduleDAGTopologicalSort Topo;
156
157public:
158  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
159                    SchedulingPriorityQueue *availqueue,
160                    CodeGenOpt::Level OptLevel)
161    : ScheduleDAGSDNodes(mf),
162      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
163      Topo(SUnits) {
164
165    const TargetMachine &tm = mf.getTarget();
166    if (DisableSchedCycles || !NeedLatency)
167      HazardRec = new ScheduleHazardRecognizer();
168    else
169      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
170  }
171
172  ~ScheduleDAGRRList() {
173    delete HazardRec;
174    delete AvailableQueue;
175  }
176
177  void Schedule();
178
179  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
180
181  /// IsReachable - Checks if SU is reachable from TargetSU.
182  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
183    return Topo.IsReachable(SU, TargetSU);
184  }
185
186  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
187  /// create a cycle.
188  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
189    return Topo.WillCreateCycle(SU, TargetSU);
190  }
191
192  /// AddPred - adds a predecessor edge to SUnit SU.
193  /// Updates the topological ordering if required.
195  void AddPred(SUnit *SU, const SDep &D) {
196    Topo.AddPred(SU, D.getSUnit());
197    SU->addPred(D);
198  }
199
200  /// RemovePred - removes a predecessor edge from SUnit SU.
201  /// Updates the topological ordering if required.
203  void RemovePred(SUnit *SU, const SDep &D) {
204    Topo.RemovePred(SU, D.getSUnit());
205    SU->removePred(D);
206  }
207
208private:
209  bool isReady(SUnit *SU) {
210    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
211      AvailableQueue->isReady(SU);
212  }
213
214  void ReleasePred(SUnit *SU, const SDep *PredEdge);
215  void ReleasePredecessors(SUnit *SU);
216  void ReleasePending();
217  void AdvanceToCycle(unsigned NextCycle);
218  void AdvancePastStalls(SUnit *SU);
219  void EmitNode(SUnit *SU);
220  void ScheduleNodeBottomUp(SUnit*);
221  void CapturePred(SDep *PredEdge);
222  void UnscheduleNodeBottomUp(SUnit*);
223  void RestoreHazardCheckerBottomUp();
224  void BacktrackBottomUp(SUnit*, SUnit*);
225  SUnit *CopyAndMoveSuccessors(SUnit*);
226  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
227                                const TargetRegisterClass*,
228                                const TargetRegisterClass*,
229                                SmallVector<SUnit*, 2>&);
230  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
231
232  SUnit *PickNodeToScheduleBottomUp();
233  void ListScheduleBottomUp();
234
235  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
236  /// Updates the topological ordering if required.
237  SUnit *CreateNewSUnit(SDNode *N) {
238    unsigned NumSUnits = SUnits.size();
239    SUnit *NewNode = NewSUnit(N);
240    // Update the topological ordering.
241    if (NewNode->NodeNum >= NumSUnits)
242      Topo.InitDAGTopologicalSorting();
243    return NewNode;
244  }
245
246  /// CreateClone - Creates a new SUnit from an existing one.
247  /// Updates the topological ordering if required.
248  SUnit *CreateClone(SUnit *N) {
249    unsigned NumSUnits = SUnits.size();
250    SUnit *NewNode = Clone(N);
251    // Update the topological ordering.
252    if (NewNode->NodeNum >= NumSUnits)
253      Topo.InitDAGTopologicalSorting();
254    return NewNode;
255  }
256
257  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
258  /// need actual latency information but the hybrid scheduler does.
259  bool ForceUnitLatencies() const {
260    return !NeedLatency;
261  }
262};
263}  // end anonymous namespace
264
265/// GetCostForDef - Looks up the register class and cost for a given definition.
266/// Typically this just means looking up the representative register class,
267/// but for untyped values (MVT::untyped) it means inspecting the node's
268/// opcode to determine what register class is being generated.
269static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
270                          const TargetLowering *TLI,
271                          const TargetInstrInfo *TII,
272                          const TargetRegisterInfo *TRI,
273                          unsigned &RegClass, unsigned &Cost) {
274  EVT VT = RegDefPos.GetValue();
275
276  // Special handling for untyped values.  These values can only come from
277  // the expansion of custom DAG-to-DAG patterns.
278  if (VT == MVT::untyped) {
279    const SDNode *Node = RegDefPos.GetNode();
280    unsigned Opcode = Node->getMachineOpcode();
281
282    if (Opcode == TargetOpcode::REG_SEQUENCE) {
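      // A REG_SEQUENCE node carries its destination register class as a
      // constant in operand 0, so read the class ID directly from it rather
      // than going through the MCInstrDesc.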
283      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
284      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
285      RegClass = RC->getID();
286      Cost = 1;
287      return;
288    }
289
290    unsigned Idx = RegDefPos.GetIdx();
291    const MCInstrDesc &Desc = TII->get(Opcode);
292    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
293    RegClass = RC->getID();
294    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
295    // better way to determine it.
296    Cost = 1;
297  } else {
298    RegClass = TLI->getRepRegClassFor(VT)->getID();
299    Cost = TLI->getRepRegClassCostFor(VT);
300  }
301}
302
303/// Schedule - Schedule the DAG using list scheduling.
304void ScheduleDAGRRList::Schedule() {
305  DEBUG(dbgs()
306        << "********** List Scheduling BB#" << BB->getNumber()
307        << " '" << BB->getName() << "' **********\n");
308#ifndef NDEBUG
309  for (int i = 0; i < NumFactors; ++i) {
310    FactorCount[i] = 0;
311  }
312#endif //!NDEBUG
313
314  CurCycle = 0;
315  IssueCount = 0;
316  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
317  NumLiveRegs = 0;
318  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
319  LiveRegGens.resize(TRI->getNumRegs(), NULL);
320
321  // Build the scheduling graph.
322  BuildSchedGraph(NULL);
323
324  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
325          SUnits[su].dumpAll(this));
326  Topo.InitDAGTopologicalSorting();
327
328  AvailableQueue->initNodes(SUnits);
329
330  HazardRec->Reset();
331
332  // Execute the actual scheduling loop.
333  ListScheduleBottomUp();
334
335#ifndef NDEBUG
336  for (int i = 0; i < NumFactors; ++i) {
337    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
338  }
339#endif // !NDEBUG
340  AvailableQueue->releaseState();
341}
342
343//===----------------------------------------------------------------------===//
344//  Bottom-Up Scheduling
345//===----------------------------------------------------------------------===//
346
347/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
348/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
349void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
350  SUnit *PredSU = PredEdge->getSUnit();
351
352#ifndef NDEBUG
353  if (PredSU->NumSuccsLeft == 0) {
354    dbgs() << "*** Scheduling failed! ***\n";
355    PredSU->dump(this);
356    dbgs() << " has been released too many times!\n";
357    llvm_unreachable(0);
358  }
359#endif
360  --PredSU->NumSuccsLeft;
361
362  if (!ForceUnitLatencies()) {
363    // Update the predecessor's height. This is now the cycle when the
364    // predecessor can be scheduled without causing a pipeline stall.
365    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
366  }
367
368  // If all the node's successors are scheduled, this node is ready
369  // to be scheduled. Ignore the special EntrySU node.
370  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
371    PredSU->isAvailable = true;
372
373    unsigned Height = PredSU->getHeight();
374    if (Height < MinAvailableCycle)
375      MinAvailableCycle = Height;
376
377    if (isReady(PredSU)) {
378      AvailableQueue->push(PredSU);
379    }
380    // CapturePred and others may have left the node in the pending queue, avoid
381    // adding it twice.
382    else if (!PredSU->isPending) {
383      PredSU->isPending = true;
384      PendingQueue.push_back(PredSU);
385    }
386  }
387}
388
389/// Call ReleasePred for each predecessor, then update register live def/gen.
390/// Always update LiveRegDefs for a register dependence even if the current SU
391/// also defines the register. This effectively creates one large live range
392/// across a sequence of two-address nodes. This is important because the
393/// entire chain must be scheduled together. Example:
394///
395/// flags = (3) add
396/// flags = (2) addc flags
397/// flags = (1) addc flags
398///
399/// results in
400///
401/// LiveRegDefs[flags] = 3
402/// LiveRegGens[flags] = 1
403///
404/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
405/// interference on flags.
406void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
407  // Bottom up: release predecessors
408  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
409       I != E; ++I) {
410    ReleasePred(SU, &*I);
411    if (I->isAssignedRegDep()) {
412      // This is a physical register dependency and it's impossible or
413      // expensive to copy the register. Make sure nothing that can
414      // clobber the register is scheduled between the predecessor and
415      // this node.
416      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
417      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
418             "interference on register dependence");
419      LiveRegDefs[I->getReg()] = I->getSUnit();
420      if (!LiveRegGens[I->getReg()]) {
421        ++NumLiveRegs;
422        LiveRegGens[I->getReg()] = SU;
423      }
424    }
425  }
426}
427
428/// Check to see if any of the pending instructions are ready to issue.  If
429/// so, add them to the available queue.
430void ScheduleDAGRRList::ReleasePending() {
431  if (DisableSchedCycles) {
432    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
433    return;
434  }
435
436  // If the available queue is empty, it is safe to reset MinAvailableCycle.
437  if (AvailableQueue->empty())
438    MinAvailableCycle = UINT_MAX;
439
440  // Check to see if any of the pending instructions are ready to issue.  If
441  // so, add them to the available queue.
442  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
443    unsigned ReadyCycle = PendingQueue[i]->getHeight();
444    if (ReadyCycle < MinAvailableCycle)
445      MinAvailableCycle = ReadyCycle;
446
447    if (PendingQueue[i]->isAvailable) {
448      if (!isReady(PendingQueue[i]))
449          continue;
450      AvailableQueue->push(PendingQueue[i]);
451    }
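    // Remove the entry from PendingQueue by swapping the last element into its
    // slot and popping; decrement i and e so the swapped-in entry is examined
    // on the next iteration.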
452    PendingQueue[i]->isPending = false;
453    PendingQueue[i] = PendingQueue.back();
454    PendingQueue.pop_back();
455    --i; --e;
456  }
457}
458
459/// Move the scheduler state forward to the specified cycle.
460void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
461  if (NextCycle <= CurCycle)
462    return;
463
464  IssueCount = 0;
465  AvailableQueue->setCurCycle(NextCycle);
466  if (!HazardRec->isEnabled()) {
467    // Bypass lots of virtual calls in case of long latency.
468    CurCycle = NextCycle;
469  }
470  else {
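    // With a hazard recognizer enabled, walk the cycles one at a time so its
    // scoreboard stays in sync; RecedeCycle is used because this schedule is
    // built bottom-up.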
471    for (; CurCycle != NextCycle; ++CurCycle) {
472      HazardRec->RecedeCycle();
473    }
474  }
475  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
476  // available Q to release pending nodes at least once before popping.
477  ReleasePending();
478}
479
480/// Move the scheduler state forward until the specified node's dependents are
481/// ready and can be scheduled with no resource conflicts.
482void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
483  if (DisableSchedCycles)
484    return;
485
486  // FIXME: Nodes such as CopyFromReg probably should not advance the current
487  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
488  // has predecessors the cycle will be advanced when they are scheduled.
489  // But given the crude nature of modeling latency through such nodes, we
490  // currently need to treat these nodes like real instructions.
491  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
492
493  unsigned ReadyCycle = SU->getHeight();
494
495  // Bump CurCycle to account for latency. We assume the latency of other
496  // available instructions may be hidden by the stall (not a full pipe stall).
497  // This updates the hazard recognizer's cycle before reserving resources for
498  // this instruction.
499  AdvanceToCycle(ReadyCycle);
500
501  // Calls are scheduled in their preceding cycle, so don't conflict with
502  // hazards from instructions after the call. EmitNode will reset the
503  // scoreboard state before emitting the call.
504  if (SU->isCall)
505    return;
506
507  // FIXME: For resource conflicts in very long non-pipelined stages, we
508  // should probably skip ahead here to avoid useless scoreboard checks.
509  int Stalls = 0;
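  // Probe the hazard recognizer at progressively earlier cycles (negative
  // offsets, since this schedule is built bottom-up) until a cycle with no
  // hazard is found, then advance past the stalls.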
510  while (true) {
511    ScheduleHazardRecognizer::HazardType HT =
512      HazardRec->getHazardType(SU, -Stalls);
513
514    if (HT == ScheduleHazardRecognizer::NoHazard)
515      break;
516
517    ++Stalls;
518  }
519  AdvanceToCycle(CurCycle + Stalls);
520}
521
522/// Record this SUnit in the HazardRecognizer.
523/// Does not update CurCycle.
524void ScheduleDAGRRList::EmitNode(SUnit *SU) {
525  if (!HazardRec->isEnabled())
526    return;
527
528  // Check for phys reg copy.
529  if (!SU->getNode())
530    return;
531
532  switch (SU->getNode()->getOpcode()) {
533  default:
534    assert(SU->getNode()->isMachineOpcode() &&
535           "This target-independent node should not be scheduled.");
536    break;
537  case ISD::MERGE_VALUES:
538  case ISD::TokenFactor:
539  case ISD::CopyToReg:
540  case ISD::CopyFromReg:
541  case ISD::EH_LABEL:
542    // Noops don't affect the scoreboard state. Copies are likely to be
543    // removed.
544    return;
545  case ISD::INLINEASM:
546    // For inline asm, clear the pipeline state.
547    HazardRec->Reset();
548    return;
549  }
550  if (SU->isCall) {
551    // Calls are scheduled with their preceding instructions. For bottom-up
552    // scheduling, clear the pipeline state before emitting.
553    HazardRec->Reset();
554  }
555
556  HazardRec->EmitInstruction(SU);
557}
558
559static void resetVRegCycle(SUnit *SU);
560
561/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
562/// count of its predecessors. If a predecessor pending count is zero, add it to
563/// the Available queue.
564void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
565  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
566  DEBUG(SU->dump(this));
567
568#ifndef NDEBUG
569  if (CurCycle < SU->getHeight())
570    DEBUG(dbgs() << "   Height [" << SU->getHeight()
571          << "] pipeline stall!\n");
572#endif
573
574  // FIXME: Do not modify node height. It may interfere with
575  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
576  // node its ready cycle can aid heuristics, and after scheduling it can
577  // indicate the scheduled cycle.
578  SU->setHeightToAtLeast(CurCycle);
579
580  // Reserve resources for the scheduled instruction.
581  EmitNode(SU);
582
583  Sequence.push_back(SU);
584
585  AvailableQueue->ScheduledNode(SU);
586
587  // If HazardRec is disabled, and each inst counts as one cycle, then
588  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
589  // PendingQueue for schedulers that implement HasReadyFilter.
590  if (!HazardRec->isEnabled() && AvgIPC < 2)
591    AdvanceToCycle(CurCycle + 1);
592
593  // Update liveness of predecessors before successors to avoid treating a
594  // two-address node as a live range def.
595  ReleasePredecessors(SU);
596
597  // Release all the implicit physical register defs that are live.
598  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
599       I != E; ++I) {
600    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
601    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
602      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
603      --NumLiveRegs;
604      LiveRegDefs[I->getReg()] = NULL;
605      LiveRegGens[I->getReg()] = NULL;
606    }
607  }
608
609  resetVRegCycle(SU);
610
611  SU->isScheduled = true;
612
613  // Conditions under which the scheduler should eagerly advance the cycle:
614  // (1) No available instructions
615  // (2) All pipelines full, so available instructions must have hazards.
616  //
617  // If HazardRec is disabled, the cycle was pre-advanced before calling
618  // ReleasePredecessors. In that case, IssueCount should remain 0.
619  //
620  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
621  if (HazardRec->isEnabled() || AvgIPC > 1) {
622    if (SU->getNode() && SU->getNode()->isMachineOpcode())
623      ++IssueCount;
624    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
625        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
626      AdvanceToCycle(CurCycle + 1);
627  }
628}
629
630/// CapturePred - This does the opposite of ReleasePred. Since SU is being
631/// unscheduled, increase the succ left count of its predecessors. Remove
632/// them from AvailableQueue if necessary.
633void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
634  SUnit *PredSU = PredEdge->getSUnit();
635  if (PredSU->isAvailable) {
636    PredSU->isAvailable = false;
637    if (!PredSU->isPending)
638      AvailableQueue->remove(PredSU);
639  }
640
641  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
642  ++PredSU->NumSuccsLeft;
643}
644
645/// UnscheduleNodeBottomUp - Remove the node from the schedule and update its
646/// state and its predecessors' states to reflect the change.
647void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
648  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
649  DEBUG(SU->dump(this));
650
651  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
652       I != E; ++I) {
653    CapturePred(&*I);
654    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
655      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
656      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
657             "Physical register dependency violated?");
658      --NumLiveRegs;
659      LiveRegDefs[I->getReg()] = NULL;
660      LiveRegGens[I->getReg()] = NULL;
661    }
662  }
663
664  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
665       I != E; ++I) {
666    if (I->isAssignedRegDep()) {
667      if (!LiveRegDefs[I->getReg()]) {
668        ++NumLiveRegs;
669      }
670      // This becomes the nearest def. Note that an earlier def may still be
671      // pending if this is a two-address node.
672      LiveRegDefs[I->getReg()] = SU;
673      if (LiveRegGens[I->getReg()] == NULL ||
674          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
675        LiveRegGens[I->getReg()] = I->getSUnit();
676    }
677  }
678  if (SU->getHeight() < MinAvailableCycle)
679    MinAvailableCycle = SU->getHeight();
680
681  SU->setHeightDirty();
682  SU->isScheduled = false;
683  SU->isAvailable = true;
684  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
685    // Don't make available until backtracking is complete.
686    SU->isPending = true;
687    PendingQueue.push_back(SU);
688  }
689  else {
690    AvailableQueue->push(SU);
691  }
692  AvailableQueue->UnscheduledNode(SU);
693}
694
695/// After backtracking, the hazard checker needs to be restored to a state
696/// corresponding to the current cycle.
697void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
698  HazardRec->Reset();
699
700  unsigned LookAhead = std::min((unsigned)Sequence.size(),
701                                HazardRec->getMaxLookAhead());
702  if (LookAhead == 0)
703    return;
704
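  // Replay the most recently scheduled nodes into the freshly reset hazard
  // recognizer, receding its cycle as needed, so that its state matches the
  // schedule that remains after backtracking.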
705  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
706  unsigned HazardCycle = (*I)->getHeight();
707  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
708    SUnit *SU = *I;
709    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
710      HazardRec->RecedeCycle();
711    }
712    EmitNode(SU);
713  }
714}
715
716/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
717/// including BtSU, so that SU can be scheduled.
718void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
719  SUnit *OldSU = Sequence.back();
720  while (true) {
721    Sequence.pop_back();
722    if (SU->isSucc(OldSU))
723      // Don't try to remove SU from AvailableQueue.
724      SU->isAvailable = false;
725    // FIXME: use ready cycle instead of height
726    CurCycle = OldSU->getHeight();
727    UnscheduleNodeBottomUp(OldSU);
728    AvailableQueue->setCurCycle(CurCycle);
729    if (OldSU == BtSU)
730      break;
731    OldSU = Sequence.back();
732  }
733
734  assert(!SU->isSucc(OldSU) && "Something is wrong!");
735
736  RestoreHazardCheckerBottomUp();
737
738  ReleasePending();
739
740  ++NumBacktracks;
741}
742
743static bool isOperandOf(const SUnit *SU, SDNode *N) {
744  for (const SDNode *SUNode = SU->getNode(); SUNode;
745       SUNode = SUNode->getGluedNode()) {
746    if (SUNode->isOperandOf(N))
747      return true;
748  }
749  return false;
750}
751
752/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
753/// successors to the newly created node.
754SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
755  SDNode *N = SU->getNode();
756  if (!N)
757    return NULL;
758
759  if (SU->getNode()->getGluedNode())
760    return NULL;
761
762  SUnit *NewSU;
763  bool TryUnfold = false;
764  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
765    EVT VT = N->getValueType(i);
766    if (VT == MVT::Glue)
767      return NULL;
768    else if (VT == MVT::Other)
769      TryUnfold = true;
770  }
771  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
772    const SDValue &Op = N->getOperand(i);
773    EVT VT = Op.getNode()->getValueType(Op.getResNo());
774    if (VT == MVT::Glue)
775      return NULL;
776  }
777
778  if (TryUnfold) {
779    SmallVector<SDNode*, 2> NewNodes;
780    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
781      return NULL;
782
783    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
784    assert(NewNodes.size() == 2 && "Expected a load folding node!");
785
786    N = NewNodes[1];
787    SDNode *LoadNode = NewNodes[0];
788    unsigned NumVals = N->getNumValues();
789    unsigned OldNumVals = SU->getNode()->getNumValues();
790    for (unsigned i = 0; i != NumVals; ++i)
791      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
792    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
793                                   SDValue(LoadNode, 1));
794
795    // LoadNode may already exist. This can happen when there is another
796    // load from the same location producing the same type of value,
797    // but with different alignment or volatility.
798    bool isNewLoad = true;
799    SUnit *LoadSU;
800    if (LoadNode->getNodeId() != -1) {
801      LoadSU = &SUnits[LoadNode->getNodeId()];
802      isNewLoad = false;
803    } else {
804      LoadSU = CreateNewSUnit(LoadNode);
805      LoadNode->setNodeId(LoadSU->NodeNum);
806
807      InitNumRegDefsLeft(LoadSU);
808      ComputeLatency(LoadSU);
809    }
810
811    SUnit *NewSU = CreateNewSUnit(N);
812    assert(N->getNodeId() == -1 && "Node already inserted!");
813    N->setNodeId(NewSU->NodeNum);
814
815    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
816    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
817      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
818        NewSU->isTwoAddress = true;
819        break;
820      }
821    }
822    if (MCID.isCommutable())
823      NewSU->isCommutable = true;
824
825    InitNumRegDefsLeft(NewSU);
826    ComputeLatency(NewSU);
827
828    // Record all the edges to and from the old SU, by category.
829    SmallVector<SDep, 4> ChainPreds;
830    SmallVector<SDep, 4> ChainSuccs;
831    SmallVector<SDep, 4> LoadPreds;
832    SmallVector<SDep, 4> NodePreds;
833    SmallVector<SDep, 4> NodeSuccs;
834    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
835         I != E; ++I) {
836      if (I->isCtrl())
837        ChainPreds.push_back(*I);
838      else if (isOperandOf(I->getSUnit(), LoadNode))
839        LoadPreds.push_back(*I);
840      else
841        NodePreds.push_back(*I);
842    }
843    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
844         I != E; ++I) {
845      if (I->isCtrl())
846        ChainSuccs.push_back(*I);
847      else
848        NodeSuccs.push_back(*I);
849    }
850
851    // Now assign edges to the newly-created nodes.
852    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
853      const SDep &Pred = ChainPreds[i];
854      RemovePred(SU, Pred);
855      if (isNewLoad)
856        AddPred(LoadSU, Pred);
857    }
858    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
859      const SDep &Pred = LoadPreds[i];
860      RemovePred(SU, Pred);
861      if (isNewLoad)
862        AddPred(LoadSU, Pred);
863    }
864    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
865      const SDep &Pred = NodePreds[i];
866      RemovePred(SU, Pred);
867      AddPred(NewSU, Pred);
868    }
869    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
870      SDep D = NodeSuccs[i];
871      SUnit *SuccDep = D.getSUnit();
872      D.setSUnit(SU);
873      RemovePred(SuccDep, D);
874      D.setSUnit(NewSU);
875      AddPred(SuccDep, D);
876      // Balance register pressure.
877      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
878          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
879        --NewSU->NumRegDefsLeft;
880    }
881    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
882      SDep D = ChainSuccs[i];
883      SUnit *SuccDep = D.getSUnit();
884      D.setSUnit(SU);
885      RemovePred(SuccDep, D);
886      if (isNewLoad) {
887        D.setSUnit(LoadSU);
888        AddPred(SuccDep, D);
889      }
890    }
891
892    // Add a data dependency to reflect that NewSU reads the value defined
893    // by LoadSU.
894    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
895
896    if (isNewLoad)
897      AvailableQueue->addNode(LoadSU);
898    AvailableQueue->addNode(NewSU);
899
900    ++NumUnfolds;
901
902    if (NewSU->NumSuccsLeft == 0) {
903      NewSU->isAvailable = true;
904      return NewSU;
905    }
906    SU = NewSU;
907  }
908
909  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
910  NewSU = CreateClone(SU);
911
912  // New SUnit has the exact same predecessors.
913  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
914       I != E; ++I)
915    if (!I->isArtificial())
916      AddPred(NewSU, *I);
917
918  // Only copy scheduled successors. Cut them from old node's successor
919  // list and move them over.
920  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
921  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
922       I != E; ++I) {
923    if (I->isArtificial())
924      continue;
925    SUnit *SuccSU = I->getSUnit();
926    if (SuccSU->isScheduled) {
927      SDep D = *I;
928      D.setSUnit(NewSU);
929      AddPred(SuccSU, D);
930      D.setSUnit(SU);
931      DelDeps.push_back(std::make_pair(SuccSU, D));
932    }
933  }
934  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
935    RemovePred(DelDeps[i].first, DelDeps[i].second);
936
937  AvailableQueue->updateNode(SU);
938  AvailableQueue->addNode(NewSU);
939
940  ++NumDups;
941  return NewSU;
942}
943
944/// InsertCopiesAndMoveSuccs - Insert register copies and move all
945/// scheduled successors of the given SUnit to the last copy.
946void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
947                                               const TargetRegisterClass *DestRC,
948                                               const TargetRegisterClass *SrcRC,
949                                               SmallVector<SUnit*, 2> &Copies) {
950  SUnit *CopyFromSU = CreateNewSUnit(NULL);
951  CopyFromSU->CopySrcRC = SrcRC;
952  CopyFromSU->CopyDstRC = DestRC;
953
954  SUnit *CopyToSU = CreateNewSUnit(NULL);
955  CopyToSU->CopySrcRC = DestRC;
956  CopyToSU->CopyDstRC = SrcRC;
957
958  // Only copy scheduled successors. Cut them from old node's successor
959  // list and move them over.
960  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
961  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
962       I != E; ++I) {
963    if (I->isArtificial())
964      continue;
965    SUnit *SuccSU = I->getSUnit();
966    if (SuccSU->isScheduled) {
967      SDep D = *I;
968      D.setSUnit(CopyToSU);
969      AddPred(SuccSU, D);
970      DelDeps.push_back(std::make_pair(SuccSU, *I));
971    }
972    else {
973      // Avoid scheduling the def-side copy before other successors. Otherwise
974      // we could introduce another physreg interference on the copy and
975      // continue inserting copies indefinitely.
976      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
977             /*Reg=*/0, /*isNormalMemory=*/false,
978             /*isMustAlias=*/false, /*isArtificial=*/true);
979      AddPred(SuccSU, D);
980    }
981  }
982  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
983    RemovePred(DelDeps[i].first, DelDeps[i].second);
984
985  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
986  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
987
988  AvailableQueue->updateNode(SU);
989  AvailableQueue->addNode(CopyFromSU);
990  AvailableQueue->addNode(CopyToSU);
991  Copies.push_back(CopyFromSU);
992  Copies.push_back(CopyToSU);
993
994  ++NumPRCopies;
995}
996
997/// getPhysicalRegisterVT - Returns the ValueType of the physical register
998/// definition of the specified node.
999/// FIXME: Move to SelectionDAG?
1000static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1001                                 const TargetInstrInfo *TII) {
1002  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1003  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
1004  unsigned NumRes = MCID.getNumDefs();
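  // The node's explicit defs occupy the first getNumDefs() result values;
  // implicit defs are assumed to follow in ImplicitDefs order, so count
  // forward until Reg is found to get the index of its value type.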
1005  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
1006    if (Reg == *ImpDef)
1007      break;
1008    ++NumRes;
1009  }
1010  return N->getValueType(NumRes);
1011}
1012
1013/// CheckForLiveRegDef - Record in LRegs any currently "live" registers that
1014/// the specified register def of the specified SUnit would clobber.
1015static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
1016                               std::vector<SUnit*> &LiveRegDefs,
1017                               SmallSet<unsigned, 4> &RegAdded,
1018                               SmallVector<unsigned, 4> &LRegs,
1019                               const TargetRegisterInfo *TRI) {
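  // A def of Reg interferes with any live def of Reg itself or of a register
  // aliasing Reg; getOverlaps returns that null-terminated overlap set.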
1020  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
1021
1022    // Check if Ref is live.
1023    if (!LiveRegDefs[*AliasI]) continue;
1024
1025    // Allow multiple uses of the same def.
1026    if (LiveRegDefs[*AliasI] == SU) continue;
1027
1028    // Add Reg to the set of interfering live regs.
1029    if (RegAdded.insert(*AliasI)) {
1030      LRegs.push_back(*AliasI);
1031    }
1032  }
1033}
1034
1035/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1036/// scheduling of the given node to satisfy live physical register dependencies.
1037/// If the specific node is the last one that's available to schedule, do
1038/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1039bool ScheduleDAGRRList::
1040DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1041  if (NumLiveRegs == 0)
1042    return false;
1043
1044  SmallSet<unsigned, 4> RegAdded;
1045  // If this node would clobber any "live" register, then it's not ready.
1046  //
1047  // If SU is the currently live definition of the same register that it uses,
1048  // then we are free to schedule it.
1049  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1050       I != E; ++I) {
1051    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1052      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1053                         RegAdded, LRegs, TRI);
1054  }
1055
1056  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1057    if (Node->getOpcode() == ISD::INLINEASM) {
1058      // Inline asm can clobber physical defs.
1059      unsigned NumOps = Node->getNumOperands();
1060      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1061        --NumOps;  // Ignore the glue operand.
1062
1063      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
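      // Inline asm operands come in groups: a flag word describing the
      // operand kind and how many register operands follow it. Walk the
      // groups and collect any physical register defs or clobbers.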
1064        unsigned Flags =
1065          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1066        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1067
1068        ++i; // Skip the ID value.
1069        if (InlineAsm::isRegDefKind(Flags) ||
1070            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1071            InlineAsm::isClobberKind(Flags)) {
1072          // Check for def of register or earlyclobber register.
1073          for (; NumVals; --NumVals, ++i) {
1074            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1075            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1076              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1077          }
1078        } else
1079          i += NumVals;
1080      }
1081      continue;
1082    }
1083
1084    if (!Node->isMachineOpcode())
1085      continue;
1086    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1087    if (!MCID.ImplicitDefs)
1088      continue;
1089    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
1090      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1091  }
1092
1093  return !LRegs.empty();
1094}
1095
1096/// Return a node that can be scheduled in this cycle. Requirements:
1097/// (1) Ready: latency has been satisfied
1098/// (2) No Hazards: resources are available
1099/// (3) No Interferences: may unschedule to break register interferences.
1100SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1101  SmallVector<SUnit*, 4> Interferences;
1102  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1103
1104  SUnit *CurSU = AvailableQueue->pop();
1105  while (CurSU) {
1106    SmallVector<unsigned, 4> LRegs;
1107    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1108      break;
1109    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1110
1111    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1112    Interferences.push_back(CurSU);
1113    CurSU = AvailableQueue->pop();
1114  }
1115  if (CurSU) {
1116    // Add the nodes that aren't ready back onto the available list.
1117    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1118      Interferences[i]->isPending = false;
1119      assert(Interferences[i]->isAvailable && "must still be available");
1120      AvailableQueue->push(Interferences[i]);
1121    }
1122    return CurSU;
1123  }
1124
1125  // All candidates are delayed due to live physical reg dependencies.
1126  // Try backtracking, code duplication, or inserting cross class copies
1127  // to resolve it.
1128  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1129    SUnit *TrySU = Interferences[i];
1130    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1131
1132    // Try unscheduling up to the point where it's safe to schedule
1133    // this node.
1134    SUnit *BtSU = NULL;
1135    unsigned LiveCycle = UINT_MAX;
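    // Among the interfering live registers, find the use (LiveRegGens entry)
    // with the smallest height; backtracking to it unschedules it along with
    // everything scheduled after it.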
1136    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1137      unsigned Reg = LRegs[j];
1138      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1139        BtSU = LiveRegGens[Reg];
1140        LiveCycle = BtSU->getHeight();
1141      }
1142    }
1143    if (!WillCreateCycle(TrySU, BtSU))  {
1144      BacktrackBottomUp(TrySU, BtSU);
1145
1146      // Force the current node to be scheduled before the node that
1147      // requires the physical reg dep.
1148      if (BtSU->isAvailable) {
1149        BtSU->isAvailable = false;
1150        if (!BtSU->isPending)
1151          AvailableQueue->remove(BtSU);
1152      }
1153      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1154                          /*Reg=*/0, /*isNormalMemory=*/false,
1155                          /*isMustAlias=*/false, /*isArtificial=*/true));
1156
1157      // If one or more successors have been unscheduled, then the current
1158      // node is no longer available. Schedule a successor that's now
1159      // available instead.
1160      if (!TrySU->isAvailable) {
1161        CurSU = AvailableQueue->pop();
1162      }
1163      else {
1164        CurSU = TrySU;
1165        TrySU->isPending = false;
1166        Interferences.erase(Interferences.begin()+i);
1167      }
1168      break;
1169    }
1170  }
1171
1172  if (!CurSU) {
1173    // Can't backtrack. If it's too expensive to copy the value, then try to
1174    // duplicate the nodes that produce these "too expensive to copy"
1175    // values to break the dependency. In case even that doesn't work,
1176    // insert cross class copies.
1177    // If it's not too expensive, i.e. cost != -1, issue copies.
1178    SUnit *TrySU = Interferences[0];
1179    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1180    assert(LRegs.size() == 1 && "Can't handle this yet!");
1181    unsigned Reg = LRegs[0];
1182    SUnit *LRDef = LiveRegDefs[Reg];
1183    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1184    const TargetRegisterClass *RC =
1185      TRI->getMinimalPhysRegClass(Reg, VT);
1186    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1187
1188    // If the cross copy register class is the same as RC, then it must be
1189    // possible to copy the value directly. Do not try to duplicate the def.
1190    // If the cross copy register class is not the same as RC, then it's
1191    // possible to copy the value, but it requires expensive cross register
1192    // class copies.
1193    // If cross copy register class is null, then it's not possible to copy
1194    // the value at all.
1195    SUnit *NewDef = 0;
1196    if (DestRC != RC) {
1197      NewDef = CopyAndMoveSuccessors(LRDef);
1198      if (!DestRC && !NewDef)
1199        report_fatal_error("Can't handle live physical register dependency!");
1200    }
1201    if (!NewDef) {
1202      // Issue copies, these can be expensive cross register class copies.
1203      SmallVector<SUnit*, 2> Copies;
1204      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1205      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1206            << " to SU #" << Copies.front()->NodeNum << "\n");
1207      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1208                          /*Reg=*/0, /*isNormalMemory=*/false,
1209                          /*isMustAlias=*/false,
1210                          /*isArtificial=*/true));
1211      NewDef = Copies.back();
1212    }
1213
1214    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1215          << " to SU #" << TrySU->NodeNum << "\n");
1216    LiveRegDefs[Reg] = NewDef;
1217    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1218                         /*Reg=*/0, /*isNormalMemory=*/false,
1219                         /*isMustAlias=*/false,
1220                         /*isArtificial=*/true));
1221    TrySU->isAvailable = false;
1222    CurSU = NewDef;
1223  }
1224
1225  assert(CurSU && "Unable to resolve live physical register dependencies!");
1226
1227  // Add the nodes that aren't ready back onto the available list.
1228  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1229    Interferences[i]->isPending = false;
1230    // May no longer be available due to backtracking.
1231    if (Interferences[i]->isAvailable) {
1232      AvailableQueue->push(Interferences[i]);
1233    }
1234  }
1235  return CurSU;
1236}
1237
1238/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1239/// schedulers.
1240void ScheduleDAGRRList::ListScheduleBottomUp() {
1241  // Release any predecessors of the special Exit node.
1242  ReleasePredecessors(&ExitSU);
1243
1244  // Add root to Available queue.
1245  if (!SUnits.empty()) {
1246    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1247    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1248    RootSU->isAvailable = true;
1249    AvailableQueue->push(RootSU);
1250  }
1251
1252  // While Available queue is not empty, grab the node with the highest
1253  // priority. If it is not ready put it back.  Schedule the node.
1254  Sequence.reserve(SUnits.size());
1255  while (!AvailableQueue->empty()) {
1256    DEBUG(dbgs() << "\nExamining Available:\n";
1257          AvailableQueue->dump(this));
1258
1259    // Pick the best node to schedule taking all constraints into
1260    // consideration.
1261    SUnit *SU = PickNodeToScheduleBottomUp();
1262
1263    AdvancePastStalls(SU);
1264
1265    ScheduleNodeBottomUp(SU);
1266
1267    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1268      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1269      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1270      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1271    }
1272  }
1273
1274  // Reverse the order if it is bottom up.
1275  std::reverse(Sequence.begin(), Sequence.end());
1276
1277#ifndef NDEBUG
1278  VerifySchedule(/*isBottomUp=*/true);
1279#endif
1280}
1281
1282//===----------------------------------------------------------------------===//
1283//                RegReductionPriorityQueue Definition
1284//===----------------------------------------------------------------------===//
1285//
1286// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1287// to reduce register pressure.
1288//
1289namespace {
1290class RegReductionPQBase;
1291
1292struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1293  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1294};
1295
1296#ifndef NDEBUG
1297template<class SF>
1298struct reverse_sort : public queue_sort {
1299  SF &SortFunc;
1300  reverse_sort(SF &sf) : SortFunc(sf) {}
1301  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
1302
1303  bool operator()(SUnit* left, SUnit* right) const {
1304    // reverse left/right rather than simply !SortFunc(left, right)
1305    // to expose different paths in the comparison logic.
1306    return SortFunc(right, left);
1307  }
1308};
1309#endif // NDEBUG
1310
1311/// bu_ls_rr_sort - Priority function for the bottom-up register pressure
1312/// reduction scheduler.
1313struct bu_ls_rr_sort : public queue_sort {
1314  enum {
1315    IsBottomUp = true,
1316    HasReadyFilter = false
1317  };
1318
1319  RegReductionPQBase *SPQ;
1320  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1321  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1322
1323  bool operator()(SUnit* left, SUnit* right) const;
1324};
1325
1326// src_ls_rr_sort - Priority function for source order scheduler.
1327struct src_ls_rr_sort : public queue_sort {
1328  enum {
1329    IsBottomUp = true,
1330    HasReadyFilter = false
1331  };
1332
1333  RegReductionPQBase *SPQ;
1334  src_ls_rr_sort(RegReductionPQBase *spq)
1335    : SPQ(spq) {}
1336  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1337    : SPQ(RHS.SPQ) {}
1338
1339  bool operator()(SUnit* left, SUnit* right) const;
1340};
1341
1342// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1343struct hybrid_ls_rr_sort : public queue_sort {
1344  enum {
1345    IsBottomUp = true,
1346    HasReadyFilter = false
1347  };
1348
1349  RegReductionPQBase *SPQ;
1350  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1351    : SPQ(spq) {}
1352  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1353    : SPQ(RHS.SPQ) {}
1354
1355  bool isReady(SUnit *SU, unsigned CurCycle) const;
1356
1357  bool operator()(SUnit* left, SUnit* right) const;
1358};
1359
1360// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1361// scheduler.
1362struct ilp_ls_rr_sort : public queue_sort {
1363  enum {
1364    IsBottomUp = true,
1365    HasReadyFilter = false
1366  };
1367
1368  RegReductionPQBase *SPQ;
1369  ilp_ls_rr_sort(RegReductionPQBase *spq)
1370    : SPQ(spq) {}
1371  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1372    : SPQ(RHS.SPQ) {}
1373
1374  bool isReady(SUnit *SU, unsigned CurCycle) const;
1375
1376  bool operator()(SUnit* left, SUnit* right) const;
1377};
1378
1379class RegReductionPQBase : public SchedulingPriorityQueue {
1380protected:
1381  std::vector<SUnit*> Queue;
1382  unsigned CurQueueId;
1383  bool TracksRegPressure;
1384
1385  // SUnits - The SUnits for the current graph.
1386  std::vector<SUnit> *SUnits;
1387
1388  MachineFunction &MF;
1389  const TargetInstrInfo *TII;
1390  const TargetRegisterInfo *TRI;
1391  const TargetLowering *TLI;
1392  ScheduleDAGRRList *scheduleDAG;
1393
1394  // SethiUllmanNumbers - The SethiUllman number for each node.
1395  std::vector<unsigned> SethiUllmanNumbers;
1396
1397  /// RegPressure - Tracking current reg pressure per register class.
1398  ///
1399  std::vector<unsigned> RegPressure;
1400
1401  /// RegLimit - Tracking the number of allocatable registers per register
1402  /// class.
1403  std::vector<unsigned> RegLimit;
1404
1405public:
1406  RegReductionPQBase(MachineFunction &mf,
1407                     bool hasReadyFilter,
1408                     bool tracksrp,
1409                     const TargetInstrInfo *tii,
1410                     const TargetRegisterInfo *tri,
1411                     const TargetLowering *tli)
1412    : SchedulingPriorityQueue(hasReadyFilter),
1413      CurQueueId(0), TracksRegPressure(tracksrp),
1414      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1415    if (TracksRegPressure) {
1416      unsigned NumRC = TRI->getNumRegClasses();
1417      RegLimit.resize(NumRC);
1418      RegPressure.resize(NumRC);
1419      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1420      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1421      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1422             E = TRI->regclass_end(); I != E; ++I)
1423        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1424    }
1425  }
1426
1427  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1428    scheduleDAG = scheduleDag;
1429  }
1430
1431  ScheduleHazardRecognizer* getHazardRec() {
1432    return scheduleDAG->getHazardRec();
1433  }
1434
1435  void initNodes(std::vector<SUnit> &sunits);
1436
1437  void addNode(const SUnit *SU);
1438
1439  void updateNode(const SUnit *SU);
1440
1441  void releaseState() {
1442    SUnits = 0;
1443    SethiUllmanNumbers.clear();
1444    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1445  }
1446
1447  unsigned getNodePriority(const SUnit *SU) const;
1448
1449  unsigned getNodeOrdering(const SUnit *SU) const {
1450    if (!SU->getNode()) return 0;
1451
1452    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1453  }
1454
1455  bool empty() const { return Queue.empty(); }
1456
1457  void push(SUnit *U) {
1458    assert(!U->NodeQueueId && "Node in the queue already");
1459    U->NodeQueueId = ++CurQueueId;
1460    Queue.push_back(U);
1461  }
1462
1463  void remove(SUnit *SU) {
1464    assert(!Queue.empty() && "Queue is empty!");
1465    assert(SU->NodeQueueId != 0 && "Not in queue!");
1466    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1467                                                 SU);
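    // The queue is unordered, so remove SU by swapping it with the last
    // element and popping the back instead of erasing from the middle.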
1468    if (I != prior(Queue.end()))
1469      std::swap(*I, Queue.back());
1470    Queue.pop_back();
1471    SU->NodeQueueId = 0;
1472  }
1473
1474  bool tracksRegPressure() const { return TracksRegPressure; }
1475
1476  void dumpRegPressure() const;
1477
1478  bool HighRegPressure(const SUnit *SU) const;
1479
1480  bool MayReduceRegPressure(SUnit *SU) const;
1481
1482  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1483
1484  void ScheduledNode(SUnit *SU);
1485
1486  void UnscheduledNode(SUnit *SU);
1487
1488protected:
1489  bool canClobber(const SUnit *SU, const SUnit *Op);
1490  void AddPseudoTwoAddrDeps();
1491  void PrescheduleNodesWithMultipleUses();
1492  void CalculateSethiUllmanNumbers();
1493};
1494
1495template<class SF>
1496static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1497  std::vector<SUnit *>::iterator Best = Q.begin();
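  // Linear scan for the highest-priority node: Picker(A, B) returns true when
  // B should be scheduled before A, so Best tracks the best node seen so far.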
1498  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1499         E = Q.end(); I != E; ++I)
1500    if (Picker(*Best, *I))
1501      Best = I;
1502  SUnit *V = *Best;
1503  if (Best != prior(Q.end()))
1504    std::swap(*Best, Q.back());
1505  Q.pop_back();
1506  return V;
1507}
1508
1509template<class SF>
1510SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1511#ifndef NDEBUG
1512  if (DAG->StressSched) {
1513    reverse_sort<SF> RPicker(Picker);
1514    return popFromQueueImpl(Q, RPicker);
1515  }
1516#endif
1517  (void)DAG;
1518  return popFromQueueImpl(Q, Picker);
1519}
1520
1521template<class SF>
1522class RegReductionPriorityQueue : public RegReductionPQBase {
1523  SF Picker;
1524
1525public:
1526  RegReductionPriorityQueue(MachineFunction &mf,
1527                            bool tracksrp,
1528                            const TargetInstrInfo *tii,
1529                            const TargetRegisterInfo *tri,
1530                            const TargetLowering *tli)
1531    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1532      Picker(this) {}
1533
1534  bool isBottomUp() const { return SF::IsBottomUp; }
1535
1536  bool isReady(SUnit *U) const {
1537    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1538  }
1539
1540  SUnit *pop() {
1541    if (Queue.empty()) return NULL;
1542
1543    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1544    V->NodeQueueId = 0;
1545    return V;
1546  }
1547
1548  void dump(ScheduleDAG *DAG) const {
1549    // Emulate pop() without clobbering NodeQueueIds.
1550    std::vector<SUnit*> DumpQueue = Queue;
1551    SF DumpPicker = Picker;
1552    while (!DumpQueue.empty()) {
1553      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1554      dbgs() << "Height " << SU->getHeight() << ": ";
1555      SU->dump(DAG);
1556    }
1557  }
1558};
1559
1560typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1561BURegReductionPriorityQueue;
1562
1563typedef RegReductionPriorityQueue<src_ls_rr_sort>
1564SrcRegReductionPriorityQueue;
1565
1566typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1567HybridBURRPriorityQueue;
1568
1569typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1570ILPBURRPriorityQueue;
1571} // end anonymous namespace
1572
1573//===----------------------------------------------------------------------===//
1574//           Static Node Priority for Register Pressure Reduction
1575//===----------------------------------------------------------------------===//
1576
1577// Check for special nodes that bypass scheduling heuristics.
1578// Currently this pushes TokenFactor nodes down, but may be used for other
1579// pseudo-ops as well.
1580//
1581// Return -1 to schedule right above left, 1 for left above right.
1582// Return 0 if no bias exists.
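//
// For example, if only 'left' is marked isScheduleLow, this returns -1 so
// that 'right' is scheduled above it and the schedule-low node sinks toward
// the bottom of the block.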
1583static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1584  bool LSchedLow = left->isScheduleLow;
1585  bool RSchedLow = right->isScheduleLow;
1586  if (LSchedLow != RSchedLow)
1587    return LSchedLow < RSchedLow ? 1 : -1;
1588  return 0;
1589}
1590
1591/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
1592/// A smaller number means higher priority.
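/// For example: an SU with no data predecessors gets 1; an SU whose operands
/// have numbers 2 and 3 gets 3 (the max); an SU whose operands both have
/// number 2 gets 3 (the max plus one for the tie).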
1593static unsigned
1594CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1595  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1596  if (SethiUllmanNumber != 0)
1597    return SethiUllmanNumber;
1598
1599  unsigned Extra = 0;
1600  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1601       I != E; ++I) {
1602    if (I->isCtrl()) continue;  // ignore chain preds
1603    SUnit *PredSU = I->getSUnit();
1604    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1605    if (PredSethiUllman > SethiUllmanNumber) {
1606      SethiUllmanNumber = PredSethiUllman;
1607      Extra = 0;
1608    } else if (PredSethiUllman == SethiUllmanNumber)
1609      ++Extra;
1610  }
1611
1612  SethiUllmanNumber += Extra;
1613
1614  if (SethiUllmanNumber == 0)
1615    SethiUllmanNumber = 1;
1616
1617  return SethiUllmanNumber;
1618}
1619
1620/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1621/// scheduling units.
1622void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1623  SethiUllmanNumbers.assign(SUnits->size(), 0);
1624
1625  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1626    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1627}
1628
1629void RegReductionPQBase::addNode(const SUnit *SU) {
1630  unsigned SUSize = SethiUllmanNumbers.size();
1631  if (SUnits->size() > SUSize)
1632    SethiUllmanNumbers.resize(SUSize*2, 0);
1633  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1634}
1635
1636void RegReductionPQBase::updateNode(const SUnit *SU) {
1637  SethiUllmanNumbers[SU->NodeNum] = 0;
1638  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1639}
1640
1641// Lower priority means schedule further down. For bottom-up scheduling, lower
1642// priority SUs are scheduled before higher priority SUs.
1643unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1644  assert(SU->NodeNum < SethiUllmanNumbers.size());
1645  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1646  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1647    // CopyToReg should be close to its uses to facilitate coalescing and
1648    // avoid spilling.
1649    return 0;
1650  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1651      Opc == TargetOpcode::SUBREG_TO_REG ||
1652      Opc == TargetOpcode::INSERT_SUBREG)
1653    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1654    // close to their uses to facilitate coalescing.
1655    return 0;
1656  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1657    // If SU does not have a register use, i.e. it doesn't produce a value
1658    // that would be consumed (e.g. store), then it terminates a chain of
1659    // computation.  Give it a large SethiUllman number so it will be
1660    // scheduled right before its predecessors so that it doesn't lengthen
1661    // their live ranges.
1662    return 0xffff;
1663  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1664    // If SU does not have a register def, schedule it close to its uses
1665    // because it does not lengthen any live ranges.
1666    return 0;
1667#if 1
1668  return SethiUllmanNumbers[SU->NodeNum];
1669#else
1670  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1671  if (SU->isCallOp) {
1672    // FIXME: This assumes all of the defs are used as call operands.
1673    int NP = (int)Priority - SU->getNode()->getNumValues();
1674    return (NP > 0) ? NP : 0;
1675  }
1676  return Priority;
1677#endif
1678}
1679
1680//===----------------------------------------------------------------------===//
1681//                     Register Pressure Tracking
1682//===----------------------------------------------------------------------===//
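// A note on the model used below: RegPressure[RC] approximates the pressure
// (weighted count of live values) in register class RC at the current point
// of the bottom-up schedule, and is compared against RegLimit[RC].
// ScheduledNode raises pressure for operand defs that become live and
// releases the node's own defs; UnscheduledNode, used when the scheduler
// backtracks, applies the inverse adjustments.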
1683
1684void RegReductionPQBase::dumpRegPressure() const {
1685  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1686         E = TRI->regclass_end(); I != E; ++I) {
1687    const TargetRegisterClass *RC = *I;
1688    unsigned Id = RC->getID();
1689    unsigned RP = RegPressure[Id];
1690    if (!RP) continue;
1691    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1692          << '\n');
1693  }
1694}
1695
1696bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1697  if (!TLI)
1698    return false;
1699
1700  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1701       I != E; ++I) {
1702    if (I->isCtrl())
1703      continue;
1704    SUnit *PredSU = I->getSUnit();
1705    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1706    // to cover the number of registers defined (they are all live).
1707    if (PredSU->NumRegDefsLeft == 0) {
1708      continue;
1709    }
1710    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1711         RegDefPos.IsValid(); RegDefPos.Advance()) {
1712      unsigned RCId, Cost;
1713      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1714
1715      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1716        return true;
1717    }
1718  }
1719  return false;
1720}
1721
1722bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1723  const SDNode *N = SU->getNode();
1724
1725  if (!N->isMachineOpcode() || !SU->NumSuccs)
1726    return false;
1727
1728  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1729  for (unsigned i = 0; i != NumDefs; ++i) {
1730    EVT VT = N->getValueType(i);
1731    if (!N->hasAnyUseOfValue(i))
1732      continue;
1733    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1734    if (RegPressure[RCId] >= RegLimit[RCId])
1735      return true;
1736  }
1737  return false;
1738}
1739
1740// Compute the register pressure contribution of this instruction by counting up
1741// for uses that are not live and down for defs. Only count register classes
1742// that are already under high pressure. As a side effect, compute the number of
1743// uses of registers that are already live.
1744//
1745// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1746// so could probably be factored.
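//
// For example, an SU that uses two values which are not yet live and whose
// classes are already at their limit, and that defines one value in such a
// class, gets a diff of 2 - 1 = 1: scheduling it would make the pressure in
// those classes worse.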
1747int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1748  LiveUses = 0;
1749  int PDiff = 0;
1750  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1751       I != E; ++I) {
1752    if (I->isCtrl())
1753      continue;
1754    SUnit *PredSU = I->getSUnit();
1755    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1756    // to cover the number of registers defined (they are all live).
1757    if (PredSU->NumRegDefsLeft == 0) {
1758      if (PredSU->getNode()->isMachineOpcode())
1759        ++LiveUses;
1760      continue;
1761    }
1762    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1763         RegDefPos.IsValid(); RegDefPos.Advance()) {
1764      EVT VT = RegDefPos.GetValue();
1765      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1766      if (RegPressure[RCId] >= RegLimit[RCId])
1767        ++PDiff;
1768    }
1769  }
1770  const SDNode *N = SU->getNode();
1771
1772  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1773    return PDiff;
1774
1775  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1776  for (unsigned i = 0; i != NumDefs; ++i) {
1777    EVT VT = N->getValueType(i);
1778    if (!N->hasAnyUseOfValue(i))
1779      continue;
1780    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1781    if (RegPressure[RCId] >= RegLimit[RCId])
1782      --PDiff;
1783  }
1784  return PDiff;
1785}
1786
1787void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1788  if (!TracksRegPressure)
1789    return;
1790
1791  if (!SU->getNode())
1792    return;
1793
1794  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1795       I != E; ++I) {
1796    if (I->isCtrl())
1797      continue;
1798    SUnit *PredSU = I->getSUnit();
1799    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1800    // to cover the number of registers defined (they are all live).
1801    if (PredSU->NumRegDefsLeft == 0) {
1802      continue;
1803    }
1804    // FIXME: The ScheduleDAG currently loses information about which of a
1805    // node's values is consumed by each dependence. Consequently, if the node
1806    // defines multiple register classes, we don't know which to pressurize
1807    // here. Instead the following loop consumes the register defs in an
1808    // arbitrary order. At least it handles the common case of clustered loads
1809    // to the same class. For precise liveness, each SDep needs to indicate the
1810    // result number. But that tightly couples the ScheduleDAG with the
1811    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1812    // value type or register class to SDep.
1813    //
1814    // The most important aspect of register tracking is balancing the increase
1815    // here with the reduction further below. Note that this SU may use multiple
1816    // defs in PredSU. They can't be determined here, but we've already
1817    // compensated by reducing NumRegDefsLeft in PredSU during
1818    // ScheduleDAGSDNodes::AddSchedEdges.
1819    --PredSU->NumRegDefsLeft;
1820    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1821    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1822         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1823      if (SkipRegDefs)
1824        continue;
1825
1826      unsigned RCId, Cost;
1827      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1828      RegPressure[RCId] += Cost;
1829      break;
1830    }
1831  }
1832
1833  // We should have this assert, but there may be dead SDNodes that never
1834  // materialize as SUnits, so they don't appear to generate liveness.
1835  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1836  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1837  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1838       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1839    if (SkipRegDefs > 0)
1840      continue;
1841    unsigned RCId, Cost;
1842    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1843    if (RegPressure[RCId] < Cost) {
1844      // Register pressure tracking is imprecise. This can happen. But we try
1845      // hard not to let it happen because it likely results in poor scheduling.
1846      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1847      RegPressure[RCId] = 0;
1848    }
1849    else {
1850      RegPressure[RCId] -= Cost;
1851    }
1852  }
1853  dumpRegPressure();
1854}
1855
1856void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1857  if (!TracksRegPressure)
1858    return;
1859
1860  const SDNode *N = SU->getNode();
1861  if (!N) return;
1862
1863  if (!N->isMachineOpcode()) {
1864    if (N->getOpcode() != ISD::CopyToReg)
1865      return;
1866  } else {
1867    unsigned Opc = N->getMachineOpcode();
1868    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1869        Opc == TargetOpcode::INSERT_SUBREG ||
1870        Opc == TargetOpcode::SUBREG_TO_REG ||
1871        Opc == TargetOpcode::REG_SEQUENCE ||
1872        Opc == TargetOpcode::IMPLICIT_DEF)
1873      return;
1874  }
1875
1876  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1877       I != E; ++I) {
1878    if (I->isCtrl())
1879      continue;
1880    SUnit *PredSU = I->getSUnit();
1881    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1882    // counts data deps.
1883    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1884      continue;
1885    const SDNode *PN = PredSU->getNode();
1886    if (!PN->isMachineOpcode()) {
1887      if (PN->getOpcode() == ISD::CopyFromReg) {
1888        EVT VT = PN->getValueType(0);
1889        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1890        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1891      }
1892      continue;
1893    }
1894    unsigned POpc = PN->getMachineOpcode();
1895    if (POpc == TargetOpcode::IMPLICIT_DEF)
1896      continue;
1897    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
1898        POpc == TargetOpcode::INSERT_SUBREG ||
1899        POpc == TargetOpcode::SUBREG_TO_REG) {
1900      EVT VT = PN->getValueType(0);
1901      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1902      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1903      continue;
1904    }
1905    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1906    for (unsigned i = 0; i != NumDefs; ++i) {
1907      EVT VT = PN->getValueType(i);
1908      if (!PN->hasAnyUseOfValue(i))
1909        continue;
1910      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1911      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1912        // Register pressure tracking is imprecise. This can happen.
1913        RegPressure[RCId] = 0;
1914      else
1915        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1916    }
1917  }
1918
1919  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1920  // may transfer data dependencies to CopyToReg.
1921  if (SU->NumSuccs && N->isMachineOpcode()) {
1922    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1923    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1924      EVT VT = N->getValueType(i);
1925      if (VT == MVT::Glue || VT == MVT::Other)
1926        continue;
1927      if (!N->hasAnyUseOfValue(i))
1928        continue;
1929      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1930      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1931    }
1932  }
1933
1934  dumpRegPressure();
1935}
1936
1937//===----------------------------------------------------------------------===//
1938//           Dynamic Node Priority for Register Pressure Reduction
1939//===----------------------------------------------------------------------===//
1940
1941/// closestSucc - Returns the scheduled cycle of the successor which is
1942/// closest to the current cycle.
1943static unsigned closestSucc(const SUnit *SU) {
1944  unsigned MaxHeight = 0;
1945  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1946       I != E; ++I) {
1947    if (I->isCtrl()) continue;  // ignore chain succs
1948    unsigned Height = I->getSUnit()->getHeight();
1949    // If there are a bunch of CopyToRegs stacked up, they should be considered
1950    // to be at the same position.
1951    if (I->getSUnit()->getNode() &&
1952        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1953      Height = closestSucc(I->getSUnit())+1;
1954    if (Height > MaxHeight)
1955      MaxHeight = Height;
1956  }
1957  return MaxHeight;
1958}
1959
1960/// calcMaxScratches - Returns a cost estimate of the worst case requirement
1961/// for scratch registers, i.e. the number of data dependencies.
1962static unsigned calcMaxScratches(const SUnit *SU) {
1963  unsigned Scratches = 0;
1964  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1965       I != E; ++I) {
1966    if (I->isCtrl()) continue;  // ignore chain preds
1967    Scratches++;
1968  }
1969  return Scratches;
1970}
1971
1972/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
1973/// CopyFromReg from a virtual register.
1974static bool hasOnlyLiveInOpers(const SUnit *SU) {
1975  bool RetVal = false;
1976  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1977       I != E; ++I) {
1978    if (I->isCtrl()) continue;
1979    const SUnit *PredSU = I->getSUnit();
1980    if (PredSU->getNode() &&
1981        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
1982      unsigned Reg =
1983        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
1984      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1985        RetVal = true;
1986        continue;
1987      }
1988    }
1989    return false;
1990  }
1991  return RetVal;
1992}
1993
1994/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
1995/// CopyToReg to a virtual register. This SU def is probably a liveout and
1996/// it has no other use. It should be scheduled closer to the terminator.
1997static bool hasOnlyLiveOutUses(const SUnit *SU) {
1998  bool RetVal = false;
1999  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2000       I != E; ++I) {
2001    if (I->isCtrl()) continue;
2002    const SUnit *SuccSU = I->getSUnit();
2003    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2004      unsigned Reg =
2005        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2006      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2007        RetVal = true;
2008        continue;
2009      }
2010    }
2011    return false;
2012  }
2013  return RetVal;
2014}
2015
2016// Set isVRegCycle for a node with only live in opers and live out uses. Also
2017// set isVRegCycle for its CopyFromReg operands.
2018//
2019// This is only relevant for single-block loops, in which case the VRegCycle
2020// node is likely an induction variable in which the operand and target virtual
2021// registers should be coalesced (e.g. pre/post increment values). Setting the
2022// isVRegCycle flag helps the scheduler prioritize other uses of the same
2023// CopyFromReg so that this node becomes the virtual register "kill". This
2024// avoids interference between the values live in and out of the block and
2025// eliminates a copy inside the loop.
2026static void initVRegCycle(SUnit *SU) {
2027  if (DisableSchedVRegCycle)
2028    return;
2029
2030  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2031    return;
2032
2033  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2034
2035  SU->isVRegCycle = true;
2036
2037  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2038       I != E; ++I) {
2039    if (I->isCtrl()) continue;
2040    I->getSUnit()->isVRegCycle = true;
2041  }
2042}
2043
2044// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2045// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2046static void resetVRegCycle(SUnit *SU) {
2047  if (!SU->isVRegCycle)
2048    return;
2049
2050  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2051       I != E; ++I) {
2052    if (I->isCtrl()) continue;  // ignore chain preds
2053    SUnit *PredSU = I->getSUnit();
2054    if (PredSU->isVRegCycle) {
2055      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2056             "VRegCycle def must be CopyFromReg");
2057      I->getSUnit()->isVRegCycle = 0;
2058    }
2059  }
2060}
2061
2062// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2063// means a node that defines the VRegCycle has not been scheduled yet.
2064static bool hasVRegCycleUse(const SUnit *SU) {
2065  // If this SU also defines the VReg, don't hoist it as a "use".
2066  if (SU->isVRegCycle)
2067    return false;
2068
2069  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2070       I != E; ++I) {
2071    if (I->isCtrl()) continue;  // ignore chain preds
2072    if (I->getSUnit()->isVRegCycle &&
2073        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2074      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2075      return true;
2076    }
2077  }
2078  return false;
2079}
2080
2081// Check for either a dependence (latency) or resource (hazard) stall.
2082//
2083// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
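//
// For example, when the current cycle is 5, a node whose Height argument is 7
// is reported as a dependence stall before the hazard recognizer is even
// consulted.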
2084static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2085  if ((int)SPQ->getCurCycle() < Height) return true;
2086  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2087      != ScheduleHazardRecognizer::NoHazard)
2088    return true;
2089  return false;
2090}
2091
2092// Return -1 if left has higher priority, 1 if right has higher priority.
2093// Return 0 if latency-based priority is equivalent.
2094static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2095                            RegReductionPQBase *SPQ) {
2096  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2097  // been scheduled will induce a copy. Model this as an extra cycle of latency.
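  // For example, of two otherwise identical SUs of height 4, the one with such
  // a VRegCycle use is compared below as if its height were 5.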
2098  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2099  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2100  int LHeight = (int)left->getHeight() + LPenalty;
2101  int RHeight = (int)right->getHeight() + RPenalty;
2102
2103  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2104    BUHasStall(left, LHeight, SPQ);
2105  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2106    BUHasStall(right, RHeight, SPQ);
2107
2108  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2109  // If scheduling both of them will cause stalls, sort them according to
2110  // their height.
2111  if (LStall) {
2112    if (!RStall) {
2113      DEBUG(++FactorCount[FactStall]);
2114      return 1;
2115    }
2116    if (LHeight != RHeight) {
2117      DEBUG(++FactorCount[FactStall]);
2118      return LHeight > RHeight ? 1 : -1;
2119    }
2120  } else if (RStall) {
2121    DEBUG(++FactorCount[FactStall]);
2122    return -1;
2123  }
2124
2125  // If either node is scheduling for latency, sort them by height/depth
2126  // and latency.
2127  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2128                     right->SchedulingPref == Sched::ILP)) {
2129    if (DisableSchedCycles) {
2130      if (LHeight != RHeight) {
2131        DEBUG(++FactorCount[FactHeight]);
2132        return LHeight > RHeight ? 1 : -1;
2133      }
2134    }
2135    else {
2136      // If neither instruction stalls (!LStall && !RStall) then
2137      // their height is already covered, so only their depth matters. We also reach
2138      // this if both stall but have the same height.
2139      int LDepth = left->getDepth() - LPenalty;
2140      int RDepth = right->getDepth() - RPenalty;
2141      if (LDepth != RDepth) {
2142        DEBUG(++FactorCount[FactDepth]);
2143        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2144              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2145              << ") depth " << RDepth << "\n");
2146        return LDepth < RDepth ? 1 : -1;
2147      }
2148    }
2149    if (left->Latency != right->Latency) {
2150      DEBUG(++FactorCount[FactOther]);
2151      return left->Latency > right->Latency ? 1 : -1;
2152    }
2153  }
2154  return 0;
2155}
2156
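// BURRSort is the common tie-breaking chain shared by the bottom-up pickers.
// Roughly, it compares in this order: physical register defs, Sethi-Ullman
// priority (adjusted for call operands), source order when calls are
// involved, distance to the closest successor, scratch register count,
// latency (or raw height and depth when cycle-level scheduling is disabled or
// a call is involved), and finally the node queue id as a last resort.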
2157static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2158  // Schedule physical register definitions close to their use. This is
2159  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2160  // long as shortening physreg live ranges is generally good, we can defer
2161  // creating a subtarget hook.
2162  if (!DisableSchedPhysRegJoin) {
2163    bool LHasPhysReg = left->hasPhysRegDefs;
2164    bool RHasPhysReg = right->hasPhysRegDefs;
2165    if (LHasPhysReg != RHasPhysReg) {
2166      DEBUG(++FactorCount[FactRegUses]);
2167      #ifndef NDEBUG
2168      const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2169      #endif
2170      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2171            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2172            << PhysRegMsg[RHasPhysReg] << "\n");
2173      return LHasPhysReg < RHasPhysReg;
2174    }
2175  }
2176
2177  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2178  unsigned LPriority = SPQ->getNodePriority(left);
2179  unsigned RPriority = SPQ->getNodePriority(right);
2180
2181  // Be really careful about hoisting call operands above previous calls.
2182  // Only allow it if it would reduce register pressure.
2183  if (left->isCall && right->isCallOp) {
2184    unsigned RNumVals = right->getNode()->getNumValues();
2185    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2186  }
2187  if (right->isCall && left->isCallOp) {
2188    unsigned LNumVals = left->getNode()->getNumValues();
2189    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2190  }
2191
2192  if (LPriority != RPriority) {
2193    DEBUG(++FactorCount[FactStatic]);
2194    return LPriority > RPriority;
2195  }
2196
2197  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2198  // the same, keep source order.
2199  if (left->isCall || right->isCall) {
2200    unsigned LOrder = SPQ->getNodeOrdering(left);
2201    unsigned ROrder = SPQ->getNodeOrdering(right);
2202
2203    // Prefer an ordering where the lower the non-zero order number, the higher
2204    // the preference.
2205    if ((LOrder || ROrder) && LOrder != ROrder)
2206      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2207  }
2208
2209  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2210  // e.g.
2211  // t1 = op t2, c1
2212  // t3 = op t4, c2
2213  //
2214  // and the following instructions are both ready.
2215  // t2 = op c3
2216  // t4 = op c4
2217  //
2218  // Then schedule t2 = op first.
2219  // i.e.
2220  // t4 = op c4
2221  // t2 = op c3
2222  // t1 = op t2, c1
2223  // t3 = op t4, c2
2224  //
2225  // This creates more short live intervals.
2226  unsigned LDist = closestSucc(left);
2227  unsigned RDist = closestSucc(right);
2228  if (LDist != RDist) {
2229    DEBUG(++FactorCount[FactOther]);
2230    return LDist < RDist;
2231  }
2232
2233  // How many registers become live when the node is scheduled.
2234  unsigned LScratch = calcMaxScratches(left);
2235  unsigned RScratch = calcMaxScratches(right);
2236  if (LScratch != RScratch) {
2237    DEBUG(++FactorCount[FactOther]);
2238    return LScratch > RScratch;
2239  }
2240
2241  // Comparing latency against a call makes little sense unless the node
2242  // is register pressure-neutral.
2243  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2244    return (left->NodeQueueId > right->NodeQueueId);
2245
2246  // Do not compare latencies when one or both of the nodes are calls.
2247  if (!DisableSchedCycles &&
2248      !(left->isCall || right->isCall)) {
2249    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2250    if (result != 0)
2251      return result > 0;
2252  }
2253  else {
2254    if (left->getHeight() != right->getHeight()) {
2255      DEBUG(++FactorCount[FactHeight]);
2256      return left->getHeight() > right->getHeight();
2257    }
2258
2259    if (left->getDepth() != right->getDepth()) {
2260      DEBUG(++FactorCount[FactDepth]);
2261      return left->getDepth() < right->getDepth();
2262    }
2263  }
2264
2265  assert(left->NodeQueueId && right->NodeQueueId &&
2266         "NodeQueueId cannot be zero");
2267  DEBUG(++FactorCount[FactOther]);
2268  return (left->NodeQueueId > right->NodeQueueId);
2269}
2270
2271// Bottom up
2272bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2273  if (int res = checkSpecialNodes(left, right))
2274    return res > 0;
2275
2276  return BURRSort(left, right, SPQ);
2277}
2278
2279// Source order, otherwise bottom up.
2280bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2281  if (int res = checkSpecialNodes(left, right))
2282    return res > 0;
2283
2284  unsigned LOrder = SPQ->getNodeOrdering(left);
2285  unsigned ROrder = SPQ->getNodeOrdering(right);
2286
2287  // Prefer an ordering where the lower the non-zero order number, the higher
2288  // the preference.
2289  if ((LOrder || ROrder) && LOrder != ROrder)
2290    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2291
2292  return BURRSort(left, right, SPQ);
2293}
2294
2295// If the time between now and when the instruction will be ready can cover
2296// the spill code, then avoid adding it to the ready queue. This gives long
2297// stalls highest priority and allows hoisting across calls. It should also
2298// speed up processing the available queue.
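//
// For example, with ReadyDelay == 3 and CurCycle == 10, an SU of height 15
// that would not reduce register pressure is kept out of the available queue,
// while one of height 12 becomes available as long as the hazard recognizer
// reports no hazard.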
2299bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2300  static const unsigned ReadyDelay = 3;
2301
2302  if (SPQ->MayReduceRegPressure(SU)) return true;
2303
2304  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2305
2306  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2307      != ScheduleHazardRecognizer::NoHazard)
2308    return false;
2309
2310  return true;
2311}
2312
2313// Return true if right should be scheduled with higher priority than left.
2314bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2315  if (int res = checkSpecialNodes(left, right))
2316    return res > 0;
2317
2318  if (left->isCall || right->isCall)
2319    // No way to compute latency of calls.
2320    return BURRSort(left, right, SPQ);
2321
2322  bool LHigh = SPQ->HighRegPressure(left);
2323  bool RHigh = SPQ->HighRegPressure(right);
2324  // Avoid causing spills. If register pressure is high, schedule for
2325  // register pressure reduction.
2326  if (LHigh && !RHigh) {
2327    DEBUG(++FactorCount[FactPressureDiff]);
2328    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2329          << right->NodeNum << ")\n");
2330    return true;
2331  }
2332  else if (!LHigh && RHigh) {
2333    DEBUG(++FactorCount[FactPressureDiff]);
2334    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2335          << left->NodeNum << ")\n");
2336    return false;
2337  }
2338  if (!LHigh && !RHigh) {
2339    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2340    if (result != 0)
2341      return result > 0;
2342  }
2343  return BURRSort(left, right, SPQ);
2344}
2345
2346// Schedule as many instructions in each cycle as possible. So don't make an
2347// instruction available unless it is ready in the current cycle.
2348bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2349  if (SU->getHeight() > CurCycle) return false;
2350
2351  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2352      != ScheduleHazardRecognizer::NoHazard)
2353    return false;
2354
2355  return true;
2356}
2357
2358static bool canEnableCoalescing(SUnit *SU) {
2359  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2360  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2361    // CopyToReg should be close to its uses to facilitate coalescing and
2362    // avoid spilling.
2363    return true;
2364
2365  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2366      Opc == TargetOpcode::SUBREG_TO_REG ||
2367      Opc == TargetOpcode::INSERT_SUBREG)
2368    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2369    // close to their uses to facilitate coalescing.
2370    return true;
2371
2372  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2373    // If SU does not have a register def, schedule it close to its uses
2374    // because it does not lengthen any live ranges.
2375    return true;
2376
2377  return false;
2378}
2379
2380// list-ilp is currently an experimental scheduler that allows various
2381// heuristics to be enabled prior to the normal register reduction logic.
2382bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2383  if (int res = checkSpecialNodes(left, right))
2384    return res > 0;
2385
2386  if (left->isCall || right->isCall)
2387    // No way to compute latency of calls.
2388    return BURRSort(left, right, SPQ);
2389
2390  unsigned LLiveUses = 0, RLiveUses = 0;
2391  int LPDiff = 0, RPDiff = 0;
2392  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2393    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2394    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2395  }
2396  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2397    DEBUG(++FactorCount[FactPressureDiff]);
2398    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2399          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2400    return LPDiff > RPDiff;
2401  }
2402
2403  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2404    bool LReduce = canEnableCoalescing(left);
2405    bool RReduce = canEnableCoalescing(right);
2406    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2407    if (LReduce && !RReduce) return false;
2408    if (RReduce && !LReduce) return true;
2409  }
2410
2411  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2412    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2413          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2414    DEBUG(++FactorCount[FactRegUses]);
2415    return LLiveUses < RLiveUses;
2416  }
2417
2418  if (!DisableSchedStalls) {
2419    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2420    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2421    if (LStall != RStall) {
2422      DEBUG(++FactorCount[FactHeight]);
2423      return left->getHeight() > right->getHeight();
2424    }
2425  }
2426
2427  if (!DisableSchedCriticalPath) {
2428    int spread = (int)left->getDepth() - (int)right->getDepth();
2429    if (std::abs(spread) > MaxReorderWindow) {
2430      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2431            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2432            << right->getDepth() << "\n");
2433      DEBUG(++FactorCount[FactDepth]);
2434      return left->getDepth() < right->getDepth();
2435    }
2436  }
2437
2438  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2439    int spread = (int)left->getHeight() - (int)right->getHeight();
2440    if (std::abs(spread) > MaxReorderWindow) {
2441      DEBUG(++FactorCount[FactHeight]);
2442      return left->getHeight() > right->getHeight();
2443    }
2444  }
2445
2446  return BURRSort(left, right, SPQ);
2447}
2448
2449void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2450  SUnits = &sunits;
2451  // Add pseudo dependency edges for two-address nodes.
2452  AddPseudoTwoAddrDeps();
2453  // Reroute edges to nodes with multiple uses.
2454  if (!TracksRegPressure)
2455    PrescheduleNodesWithMultipleUses();
2456  // Calculate node priorities.
2457  CalculateSethiUllmanNumbers();
2458
2459  // For single block loops, mark nodes that look like canonical IV increments.
2460  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2461    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2462      initVRegCycle(&sunits[i]);
2463    }
2464  }
2465}
2466
2467//===----------------------------------------------------------------------===//
2468//                    Preschedule for Register Pressure
2469//===----------------------------------------------------------------------===//
2470
2471bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2472  if (SU->isTwoAddress) {
2473    unsigned Opc = SU->getNode()->getMachineOpcode();
2474    const MCInstrDesc &MCID = TII->get(Opc);
2475    unsigned NumRes = MCID.getNumDefs();
2476    unsigned NumOps = MCID.getNumOperands() - NumRes;
2477    for (unsigned i = 0; i != NumOps; ++i) {
2478      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2479        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2480        if (DU->getNodeId() != -1 &&
2481            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2482          return true;
2483      }
2484    }
2485  }
2486  return false;
2487}
2488
2489/// canClobberReachingPhysRegUse - True if SU would clobber one of its
2490/// successors' explicit physregs whose definition can reach DepSU.
2491/// i.e. DepSU should not be scheduled above SU.
2492static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2493                                         ScheduleDAGRRList *scheduleDAG,
2494                                         const TargetInstrInfo *TII,
2495                                         const TargetRegisterInfo *TRI) {
2496  const unsigned *ImpDefs
2497    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2498  if (!ImpDefs)
2499    return false;
2500
2501  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2502       SI != SE; ++SI) {
2503    SUnit *SuccSU = SI->getSUnit();
2504    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2505           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2506      if (!PI->isAssignedRegDep())
2507        continue;
2508
2509      for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
2510        // Return true if SU clobbers this physical register use and the
2511        // definition of the register reaches from DepSU. IsReachable queries a
2512        // topological forward sort of the DAG (following the successors).
2513        if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2514            scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2515          return true;
2516      }
2517    }
2518  }
2519  return false;
2520}
2521
2522/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2523/// physical register defs.
2524static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2525                                  const TargetInstrInfo *TII,
2526                                  const TargetRegisterInfo *TRI) {
2527  SDNode *N = SuccSU->getNode();
2528  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2529  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2530  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2531  for (const SDNode *SUNode = SU->getNode(); SUNode;
2532       SUNode = SUNode->getGluedNode()) {
2533    if (!SUNode->isMachineOpcode())
2534      continue;
2535    const unsigned *SUImpDefs =
2536      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2537    if (!SUImpDefs)
2538      return false;
2539    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2540      EVT VT = N->getValueType(i);
2541      if (VT == MVT::Glue || VT == MVT::Other)
2542        continue;
2543      if (!N->hasAnyUseOfValue(i))
2544        continue;
2545      unsigned Reg = ImpDefs[i - NumDefs];
2546      for (;*SUImpDefs; ++SUImpDefs) {
2547        unsigned SUReg = *SUImpDefs;
2548        if (TRI->regsOverlap(Reg, SUReg))
2549          return true;
2550      }
2551    }
2552  }
2553  return false;
2554}
2555
2556/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2557/// are not handled well by the general register pressure reduction
2558/// heuristics. When presented with code like this:
2559///
2560///      N
2561///    / |
2562///   /  |
2563///  U  store
2564///  |
2565/// ...
2566///
2567/// the heuristics tend to push the store up, but since the
2568/// operand of the store has another use (U), this would increase
2569/// the length of that other use (the U->N edge).
2570///
2571/// This function transforms code like the above to route U's
2572/// dependence through the store when possible, like this:
2573///
2574///      N
2575///      ||
2576///      ||
2577///     store
2578///       |
2579///       U
2580///       |
2581///      ...
2582///
2583/// This results in the store being scheduled immediately
2584/// after N, which shortens the U->N live range, reducing
2585/// register pressure.
2586///
2587void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2588  // Visit all the nodes in topological order, working top-down.
2589  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2590    SUnit *SU = &(*SUnits)[i];
2591    // For now, only look at nodes with no data successors, such as stores.
2592    // These are especially important, due to the heuristics in
2593    // getNodePriority for nodes with no data successors.
2594    if (SU->NumSuccs != 0)
2595      continue;
2596    // For now, only look at nodes with exactly one data predecessor.
2597    if (SU->NumPreds != 1)
2598      continue;
2599    // Avoid prescheduling copies to virtual registers, which don't behave
2600    // like other nodes from the perspective of scheduling heuristics.
2601    if (SDNode *N = SU->getNode())
2602      if (N->getOpcode() == ISD::CopyToReg &&
2603          TargetRegisterInfo::isVirtualRegister
2604            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2605        continue;
2606
2607    // Locate the single data predecessor.
2608    SUnit *PredSU = 0;
2609    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2610         EE = SU->Preds.end(); II != EE; ++II)
2611      if (!II->isCtrl()) {
2612        PredSU = II->getSUnit();
2613        break;
2614      }
2615    assert(PredSU);
2616
2617    // Don't rewrite edges that carry physregs, because that requires additional
2618    // support infrastructure.
2619    if (PredSU->hasPhysRegDefs)
2620      continue;
2621    // Short-circuit the case where SU is PredSU's only data successor.
2622    if (PredSU->NumSuccs == 1)
2623      continue;
2624    // Avoid prescheduling to copies from virtual registers, which don't behave
2625    // like other nodes from the perspective of scheduling heuristics.
2626    if (SDNode *N = SU->getNode())
2627      if (N->getOpcode() == ISD::CopyFromReg &&
2628          TargetRegisterInfo::isVirtualRegister
2629            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2630        continue;
2631
2632    // Perform checks on the successors of PredSU.
2633    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2634         EE = PredSU->Succs.end(); II != EE; ++II) {
2635      SUnit *PredSuccSU = II->getSUnit();
2636      if (PredSuccSU == SU) continue;
2637      // If PredSU has another successor with no data successors, for
2638      // now don't attempt to choose either over the other.
2639      if (PredSuccSU->NumSuccs == 0)
2640        goto outer_loop_continue;
2641      // Don't break physical register dependencies.
2642      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2643        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2644          goto outer_loop_continue;
2645      // Don't introduce graph cycles.
2646      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2647        goto outer_loop_continue;
2648    }
2649
2650    // Ok, the transformation is safe and the heuristics suggest it is
2651    // profitable. Update the graph.
2652    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2653                 << " next to PredSU #" << PredSU->NodeNum
2654                 << " to guide scheduling in the presence of multiple uses\n");
2655    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2656      SDep Edge = PredSU->Succs[i];
2657      assert(!Edge.isAssignedRegDep());
2658      SUnit *SuccSU = Edge.getSUnit();
2659      if (SuccSU != SU) {
2660        Edge.setSUnit(PredSU);
2661        scheduleDAG->RemovePred(SuccSU, Edge);
2662        scheduleDAG->AddPred(SU, Edge);
2663        Edge.setSUnit(SU);
2664        scheduleDAG->AddPred(SuccSU, Edge);
2665        --i;
2666      }
2667    }
2668  outer_loop_continue:;
2669  }
2670}
2671
2672/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2673/// it as a def&use operand, add a pseudo control edge from it to the other
2674/// node (if it won't create a cycle) so the two-address one will be scheduled
2675/// first (lower in the schedule). If both nodes are two-address, favor the
2676/// one that has a CopyToReg use (more likely to be a loop induction update).
2677/// If both are two-address, but one is commutable while the other is not
2678/// commutable, favor the one that's not commutable.
2679void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2680  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2681    SUnit *SU = &(*SUnits)[i];
2682    if (!SU->isTwoAddress)
2683      continue;
2684
2685    SDNode *Node = SU->getNode();
2686    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2687      continue;
2688
2689    bool isLiveOut = hasOnlyLiveOutUses(SU);
2690    unsigned Opc = Node->getMachineOpcode();
2691    const MCInstrDesc &MCID = TII->get(Opc);
2692    unsigned NumRes = MCID.getNumDefs();
2693    unsigned NumOps = MCID.getNumOperands() - NumRes;
2694    for (unsigned j = 0; j != NumOps; ++j) {
2695      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2696        continue;
2697      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2698      if (DU->getNodeId() == -1)
2699        continue;
2700      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2701      if (!DUSU) continue;
2702      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2703           E = DUSU->Succs.end(); I != E; ++I) {
2704        if (I->isCtrl()) continue;
2705        SUnit *SuccSU = I->getSUnit();
2706        if (SuccSU == SU)
2707          continue;
2708        // Be conservative. Ignore if nodes aren't at roughly the same
2709        // depth and height.
2710        if (SuccSU->getHeight() < SU->getHeight() &&
2711            (SU->getHeight() - SuccSU->getHeight()) > 1)
2712          continue;
2713        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2714        // constrains whatever is using the copy, instead of the copy
2715        // itself. In the case that the copy is coalesced, this
2716    // preserves the intent of the pseudo two-address heuristics.
2717        while (SuccSU->Succs.size() == 1 &&
2718               SuccSU->getNode()->isMachineOpcode() &&
2719               SuccSU->getNode()->getMachineOpcode() ==
2720                 TargetOpcode::COPY_TO_REGCLASS)
2721          SuccSU = SuccSU->Succs.front().getSUnit();
2722        // Don't constrain non-instruction nodes.
2723        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2724          continue;
2725        // Don't constrain nodes with physical register defs if the
2726        // predecessor can clobber them.
2727        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2728          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2729            continue;
2730        }
2731        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2732        // these may be coalesced away. We want them close to their uses.
2733        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2734        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2735            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2736            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2737          continue;
2738        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
2739            (!canClobber(SuccSU, DUSU) ||
2740             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2741             (!SU->isCommutable && SuccSU->isCommutable)) &&
2742            !scheduleDAG->IsReachable(SuccSU, SU)) {
2743          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2744                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2745          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2746                                        /*Reg=*/0, /*isNormalMemory=*/false,
2747                                        /*isMustAlias=*/false,
2748                                        /*isArtificial=*/true));
2749        }
2750      }
2751    }
2752  }
2753}
2754
2755//===----------------------------------------------------------------------===//
2756//                         Public Constructor Functions
2757//===----------------------------------------------------------------------===//
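// A quick way to tell the variants below apart: the plain register-reduction
// and source-order schedulers build their queues with register pressure
// tracking disabled and no TargetLowering, while the hybrid and ILP
// schedulers enable tracking and pass the TargetLowering so pressure can be
// measured per register class.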
2758
2759llvm::ScheduleDAGSDNodes *
2760llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2761                                 CodeGenOpt::Level OptLevel) {
2762  const TargetMachine &TM = IS->TM;
2763  const TargetInstrInfo *TII = TM.getInstrInfo();
2764  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2765
2766  BURegReductionPriorityQueue *PQ =
2767    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2768  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2769  PQ->setScheduleDAG(SD);
2770  return SD;
2771}
2772
2773llvm::ScheduleDAGSDNodes *
2774llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2775                                   CodeGenOpt::Level OptLevel) {
2776  const TargetMachine &TM = IS->TM;
2777  const TargetInstrInfo *TII = TM.getInstrInfo();
2778  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2779
2780  SrcRegReductionPriorityQueue *PQ =
2781    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2782  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2783  PQ->setScheduleDAG(SD);
2784  return SD;
2785}
2786
2787llvm::ScheduleDAGSDNodes *
2788llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2789                                   CodeGenOpt::Level OptLevel) {
2790  const TargetMachine &TM = IS->TM;
2791  const TargetInstrInfo *TII = TM.getInstrInfo();
2792  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2793  const TargetLowering *TLI = &IS->getTargetLowering();
2794
2795  HybridBURRPriorityQueue *PQ =
2796    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2797
2798  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2799  PQ->setScheduleDAG(SD);
2800  return SD;
2801}
2802
2803llvm::ScheduleDAGSDNodes *
2804llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2805                                CodeGenOpt::Level OptLevel) {
2806  const TargetMachine &TM = IS->TM;
2807  const TargetInstrInfo *TII = TM.getInstrInfo();
2808  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2809  const TargetLowering *TLI = &IS->getTargetLowering();
2810
2811  ILPBURRPriorityQueue *PQ =
2812    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2813  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2814  PQ->setScheduleDAG(SD);
2815  return SD;
2816}
2817