ScheduleDAGRRList.cpp revision 336298cf2cc68d9af163992b9f9cafddd4bb3c8a
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
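// These registrations make the schedulers selectable by name on the llc
// command line (e.g. -pre-RA-sched=list-ilp); see SchedulerRegistry for the
// lookup mechanism.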
68
69static cl::opt<bool> DisableSchedCycles(
70  "disable-sched-cycles", cl::Hidden, cl::init(false),
71  cl::desc("Disable cycle-level precision during preRA scheduling"));
72
73// Temporary sched=list-ilp flags until the heuristics are robust.
74static cl::opt<bool> DisableSchedRegPressure(
75  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
76  cl::desc("Disable regpressure priority in sched=list-ilp"));
77static cl::opt<bool> DisableSchedLiveUses(
78  "disable-sched-live-uses", cl::Hidden, cl::init(true),
79  cl::desc("Disable live use priority in sched=list-ilp"));
80static cl::opt<bool> DisableSchedStalls(
81  "disable-sched-stalls", cl::Hidden, cl::init(true),
82  cl::desc("Disable no-stall priority in sched=list-ilp"));
83static cl::opt<bool> DisableSchedCriticalPath(
84  "disable-sched-critical-path", cl::Hidden, cl::init(false),
85  cl::desc("Disable critical path priority in sched=list-ilp"));
86static cl::opt<bool> DisableSchedHeight(
87  "disable-sched-height", cl::Hidden, cl::init(false),
88  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
89
90static cl::opt<int> MaxReorderWindow(
91  "max-sched-reorder", cl::Hidden, cl::init(6),
92  cl::desc("Number of instructions to allow ahead of the critical path "
93           "in sched=list-ilp"));
94
95static cl::opt<unsigned> AvgIPC(
96  "sched-avg-ipc", cl::Hidden, cl::init(1),
97  cl::desc("Average inst/cycle whan no target itinerary exists."));
98
99#ifndef NDEBUG
100namespace {
101  // For sched=list-ilp, count the number of times each factor comes into play.
102  enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
103         FactOther, NumFactors };
104}
105static const char *FactorName[NumFactors] =
106{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"};
107static int FactorCount[NumFactors];
108#endif //!NDEBUG
109
110namespace {
111//===----------------------------------------------------------------------===//
112/// ScheduleDAGRRList - The actual register reduction list scheduler
113/// implementation.  This supports both top-down and bottom-up scheduling.
114///
115class ScheduleDAGRRList : public ScheduleDAGSDNodes {
116private:
117  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
118  /// it is top-down.
119  bool isBottomUp;
120
121  /// NeedLatency - True if the scheduler will make use of latency information.
122  ///
123  bool NeedLatency;
124
125  /// AvailableQueue - The priority queue to use for the available SUnits.
126  SchedulingPriorityQueue *AvailableQueue;
127
128  /// PendingQueue - This contains all of the instructions whose operands have
129  /// been issued, but their results are not ready yet (due to the latency of
130/// the operation).  Once the operands become available, the instruction is
131  /// added to the AvailableQueue.
132  std::vector<SUnit*> PendingQueue;
133
134  /// HazardRec - The hazard recognizer to use.
135  ScheduleHazardRecognizer *HazardRec;
136
137  /// CurCycle - The current scheduler state corresponds to this cycle.
138  unsigned CurCycle;
139
140  /// MinAvailableCycle - Cycle of the soonest available instruction.
141  unsigned MinAvailableCycle;
142
143  /// IssueCount - Count of instructions issued in this cycle.
144  /// Currently valid only for bottom-up scheduling.
145  unsigned IssueCount;
146
147  /// LiveRegDefs - A set of physical registers and their definitions that
148  /// are "live". These nodes must be scheduled before any other nodes that
149  /// modify the registers can be scheduled.
150  unsigned NumLiveRegs;
151  std::vector<SUnit*> LiveRegDefs;
152  std::vector<SUnit*> LiveRegGens;
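  // Roughly: for a register R that is currently "live" in this sense,
  // LiveRegDefs[R] is the not-yet-scheduled SUnit that defines R and
  // LiveRegGens[R] is the already-scheduled SUnit that consumes it. Both
  // entries are cleared once the defining node is scheduled.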
153
154  /// Topo - A topological ordering for SUnits which permits fast IsReachable
155  /// and similar queries.
156  ScheduleDAGTopologicalSort Topo;
157
158public:
159  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
160                    SchedulingPriorityQueue *availqueue,
161                    CodeGenOpt::Level OptLevel)
162    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
163      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
164      Topo(SUnits) {
165
166    const TargetMachine &tm = mf.getTarget();
167    if (DisableSchedCycles || !NeedLatency)
168      HazardRec = new ScheduleHazardRecognizer();
169    else
170      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
171  }
172
173  ~ScheduleDAGRRList() {
174    delete HazardRec;
175    delete AvailableQueue;
176  }
177
178  void Schedule();
179
180  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
181
182  /// IsReachable - Checks if SU is reachable from TargetSU.
183  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
184    return Topo.IsReachable(SU, TargetSU);
185  }
186
187  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
188  /// create a cycle.
189  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
190    return Topo.WillCreateCycle(SU, TargetSU);
191  }
192
193  /// AddPred - adds a predecessor edge to SUnit SU.
194  /// This returns true if this is a new predecessor.
195  /// Updates the topological ordering if required.
196  void AddPred(SUnit *SU, const SDep &D) {
197    Topo.AddPred(SU, D.getSUnit());
198    SU->addPred(D);
199  }
200
201  /// RemovePred - removes a predecessor edge from SUnit SU.
202  /// This returns true if an edge was removed.
203  /// Updates the topological ordering if required.
204  void RemovePred(SUnit *SU, const SDep &D) {
205    Topo.RemovePred(SU, D.getSUnit());
206    SU->removePred(D);
207  }
208
209private:
210  bool isReady(SUnit *SU) {
211    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
212      AvailableQueue->isReady(SU);
213  }
214
215  void ReleasePred(SUnit *SU, const SDep *PredEdge);
216  void ReleasePredecessors(SUnit *SU);
217  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
218  void ReleaseSuccessors(SUnit *SU);
219  void ReleasePending();
220  void AdvanceToCycle(unsigned NextCycle);
221  void AdvancePastStalls(SUnit *SU);
222  void EmitNode(SUnit *SU);
223  void ScheduleNodeBottomUp(SUnit*);
224  void CapturePred(SDep *PredEdge);
225  void UnscheduleNodeBottomUp(SUnit*);
226  void RestoreHazardCheckerBottomUp();
227  void BacktrackBottomUp(SUnit*, SUnit*);
228  SUnit *CopyAndMoveSuccessors(SUnit*);
229  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
230                                const TargetRegisterClass*,
231                                const TargetRegisterClass*,
232                                SmallVector<SUnit*, 2>&);
233  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
234
235  SUnit *PickNodeToScheduleBottomUp();
236  void ListScheduleBottomUp();
237
238  void ScheduleNodeTopDown(SUnit*);
239  void ListScheduleTopDown();
240
241
242  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
243  /// Updates the topological ordering if required.
244  SUnit *CreateNewSUnit(SDNode *N) {
245    unsigned NumSUnits = SUnits.size();
246    SUnit *NewNode = NewSUnit(N);
247    // Update the topological ordering.
248    if (NewNode->NodeNum >= NumSUnits)
249      Topo.InitDAGTopologicalSorting();
250    return NewNode;
251  }
252
253  /// CreateClone - Creates a new SUnit from an existing one.
254  /// Updates the topological ordering if required.
255  SUnit *CreateClone(SUnit *N) {
256    unsigned NumSUnits = SUnits.size();
257    SUnit *NewNode = Clone(N);
258    // Update the topological ordering.
259    if (NewNode->NodeNum >= NumSUnits)
260      Topo.InitDAGTopologicalSorting();
261    return NewNode;
262  }
263
264  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
265  /// need actual latency information but the hybrid scheduler does.
266  bool ForceUnitLatencies() const {
267    return !NeedLatency;
268  }
269};
270}  // end anonymous namespace
271
272
273/// Schedule - Schedule the DAG using list scheduling.
274void ScheduleDAGRRList::Schedule() {
275  DEBUG(dbgs()
276        << "********** List Scheduling BB#" << BB->getNumber()
277        << " '" << BB->getName() << "' **********\n");
278#ifndef NDEBUG
279  for (int i = 0; i < NumFactors; ++i) {
280    FactorCount[i] = 0;
281  }
282#endif //!NDEBUG
283
284  CurCycle = 0;
285  IssueCount = 0;
286  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
287  NumLiveRegs = 0;
288  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
289  LiveRegGens.resize(TRI->getNumRegs(), NULL);
290
291  // Build the scheduling graph.
292  BuildSchedGraph(NULL);
293
294  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
295          SUnits[su].dumpAll(this));
296  Topo.InitDAGTopologicalSorting();
297
298  AvailableQueue->initNodes(SUnits);
299
300  HazardRec->Reset();
301
302  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
303  if (isBottomUp)
304    ListScheduleBottomUp();
305  else
306    ListScheduleTopDown();
307
308#ifndef NDEBUG
309  for (int i = 0; i < NumFactors; ++i) {
310    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
311  }
312#endif // !NDEBUG
313  AvailableQueue->releaseState();
314}
315
316//===----------------------------------------------------------------------===//
317//  Bottom-Up Scheduling
318//===----------------------------------------------------------------------===//
319
320/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
321/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
322void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
323  SUnit *PredSU = PredEdge->getSUnit();
324
325#ifndef NDEBUG
326  if (PredSU->NumSuccsLeft == 0) {
327    dbgs() << "*** Scheduling failed! ***\n";
328    PredSU->dump(this);
329    dbgs() << " has been released too many times!\n";
330    llvm_unreachable(0);
331  }
332#endif
333  --PredSU->NumSuccsLeft;
334
335  if (!ForceUnitLatencies()) {
336    // Updating predecessor's height. This is now the cycle when the
337    // predecessor can be scheduled without causing a pipeline stall.
338    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
339  }
340
341  // If all the node's successors are scheduled, this node is ready
342  // to be scheduled. Ignore the special EntrySU node.
343  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
344    PredSU->isAvailable = true;
345
346    unsigned Height = PredSU->getHeight();
347    if (Height < MinAvailableCycle)
348      MinAvailableCycle = Height;
349
350    if (isReady(PredSU)) {
351      AvailableQueue->push(PredSU);
352    }
353    // CapturePred and others may have left the node in the pending queue;
354    // avoid adding it twice.
355    else if (!PredSU->isPending) {
356      PredSU->isPending = true;
357      PendingQueue.push_back(PredSU);
358    }
359  }
360}
361
362/// Call ReleasePred for each predecessor, then update register live def/gen.
363/// Always update LiveRegDefs for a register dependence even if the current SU
364/// also defines the register. This effectively creates one large live range
365/// across a sequence of two-address nodes. This is important because the
366/// entire chain must be scheduled together. Example:
367///
368/// flags = (3) add
369/// flags = (2) addc flags
370/// flags = (1) addc flags
371///
372/// results in
373///
374/// LiveRegDefs[flags] = 3
375/// LiveRegGens[flags] = 1
376///
377/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
378/// interference on flags.
379void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
380  // Bottom up: release predecessors
381  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
382       I != E; ++I) {
383    ReleasePred(SU, &*I);
384    if (I->isAssignedRegDep()) {
385      // This is a physical register dependency and it's impossible or
386      // expensive to copy the register. Make sure nothing that can
387      // clobber the register is scheduled between the predecessor and
388      // this node.
389      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
390      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
391             "interference on register dependence");
392      LiveRegDefs[I->getReg()] = I->getSUnit();
393      if (!LiveRegGens[I->getReg()]) {
394        ++NumLiveRegs;
395        LiveRegGens[I->getReg()] = SU;
396      }
397    }
398  }
399}
400
401/// Check to see if any of the pending instructions are ready to issue.  If
402/// so, add them to the available queue.
403void ScheduleDAGRRList::ReleasePending() {
404  if (DisableSchedCycles) {
405    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
406    return;
407  }
408
409  // If the available queue is empty, it is safe to reset MinAvailableCycle.
410  if (AvailableQueue->empty())
411    MinAvailableCycle = UINT_MAX;
412
413  // Check to see if any of the pending instructions are ready to issue.  If
414  // so, add them to the available queue.
415  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
416    unsigned ReadyCycle =
417      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
418    if (ReadyCycle < MinAvailableCycle)
419      MinAvailableCycle = ReadyCycle;
420
421    if (PendingQueue[i]->isAvailable) {
422      if (!isReady(PendingQueue[i]))
423          continue;
424      AvailableQueue->push(PendingQueue[i]);
425    }
426    PendingQueue[i]->isPending = false;
427    PendingQueue[i] = PendingQueue.back();
428    PendingQueue.pop_back();
429    --i; --e;
430  }
431}
432
433/// Move the scheduler state forward to the specified cycle.
434void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
435  if (NextCycle <= CurCycle)
436    return;
437
438  IssueCount = 0;
439  AvailableQueue->setCurCycle(NextCycle);
440  if (!HazardRec->isEnabled()) {
441    // Bypass lots of virtual calls in case of long latency.
442    CurCycle = NextCycle;
443  }
444  else {
445    for (; CurCycle != NextCycle; ++CurCycle) {
446      if (isBottomUp)
447        HazardRec->RecedeCycle();
448      else
449        HazardRec->AdvanceCycle();
450    }
451  }
452  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
453  // available Q to release pending nodes at least once before popping.
454  ReleasePending();
455}
456
457/// Move the scheduler state forward until the specified node's dependents are
458/// ready and can be scheduled with no resource conflicts.
459void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
460  if (DisableSchedCycles)
461    return;
462
463  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
464
465  // Bump CurCycle to account for latency. We assume the latency of other
466  // available instructions may be hidden by the stall (not a full pipe stall).
467  // This updates the hazard recognizer's cycle before reserving resources for
468  // this instruction.
469  AdvanceToCycle(ReadyCycle);
470
471  // Calls are scheduled in their preceding cycle, so don't conflict with
472  // hazards from instructions after the call. EmitNode will reset the
473  // scoreboard state before emitting the call.
474  if (isBottomUp && SU->isCall)
475    return;
476
477  // FIXME: For resource conflicts in very long non-pipelined stages, we
478  // should probably skip ahead here to avoid useless scoreboard checks.
479  int Stalls = 0;
480  while (true) {
481    ScheduleHazardRecognizer::HazardType HT =
482      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
483
484    if (HT == ScheduleHazardRecognizer::NoHazard)
485      break;
486
487    ++Stalls;
488  }
489  AdvanceToCycle(CurCycle + Stalls);
490}
491
492/// Record this SUnit in the HazardRecognizer.
493/// Does not update CurCycle.
494void ScheduleDAGRRList::EmitNode(SUnit *SU) {
495  if (!HazardRec->isEnabled())
496    return;
497
498  // Check for phys reg copy.
499  if (!SU->getNode())
500    return;
501
502  switch (SU->getNode()->getOpcode()) {
503  default:
504    assert(SU->getNode()->isMachineOpcode() &&
505           "This target-independent node should not be scheduled.");
506    break;
507  case ISD::MERGE_VALUES:
508  case ISD::TokenFactor:
509  case ISD::CopyToReg:
510  case ISD::CopyFromReg:
511  case ISD::EH_LABEL:
512    // Noops don't affect the scoreboard state. Copies are likely to be
513    // removed.
514    return;
515  case ISD::INLINEASM:
516    // For inline asm, clear the pipeline state.
517    HazardRec->Reset();
518    return;
519  }
520  if (isBottomUp && SU->isCall) {
521    // Calls are scheduled with their preceding instructions. For bottom-up
522    // scheduling, clear the pipeline state before emitting.
523    HazardRec->Reset();
524  }
525
526  HazardRec->EmitInstruction(SU);
527
528  if (!isBottomUp && SU->isCall) {
529    HazardRec->Reset();
530  }
531}
532
533/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
534/// count of its predecessors. If a predecessor pending count is zero, add it to
535/// the Available queue.
536void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
537  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
538  DEBUG(SU->dump(this));
539
540#ifndef NDEBUG
541  if (CurCycle < SU->getHeight())
542    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
543#endif
544
545  // FIXME: Do not modify node height. It may interfere with
546  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
547  // node its ready cycle can aid heuristics, and after scheduling it can
548  // indicate the scheduled cycle.
549  SU->setHeightToAtLeast(CurCycle);
550
551  // Reserve resources for the scheduled instruction.
552  EmitNode(SU);
553
554  Sequence.push_back(SU);
555
556  AvailableQueue->ScheduledNode(SU);
557
558  // If HazardRec is disabled, and each inst counts as one cycle, then
559  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
560  // PendingQueue for schedulers that implement HasReadyFilter.
561  if (!HazardRec->isEnabled() && AvgIPC < 2)
562    AdvanceToCycle(CurCycle + 1);
563
564  // Update liveness of predecessors before successors to avoid treating a
565  // two-address node as a live range def.
566  ReleasePredecessors(SU);
567
568  // Release all the implicit physical register defs that are live.
569  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
570       I != E; ++I) {
571    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
572    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
573      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
574      --NumLiveRegs;
575      LiveRegDefs[I->getReg()] = NULL;
576      LiveRegGens[I->getReg()] = NULL;
577    }
578  }
579
580  SU->isScheduled = true;
581
582  // Conditions under which the scheduler should eagerly advance the cycle:
583  // (1) No available instructions
584  // (2) All pipelines full, so available instructions must have hazards.
585  //
586  // If HazardRec is disabled, the cycle was advanced earlier.
587  //
588  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
589  ++IssueCount;
590  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
591      || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
592      || AvailableQueue->empty())
593    AdvanceToCycle(CurCycle + 1);
594}
595
596/// CapturePred - This does the opposite of ReleasePred. Since SU is being
597/// unscheduled, increase the succ left count of its predecessors. Remove
598/// them from AvailableQueue if necessary.
599void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
600  SUnit *PredSU = PredEdge->getSUnit();
601  if (PredSU->isAvailable) {
602    PredSU->isAvailable = false;
603    if (!PredSU->isPending)
604      AvailableQueue->remove(PredSU);
605  }
606
607  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
608  ++PredSU->NumSuccsLeft;
609}
610
611/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update it
612/// and its predecessors' states to reflect the change.
613void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
614  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
615  DEBUG(SU->dump(this));
616
617  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
618       I != E; ++I) {
619    CapturePred(&*I);
620    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
621      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
622      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
623             "Physical register dependency violated?");
624      --NumLiveRegs;
625      LiveRegDefs[I->getReg()] = NULL;
626      LiveRegGens[I->getReg()] = NULL;
627    }
628  }
629
630  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
631       I != E; ++I) {
632    if (I->isAssignedRegDep()) {
633      // Bump NumLiveRegs only if this register was not already live.
634      if (!LiveRegDefs[I->getReg()])
635        ++NumLiveRegs;
636      // This becomes the nearest def. Note that an earlier def may still be
637      // pending if this is a two-address node.
638      LiveRegDefs[I->getReg()] = SU;
639      if (LiveRegGens[I->getReg()] == NULL ||
640          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
641        LiveRegGens[I->getReg()] = I->getSUnit();
642    }
643  }
644  if (SU->getHeight() < MinAvailableCycle)
645    MinAvailableCycle = SU->getHeight();
646
647  SU->setHeightDirty();
648  SU->isScheduled = false;
649  SU->isAvailable = true;
650  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
651    // Don't make available until backtracking is complete.
652    SU->isPending = true;
653    PendingQueue.push_back(SU);
654  }
655  else {
656    AvailableQueue->push(SU);
657  }
658  AvailableQueue->UnscheduledNode(SU);
659}
660
661/// After backtracking, the hazard checker needs to be restored to a state
662/// corresponding to the current cycle.
663void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
664  HazardRec->Reset();
665
666  unsigned LookAhead = std::min((unsigned)Sequence.size(),
667                                HazardRec->getMaxLookAhead());
668  if (LookAhead == 0)
669    return;
670
671  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
672  unsigned HazardCycle = (*I)->getHeight();
673  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
674    SUnit *SU = *I;
675    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
676      HazardRec->RecedeCycle();
677    }
678    EmitNode(SU);
679  }
680}
681
682/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes back to and
683/// including BtSU, in order to make it possible to schedule a specific node.
684void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
685  SUnit *OldSU = Sequence.back();
686  while (true) {
687    Sequence.pop_back();
688    if (SU->isSucc(OldSU))
689      // Don't try to remove SU from AvailableQueue.
690      SU->isAvailable = false;
691    // FIXME: use ready cycle instead of height
692    CurCycle = OldSU->getHeight();
693    UnscheduleNodeBottomUp(OldSU);
694    AvailableQueue->setCurCycle(CurCycle);
695    if (OldSU == BtSU)
696      break;
697    OldSU = Sequence.back();
698  }
699
700  assert(!SU->isSucc(OldSU) && "Something is wrong!");
701
702  RestoreHazardCheckerBottomUp();
703
704  ReleasePending();
705
706  ++NumBacktracks;
707}
708
709static bool isOperandOf(const SUnit *SU, SDNode *N) {
710  for (const SDNode *SUNode = SU->getNode(); SUNode;
711       SUNode = SUNode->getGluedNode()) {
712    if (SUNode->isOperandOf(N))
713      return true;
714  }
715  return false;
716}
717
718/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
719/// successors to the newly created node.
720SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
721  SDNode *N = SU->getNode();
722  if (!N)
723    return NULL;
724
725  if (SU->getNode()->getGluedNode())
726    return NULL;
727
728  SUnit *NewSU;
729  bool TryUnfold = false;
730  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
731    EVT VT = N->getValueType(i);
732    if (VT == MVT::Glue)
733      return NULL;
734    else if (VT == MVT::Other)
735      TryUnfold = true;
736  }
737  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
738    const SDValue &Op = N->getOperand(i);
739    EVT VT = Op.getNode()->getValueType(Op.getResNo());
740    if (VT == MVT::Glue)
741      return NULL;
742  }
743
744  if (TryUnfold) {
745    SmallVector<SDNode*, 2> NewNodes;
746    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
747      return NULL;
748
749    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
750    assert(NewNodes.size() == 2 && "Expected a load folding node!");
751
752    N = NewNodes[1];
753    SDNode *LoadNode = NewNodes[0];
754    unsigned NumVals = N->getNumValues();
755    unsigned OldNumVals = SU->getNode()->getNumValues();
756    for (unsigned i = 0; i != NumVals; ++i)
757      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
758    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
759                                   SDValue(LoadNode, 1));
760
761    // LoadNode may already exist. This can happen when there is another
762    // load from the same location that produces the same type of value
763    // but has different alignment or volatility.
764    bool isNewLoad = true;
765    SUnit *LoadSU;
766    if (LoadNode->getNodeId() != -1) {
767      LoadSU = &SUnits[LoadNode->getNodeId()];
768      isNewLoad = false;
769    } else {
770      LoadSU = CreateNewSUnit(LoadNode);
771      LoadNode->setNodeId(LoadSU->NodeNum);
772
773      InitNumRegDefsLeft(LoadSU);
774      ComputeLatency(LoadSU);
775    }
776
777    SUnit *NewSU = CreateNewSUnit(N);
778    assert(N->getNodeId() == -1 && "Node already inserted!");
779    N->setNodeId(NewSU->NodeNum);
780
781    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
782    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
783      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
784        NewSU->isTwoAddress = true;
785        break;
786      }
787    }
788    if (TID.isCommutable())
789      NewSU->isCommutable = true;
790
791    InitNumRegDefsLeft(NewSU);
792    ComputeLatency(NewSU);
793
794    // Record all the edges to and from the old SU, by category.
795    SmallVector<SDep, 4> ChainPreds;
796    SmallVector<SDep, 4> ChainSuccs;
797    SmallVector<SDep, 4> LoadPreds;
798    SmallVector<SDep, 4> NodePreds;
799    SmallVector<SDep, 4> NodeSuccs;
800    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
801         I != E; ++I) {
802      if (I->isCtrl())
803        ChainPreds.push_back(*I);
804      else if (isOperandOf(I->getSUnit(), LoadNode))
805        LoadPreds.push_back(*I);
806      else
807        NodePreds.push_back(*I);
808    }
809    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
810         I != E; ++I) {
811      if (I->isCtrl())
812        ChainSuccs.push_back(*I);
813      else
814        NodeSuccs.push_back(*I);
815    }
816
817    // Now assign edges to the newly-created nodes.
818    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
819      const SDep &Pred = ChainPreds[i];
820      RemovePred(SU, Pred);
821      if (isNewLoad)
822        AddPred(LoadSU, Pred);
823    }
824    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
825      const SDep &Pred = LoadPreds[i];
826      RemovePred(SU, Pred);
827      if (isNewLoad)
828        AddPred(LoadSU, Pred);
829    }
830    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
831      const SDep &Pred = NodePreds[i];
832      RemovePred(SU, Pred);
833      AddPred(NewSU, Pred);
834    }
835    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
836      SDep D = NodeSuccs[i];
837      SUnit *SuccDep = D.getSUnit();
838      D.setSUnit(SU);
839      RemovePred(SuccDep, D);
840      D.setSUnit(NewSU);
841      AddPred(SuccDep, D);
842      // Balance register pressure.
843      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
844          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
845        --NewSU->NumRegDefsLeft;
846    }
847    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
848      SDep D = ChainSuccs[i];
849      SUnit *SuccDep = D.getSUnit();
850      D.setSUnit(SU);
851      RemovePred(SuccDep, D);
852      if (isNewLoad) {
853        D.setSUnit(LoadSU);
854        AddPred(SuccDep, D);
855      }
856    }
857
858    // Add a data dependency to reflect that NewSU reads the value defined
859    // by LoadSU.
860    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
861
862    if (isNewLoad)
863      AvailableQueue->addNode(LoadSU);
864    AvailableQueue->addNode(NewSU);
865
866    ++NumUnfolds;
867
868    if (NewSU->NumSuccsLeft == 0) {
869      NewSU->isAvailable = true;
870      return NewSU;
871    }
872    SU = NewSU;
873  }
874
875  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
876  NewSU = CreateClone(SU);
877
878  // New SUnit has the exact same predecessors.
879  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
880       I != E; ++I)
881    if (!I->isArtificial())
882      AddPred(NewSU, *I);
883
884  // Only copy scheduled successors. Cut them from old node's successor
885  // list and move them over.
886  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
887  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
888       I != E; ++I) {
889    if (I->isArtificial())
890      continue;
891    SUnit *SuccSU = I->getSUnit();
892    if (SuccSU->isScheduled) {
893      SDep D = *I;
894      D.setSUnit(NewSU);
895      AddPred(SuccSU, D);
896      D.setSUnit(SU);
897      DelDeps.push_back(std::make_pair(SuccSU, D));
898    }
899  }
900  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
901    RemovePred(DelDeps[i].first, DelDeps[i].second);
902
903  AvailableQueue->updateNode(SU);
904  AvailableQueue->addNode(NewSU);
905
906  ++NumDups;
907  return NewSU;
908}
909
910/// InsertCopiesAndMoveSuccs - Insert register copies and move all
911/// scheduled successors of the given SUnit to the last copy.
912void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
913                                               const TargetRegisterClass *DestRC,
914                                               const TargetRegisterClass *SrcRC,
915                                               SmallVector<SUnit*, 2> &Copies) {
916  SUnit *CopyFromSU = CreateNewSUnit(NULL);
917  CopyFromSU->CopySrcRC = SrcRC;
918  CopyFromSU->CopyDstRC = DestRC;
919
920  SUnit *CopyToSU = CreateNewSUnit(NULL);
921  CopyToSU->CopySrcRC = DestRC;
922  CopyToSU->CopyDstRC = SrcRC;
923
924  // Only copy scheduled successors. Cut them from old node's successor
925  // list and move them over.
926  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
927  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
928       I != E; ++I) {
929    if (I->isArtificial())
930      continue;
931    SUnit *SuccSU = I->getSUnit();
932    if (SuccSU->isScheduled) {
933      SDep D = *I;
934      D.setSUnit(CopyToSU);
935      AddPred(SuccSU, D);
936      DelDeps.push_back(std::make_pair(SuccSU, *I));
937    }
938    else {
939      // Avoid scheduling the def-side copy before other successors. Otherwise
940      // we could introduce another physreg interference on the copy and
941      // continue inserting copies indefinitely.
942      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
943             /*Reg=*/0, /*isNormalMemory=*/false,
944             /*isMustAlias=*/false, /*isArtificial=*/true);
945      AddPred(SuccSU, D);
946    }
947  }
948  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
949    RemovePred(DelDeps[i].first, DelDeps[i].second);
950
951  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
952  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
953
954  AvailableQueue->updateNode(SU);
955  AvailableQueue->addNode(CopyFromSU);
956  AvailableQueue->addNode(CopyToSU);
957  Copies.push_back(CopyFromSU);
958  Copies.push_back(CopyToSU);
959
960  ++NumPRCopies;
961}
962
963/// getPhysicalRegisterVT - Returns the ValueType of the physical register
964/// definition of the specified node.
965/// FIXME: Move to SelectionDAG?
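/// For example (hypothetical operands): if the node has one explicit def and
/// its instruction's implicit defs are {EFLAGS, EAX}, then asking for EAX
/// returns N->getValueType(2), assuming the node's results list the explicit
/// defs first, followed by the implicit defs in TargetInstrDesc order.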
966static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
967                                 const TargetInstrInfo *TII) {
968  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
969  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
970  unsigned NumRes = TID.getNumDefs();
971  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
972    if (Reg == *ImpDef)
973      break;
974    ++NumRes;
975  }
976  return N->getValueType(NumRes);
977}
978
979/// CheckForLiveRegDef - If the specified register def of the specified SUnit
980/// clobbers any "live" registers, add the interfering registers to LRegs.
981static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
982                               std::vector<SUnit*> &LiveRegDefs,
983                               SmallSet<unsigned, 4> &RegAdded,
984                               SmallVector<unsigned, 4> &LRegs,
985                               const TargetRegisterInfo *TRI) {
986  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
987
988    // Check if this overlapping register is live.
989    if (!LiveRegDefs[*AliasI]) continue;
990
991    // Allow multiple uses of the same def.
992    if (LiveRegDefs[*AliasI] == SU) continue;
993
994    // Add the live register to the set of interfering regs.
995    if (RegAdded.insert(*AliasI))
996      LRegs.push_back(*AliasI);
997  }
998}
999
1000/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1001/// scheduling of the given node to satisfy live physical register dependencies.
1002/// If the specific node is the last one that's available to schedule, do
1003/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1004bool ScheduleDAGRRList::
1005DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
1006  if (NumLiveRegs == 0)
1007    return false;
1008
1009  SmallSet<unsigned, 4> RegAdded;
1010  // If this node would clobber any "live" register, then it's not ready.
1011  //
1012  // If SU is the currently live definition of the same register that it uses,
1013  // then we are free to schedule it.
1014  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1015       I != E; ++I) {
1016    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
1017      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
1018                         RegAdded, LRegs, TRI);
1019  }
1020
1021  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1022    if (Node->getOpcode() == ISD::INLINEASM) {
1023      // Inline asm can clobber physical defs.
1024      unsigned NumOps = Node->getNumOperands();
1025      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1026        --NumOps;  // Ignore the glue operand.
1027
1028      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1029        unsigned Flags =
1030          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1031        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1032
1033        ++i; // Skip the ID value.
1034        if (InlineAsm::isRegDefKind(Flags) ||
1035            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
1036          // Check for def of register or earlyclobber register.
1037          for (; NumVals; --NumVals, ++i) {
1038            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1039            if (TargetRegisterInfo::isPhysicalRegister(Reg))
1040              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1041          }
1042        } else
1043          i += NumVals;
1044      }
1045      continue;
1046    }
1047
1048    if (!Node->isMachineOpcode())
1049      continue;
1050    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
1051    if (!TID.ImplicitDefs)
1052      continue;
1053    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
1054      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
1055  }
1056
1057  return !LRegs.empty();
1058}
1059
1060/// Return a node that can be scheduled in this cycle. Requirements:
1061/// (1) Ready: latency has been satisfied
1062/// (2) No Hazards: resources are available
1063/// (3) No Interferences: may unschedule to break register interferences.
1064SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1065  SmallVector<SUnit*, 4> Interferences;
1066  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
1067
1068  SUnit *CurSU = AvailableQueue->pop();
1069  while (CurSU) {
1070    SmallVector<unsigned, 4> LRegs;
1071    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1072      break;
1073    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1074
1075    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1076    Interferences.push_back(CurSU);
1077    CurSU = AvailableQueue->pop();
1078  }
1079  if (CurSU) {
1080    // Add the nodes that aren't ready back onto the available list.
1081    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1082      Interferences[i]->isPending = false;
1083      assert(Interferences[i]->isAvailable && "must still be available");
1084      AvailableQueue->push(Interferences[i]);
1085    }
1086    return CurSU;
1087  }
1088
1089  // All candidates are delayed due to live physical reg dependencies.
1090  // Try backtracking, code duplication, or inserting cross class copies
1091  // to resolve it.
1092  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1093    SUnit *TrySU = Interferences[i];
1094    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1095
1096    // Try unscheduling up to the point where it's safe to schedule
1097    // this node.
1098    SUnit *BtSU = NULL;
1099    unsigned LiveCycle = UINT_MAX;
1100    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1101      unsigned Reg = LRegs[j];
1102      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1103        BtSU = LiveRegGens[Reg];
1104        LiveCycle = BtSU->getHeight();
1105      }
1106    }
1107    if (!WillCreateCycle(TrySU, BtSU))  {
1108      BacktrackBottomUp(TrySU, BtSU);
1109
1110      // Force the current node to be scheduled before the node that
1111      // requires the physical reg dep.
1112      if (BtSU->isAvailable) {
1113        BtSU->isAvailable = false;
1114        if (!BtSU->isPending)
1115          AvailableQueue->remove(BtSU);
1116      }
1117      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1118                          /*Reg=*/0, /*isNormalMemory=*/false,
1119                          /*isMustAlias=*/false, /*isArtificial=*/true));
1120
1121      // If one or more successors has been unscheduled, then the current
1122      // node is no longer available. Schedule a successor that's now
1123      // available instead.
1124      if (!TrySU->isAvailable) {
1125        CurSU = AvailableQueue->pop();
1126      }
1127      else {
1128        CurSU = TrySU;
1129        TrySU->isPending = false;
1130        Interferences.erase(Interferences.begin()+i);
1131      }
1132      break;
1133    }
1134  }
1135
1136  if (!CurSU) {
1137    // Can't backtrack. If it's too expensive to copy the value, then try
1138    // duplicating the nodes that produce these "too expensive to copy"
1139    // values to break the dependency. In case even that doesn't work,
1140    // insert cross class copies.
1141    // If it's not too expensive, i.e. cost != -1, issue copies.
1142    SUnit *TrySU = Interferences[0];
1143    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1144    assert(LRegs.size() == 1 && "Can't handle this yet!");
1145    unsigned Reg = LRegs[0];
1146    SUnit *LRDef = LiveRegDefs[Reg];
1147    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1148    const TargetRegisterClass *RC =
1149      TRI->getMinimalPhysRegClass(Reg, VT);
1150    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1151
1152    // If the cross copy register class is the same as RC, then it must be
1153    // possible to copy the value directly. Do not try to duplicate the def.
1154    // If the cross copy register class is not the same as RC, then it's
1155    // possible to copy the value, but it requires cross register class copies
1156    // and it is expensive.
1157    // If the cross copy register class is null, then it's not possible to
1158    // copy the value at all.
1159    SUnit *NewDef = 0;
1160    if (DestRC != RC) {
1161      NewDef = CopyAndMoveSuccessors(LRDef);
1162      if (!DestRC && !NewDef)
1163        report_fatal_error("Can't handle live physical register dependency!");
1164    }
1165    if (!NewDef) {
1166      // Issue copies; these can be expensive cross register class copies.
1167      SmallVector<SUnit*, 2> Copies;
1168      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1169      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1170            << " to SU #" << Copies.front()->NodeNum << "\n");
1171      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1172                          /*Reg=*/0, /*isNormalMemory=*/false,
1173                          /*isMustAlias=*/false,
1174                          /*isArtificial=*/true));
1175      NewDef = Copies.back();
1176    }
1177
1178    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1179          << " to SU #" << TrySU->NodeNum << "\n");
1180    LiveRegDefs[Reg] = NewDef;
1181    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1182                         /*Reg=*/0, /*isNormalMemory=*/false,
1183                         /*isMustAlias=*/false,
1184                         /*isArtificial=*/true));
1185    TrySU->isAvailable = false;
1186    CurSU = NewDef;
1187  }
1188
1189  assert(CurSU && "Unable to resolve live physical register dependencies!");
1190
1191  // Add the nodes that aren't ready back onto the available list.
1192  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1193    Interferences[i]->isPending = false;
1194    // May no longer be available due to backtracking.
1195    if (Interferences[i]->isAvailable) {
1196      AvailableQueue->push(Interferences[i]);
1197    }
1198  }
1199  return CurSU;
1200}
1201
1202/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1203/// schedulers.
1204void ScheduleDAGRRList::ListScheduleBottomUp() {
1205  // Release any predecessors of the special Exit node.
1206  ReleasePredecessors(&ExitSU);
1207
1208  // Add root to Available queue.
1209  if (!SUnits.empty()) {
1210    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1211    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1212    RootSU->isAvailable = true;
1213    AvailableQueue->push(RootSU);
1214  }
1215
1216  // While Available queue is not empty, grab the node with the highest
1217  // priority. If it is not ready put it back.  Schedule the node.
1218  Sequence.reserve(SUnits.size());
1219  while (!AvailableQueue->empty()) {
1220    DEBUG(dbgs() << "\n*** Examining Available\n";
1221          AvailableQueue->dump(this));
1222
1223    // Pick the best node to schedule taking all constraints into
1224    // consideration.
1225    SUnit *SU = PickNodeToScheduleBottomUp();
1226
1227    AdvancePastStalls(SU);
1228
1229    ScheduleNodeBottomUp(SU);
1230
1231    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1232      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1233      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1234      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1235    }
1236  }
1237
1238  // Reverse the order since this is a bottom-up schedule.
1239  std::reverse(Sequence.begin(), Sequence.end());
1240
1241#ifndef NDEBUG
1242  VerifySchedule(isBottomUp);
1243#endif
1244}
1245
1246//===----------------------------------------------------------------------===//
1247//  Top-Down Scheduling
1248//===----------------------------------------------------------------------===//
1249
1250/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1251/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1252void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1253  SUnit *SuccSU = SuccEdge->getSUnit();
1254
1255#ifndef NDEBUG
1256  if (SuccSU->NumPredsLeft == 0) {
1257    dbgs() << "*** Scheduling failed! ***\n";
1258    SuccSU->dump(this);
1259    dbgs() << " has been released too many times!\n";
1260    llvm_unreachable(0);
1261  }
1262#endif
1263  --SuccSU->NumPredsLeft;
1264
1265  // If all the node's predecessors are scheduled, this node is ready
1266  // to be scheduled. Ignore the special ExitSU node.
1267  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1268    SuccSU->isAvailable = true;
1269    AvailableQueue->push(SuccSU);
1270  }
1271}
1272
1273void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1274  // Top down: release successors
1275  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1276       I != E; ++I) {
1277    assert(!I->isAssignedRegDep() &&
1278           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1279
1280    ReleaseSucc(SU, &*I);
1281  }
1282}
1283
1284/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1285/// count of its successors. If a successor pending count is zero, add it to
1286/// the Available queue.
1287void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1288  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1289  DEBUG(SU->dump(this));
1290
1291  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1292  SU->setDepthToAtLeast(CurCycle);
1293  Sequence.push_back(SU);
1294
1295  ReleaseSuccessors(SU);
1296  SU->isScheduled = true;
1297  AvailableQueue->ScheduledNode(SU);
1298}
1299
1300/// ListScheduleTopDown - The main loop of list scheduling for top-down
1301/// schedulers.
1302void ScheduleDAGRRList::ListScheduleTopDown() {
1303  AvailableQueue->setCurCycle(CurCycle);
1304
1305  // Release any successors of the special Entry node.
1306  ReleaseSuccessors(&EntrySU);
1307
1308  // Add all leaves to the Available queue.
1309  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1310    // It is available if it has no predecessors.
1311    if (SUnits[i].Preds.empty()) {
1312      AvailableQueue->push(&SUnits[i]);
1313      SUnits[i].isAvailable = true;
1314    }
1315  }
1316
1317  // While Available queue is not empty, grab the node with the highest
1318  // priority. If it is not ready put it back.  Schedule the node.
1319  Sequence.reserve(SUnits.size());
1320  while (!AvailableQueue->empty()) {
1321    SUnit *CurSU = AvailableQueue->pop();
1322
1323    if (CurSU)
1324      ScheduleNodeTopDown(CurSU);
1325    ++CurCycle;
1326    AvailableQueue->setCurCycle(CurCycle);
1327  }
1328
1329#ifndef NDEBUG
1330  VerifySchedule(isBottomUp);
1331#endif
1332}
1333
1334
1335//===----------------------------------------------------------------------===//
1336//                RegReductionPriorityQueue Definition
1337//===----------------------------------------------------------------------===//
1338//
1339// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1340// to reduce register pressure.
1341//
1342namespace {
1343class RegReductionPQBase;
1344
1345struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1346  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1347};
1348
1349/// bu_ls_rr_sort - Priority function for bottom up register pressure
1350/// reduction scheduler.
1351struct bu_ls_rr_sort : public queue_sort {
1352  enum {
1353    IsBottomUp = true,
1354    HasReadyFilter = false
1355  };
1356
1357  RegReductionPQBase *SPQ;
1358  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1359  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1360
1361  bool operator()(SUnit* left, SUnit* right) const;
1362};
1363
1364// td_ls_rr_sort - Priority function for top down register pressure reduction
1365// scheduler.
1366struct td_ls_rr_sort : public queue_sort {
1367  enum {
1368    IsBottomUp = false,
1369    HasReadyFilter = false
1370  };
1371
1372  RegReductionPQBase *SPQ;
1373  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1374  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1375
1376  bool operator()(const SUnit* left, const SUnit* right) const;
1377};
1378
1379// src_ls_rr_sort - Priority function for source order scheduler.
1380struct src_ls_rr_sort : public queue_sort {
1381  enum {
1382    IsBottomUp = true,
1383    HasReadyFilter = false
1384  };
1385
1386  RegReductionPQBase *SPQ;
1387  src_ls_rr_sort(RegReductionPQBase *spq)
1388    : SPQ(spq) {}
1389  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1390    : SPQ(RHS.SPQ) {}
1391
1392  bool operator()(SUnit* left, SUnit* right) const;
1393};
1394
1395// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1396struct hybrid_ls_rr_sort : public queue_sort {
1397  enum {
1398    IsBottomUp = true,
1399    HasReadyFilter = false
1400  };
1401
1402  RegReductionPQBase *SPQ;
1403  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1404    : SPQ(spq) {}
1405  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1406    : SPQ(RHS.SPQ) {}
1407
1408  bool isReady(SUnit *SU, unsigned CurCycle) const;
1409
1410  bool operator()(SUnit* left, SUnit* right) const;
1411};
1412
1413// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1414// scheduler.
1415struct ilp_ls_rr_sort : public queue_sort {
1416  enum {
1417    IsBottomUp = true,
1418    HasReadyFilter = false
1419  };
1420
1421  RegReductionPQBase *SPQ;
1422  ilp_ls_rr_sort(RegReductionPQBase *spq)
1423    : SPQ(spq) {}
1424  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1425    : SPQ(RHS.SPQ) {}
1426
1427  bool isReady(SUnit *SU, unsigned CurCycle) const;
1428
1429  bool operator()(SUnit* left, SUnit* right) const;
1430};
1431
1432class RegReductionPQBase : public SchedulingPriorityQueue {
1433protected:
1434  std::vector<SUnit*> Queue;
1435  unsigned CurQueueId;
1436  bool TracksRegPressure;
1437
1438  // SUnits - The SUnits for the current graph.
1439  std::vector<SUnit> *SUnits;
1440
1441  MachineFunction &MF;
1442  const TargetInstrInfo *TII;
1443  const TargetRegisterInfo *TRI;
1444  const TargetLowering *TLI;
1445  ScheduleDAGRRList *scheduleDAG;
1446
1447  // SethiUllmanNumbers - The SethiUllman number for each node.
1448  std::vector<unsigned> SethiUllmanNumbers;
1449
1450  /// RegPressure - Tracking current reg pressure per register class.
1451  ///
1452  std::vector<unsigned> RegPressure;
1453
1454  /// RegLimit - Tracking the number of allocatable registers per register
1455  /// class.
1456  std::vector<unsigned> RegLimit;
1457
1458public:
1459  RegReductionPQBase(MachineFunction &mf,
1460                     bool hasReadyFilter,
1461                     bool tracksrp,
1462                     const TargetInstrInfo *tii,
1463                     const TargetRegisterInfo *tri,
1464                     const TargetLowering *tli)
1465    : SchedulingPriorityQueue(hasReadyFilter),
1466      CurQueueId(0), TracksRegPressure(tracksrp),
1467      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1468    if (TracksRegPressure) {
1469      unsigned NumRC = TRI->getNumRegClasses();
1470      RegLimit.resize(NumRC);
1471      RegPressure.resize(NumRC);
1472      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1473      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1474      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1475             E = TRI->regclass_end(); I != E; ++I)
1476        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1477    }
1478  }
1479
1480  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1481    scheduleDAG = scheduleDag;
1482  }
1483
1484  ScheduleHazardRecognizer* getHazardRec() {
1485    return scheduleDAG->getHazardRec();
1486  }
1487
1488  void initNodes(std::vector<SUnit> &sunits);
1489
1490  void addNode(const SUnit *SU);
1491
1492  void updateNode(const SUnit *SU);
1493
1494  void releaseState() {
1495    SUnits = 0;
1496    SethiUllmanNumbers.clear();
1497    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1498  }
1499
1500  unsigned getNodePriority(const SUnit *SU) const;
1501
1502  unsigned getNodeOrdering(const SUnit *SU) const {
1503    if (!SU->getNode()) return 0;
1504
1505    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1506  }
1507
1508  bool empty() const { return Queue.empty(); }
1509
1510  void push(SUnit *U) {
1511    assert(!U->NodeQueueId && "Node in the queue already");
1512    U->NodeQueueId = ++CurQueueId;
1513    Queue.push_back(U);
1514  }
1515
1516  void remove(SUnit *SU) {
1517    assert(!Queue.empty() && "Queue is empty!");
1518    assert(SU->NodeQueueId != 0 && "Not in queue!");
1519    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1520                                                 SU);
1521    if (I != prior(Queue.end()))
1522      std::swap(*I, Queue.back());
1523    Queue.pop_back();
1524    SU->NodeQueueId = 0;
1525  }
1526
1527  bool tracksRegPressure() const { return TracksRegPressure; }
1528
1529  void dumpRegPressure() const;
1530
1531  bool HighRegPressure(const SUnit *SU) const;
1532
1533  bool MayReduceRegPressure(SUnit *SU) const;
1534
1535  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1536
1537  void ScheduledNode(SUnit *SU);
1538
1539  void UnscheduledNode(SUnit *SU);
1540
1541protected:
1542  bool canClobber(const SUnit *SU, const SUnit *Op);
1543  void AddPseudoTwoAddrDeps();
1544  void PrescheduleNodesWithMultipleUses();
1545  void CalculateSethiUllmanNumbers();
1546};
1547
1548template<class SF>
1549class RegReductionPriorityQueue : public RegReductionPQBase {
1550  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
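    // Linear scan for the preferred unit: Picker(A, B) returns true when B
    // should be scheduled with higher priority than A, so Best ends up at the
    // preferred entry, which is then swapped to the back and popped off in O(1).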
1551    std::vector<SUnit *>::iterator Best = Q.begin();
1552    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1553           E = Q.end(); I != E; ++I)
1554      if (Picker(*Best, *I))
1555        Best = I;
1556    SUnit *V = *Best;
1557    if (Best != prior(Q.end()))
1558      std::swap(*Best, Q.back());
1559    Q.pop_back();
1560    return V;
1561  }
1562
1563  SF Picker;
1564
1565public:
1566  RegReductionPriorityQueue(MachineFunction &mf,
1567                            bool tracksrp,
1568                            const TargetInstrInfo *tii,
1569                            const TargetRegisterInfo *tri,
1570                            const TargetLowering *tli)
1571    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1572      Picker(this) {}
1573
1574  bool isBottomUp() const { return SF::IsBottomUp; }
1575
1576  bool isReady(SUnit *U) const {
1577    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1578  }
1579
1580  SUnit *pop() {
1581    if (Queue.empty()) return NULL;
1582
1583    SUnit *V = popFromQueue(Queue, Picker);
1584    V->NodeQueueId = 0;
1585    return V;
1586  }
1587
1588  void dump(ScheduleDAG *DAG) const {
1589    // Emulate pop() without clobbering NodeQueueIds.
1590    std::vector<SUnit*> DumpQueue = Queue;
1591    SF DumpPicker = Picker;
1592    while (!DumpQueue.empty()) {
1593      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1594      if (isBottomUp())
1595        dbgs() << "Height " << SU->getHeight() << ": ";
1596      else
1597        dbgs() << "Depth " << SU->getDepth() << ": ";
1598      SU->dump(DAG);
1599    }
1600  }
1601};
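
// Illustrative sketch (hypothetical, not part of the original source): a sort
// functor only needs the pieces RegReductionPriorityQueue touches above, i.e.
// roughly the following shape, which mirrors bu_ls_rr_sort and friends:
//
//   struct toy_ls_rr_sort {
//     enum { IsBottomUp = true, HasReadyFilter = false };
//     RegReductionPQBase *SPQ;
//     explicit toy_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
//     bool isReady(SUnit *, unsigned) const { return true; }
//     bool operator()(SUnit *L, SUnit *R) const {
//       return L->NodeQueueId > R->NodeQueueId;  // pick in push (FIFO) order
//     }
//   };
//
// RegReductionPriorityQueue<toy_ls_rr_sort> would then plug into the same
// ScheduleDAGRRList machinery as the predefined pickers.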
1602
1603typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1604BURegReductionPriorityQueue;
1605
1606typedef RegReductionPriorityQueue<td_ls_rr_sort>
1607TDRegReductionPriorityQueue;
1608
1609typedef RegReductionPriorityQueue<src_ls_rr_sort>
1610SrcRegReductionPriorityQueue;
1611
1612typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1613HybridBURRPriorityQueue;
1614
1615typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1616ILPBURRPriorityQueue;
1617} // end anonymous namespace
1618
1619//===----------------------------------------------------------------------===//
1620//           Static Node Priority for Register Pressure Reduction
1621//===----------------------------------------------------------------------===//
1622
1623/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
1624/// A smaller number means a higher priority.
1625static unsigned
1626CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1627  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1628  if (SethiUllmanNumber != 0)
1629    return SethiUllmanNumber;
1630
1631  unsigned Extra = 0;
1632  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1633       I != E; ++I) {
1634    if (I->isCtrl()) continue;  // ignore chain preds
1635    SUnit *PredSU = I->getSUnit();
1636    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1637    if (PredSethiUllman > SethiUllmanNumber) {
1638      SethiUllmanNumber = PredSethiUllman;
1639      Extra = 0;
1640    } else if (PredSethiUllman == SethiUllmanNumber)
1641      ++Extra;
1642  }
1643
1644  SethiUllmanNumber += Extra;
1645
1646  if (SethiUllmanNumber == 0)
1647    SethiUllmanNumber = 1;
1648
1649  return SethiUllmanNumber;
1650}
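
// Worked example (illustrative, not from the original source): with the
// recurrence above, a leaf with no data predecessors gets number 1, a node
// whose two operand subtrees both evaluate to 1 gets 1 + 1 = 2, and a straight
// chain of single-operand nodes stays at 1, so bushier expression trees are
// ranked as needing more registers than straight-line chains.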
1651
1652/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1653/// scheduling units.
1654void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1655  SethiUllmanNumbers.assign(SUnits->size(), 0);
1656
1657  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1658    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1659}
1660
1661void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
1662  SUnits = &sunits;
1663  // Add pseudo dependency edges for two-address nodes.
1664  AddPseudoTwoAddrDeps();
1665  // Reroute edges to nodes with multiple uses.
1666  if (!TracksRegPressure)
1667    PrescheduleNodesWithMultipleUses();
1668  // Calculate node priorities.
1669  CalculateSethiUllmanNumbers();
1670}
1671
1672void RegReductionPQBase::addNode(const SUnit *SU) {
1673  unsigned SUSize = SethiUllmanNumbers.size();
1674  if (SUnits->size() > SUSize)
1675    SethiUllmanNumbers.resize(SUSize*2, 0);
1676  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1677}
1678
1679void RegReductionPQBase::updateNode(const SUnit *SU) {
1680  SethiUllmanNumbers[SU->NodeNum] = 0;
1681  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1682}
1683
1684// Lower priority means schedule further down. For bottom-up scheduling, lower
1685// priority SUs are scheduled before higher priority SUs.
1686unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1687  assert(SU->NodeNum < SethiUllmanNumbers.size());
1688  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1689  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1690    // CopyToReg should be close to its uses to facilitate coalescing and
1691    // avoid spilling.
1692    return 0;
1693  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1694      Opc == TargetOpcode::SUBREG_TO_REG ||
1695      Opc == TargetOpcode::INSERT_SUBREG)
1696    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1697    // close to their uses to facilitate coalescing.
1698    return 0;
1699  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1700    // If SU does not have a register use, i.e. it doesn't produce a value
1701    // that would be consumed (e.g. store), then it terminates a chain of
1702    // computation.  Give it a large SethiUllman number so it will be
1703    // scheduled right before its predecessors so that it doesn't lengthen
1704    // their live ranges.
1705    return 0xffff;
1706  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1707    // If SU does not have a register def, schedule it close to its uses
1708    // because it does not lengthen any live ranges.
1709    return 0;
1710  return SethiUllmanNumbers[SU->NodeNum];
1711}
1712
1713//===----------------------------------------------------------------------===//
1714//                     Register Pressure Tracking
1715//===----------------------------------------------------------------------===//
1716
1717void RegReductionPQBase::dumpRegPressure() const {
1718  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1719         E = TRI->regclass_end(); I != E; ++I) {
1720    const TargetRegisterClass *RC = *I;
1721    unsigned Id = RC->getID();
1722    unsigned RP = RegPressure[Id];
1723    if (!RP) continue;
1724    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1725          << '\n');
1726  }
1727}
1728
1729bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1730  if (!TLI)
1731    return false;
1732
1733  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1734       I != E; ++I) {
1735    if (I->isCtrl())
1736      continue;
1737    SUnit *PredSU = I->getSUnit();
1738    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1739    // to cover the number of registers defined (they are all live).
1740    if (PredSU->NumRegDefsLeft == 0) {
1741      continue;
1742    }
1743    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1744         RegDefPos.IsValid(); RegDefPos.Advance()) {
1745      EVT VT = RegDefPos.GetValue();
1746      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1747      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1748      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1749        return true;
1750    }
1751  }
1752  return false;
1753}
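
// Note (added commentary): the check above asks whether scheduling SU would
// make a predecessor's remaining defs live while their register class is
// already at or beyond its limit; RegLimit was seeded from
// TRI->getRegPressureLimit() in the RegReductionPQBase constructor.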
1754
1755bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1756  const SDNode *N = SU->getNode();
1757
1758  if (!N->isMachineOpcode() || !SU->NumSuccs)
1759    return false;
1760
1761  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1762  for (unsigned i = 0; i != NumDefs; ++i) {
1763    EVT VT = N->getValueType(i);
1764    if (!N->hasAnyUseOfValue(i))
1765      continue;
1766    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1767    if (RegPressure[RCId] >= RegLimit[RCId])
1768      return true;
1769  }
1770  return false;
1771}
1772
1773// Compute the register pressure contribution of this instruction by counting
1774// up for uses that are not yet live and down for defs. Only count register
1775// classes that are already under high pressure. As a side effect, compute the
1776// number of uses of registers that are already live.
1777//
1778// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1779// so could probably be factored.
1780int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1781  LiveUses = 0;
1782  int PDiff = 0;
1783  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1784       I != E; ++I) {
1785    if (I->isCtrl())
1786      continue;
1787    SUnit *PredSU = I->getSUnit();
1788    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1789    // to cover the number of registers defined (they are all live).
1790    if (PredSU->NumRegDefsLeft == 0) {
1791      if (PredSU->getNode()->isMachineOpcode())
1792        ++LiveUses;
1793      continue;
1794    }
1795    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1796         RegDefPos.IsValid(); RegDefPos.Advance()) {
1797      EVT VT = RegDefPos.GetValue();
1798      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1799      if (RegPressure[RCId] >= RegLimit[RCId])
1800        ++PDiff;
1801    }
1802  }
1803  const SDNode *N = SU->getNode();
1804
1805  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1806    return PDiff;
1807
1808  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1809  for (unsigned i = 0; i != NumDefs; ++i) {
1810    EVT VT = N->getValueType(i);
1811    if (!N->hasAnyUseOfValue(i))
1812      continue;
1813    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1814    if (RegPressure[RCId] >= RegLimit[RCId])
1815      --PDiff;
1816  }
1817  return PDiff;
1818}
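
// Worked example (illustrative, not from the original source): if scheduling SU
// would make two predecessor defs live in classes that are already at their
// limit, and SU itself defines one used value in such a class, the function
// returns 2 - 1 = 1, i.e. a net pressure increase of one in the stressed
// classes.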
1819
1820void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1821  if (!TracksRegPressure)
1822    return;
1823
1824  if (!SU->getNode())
1825    return;
1826
1827  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1828       I != E; ++I) {
1829    if (I->isCtrl())
1830      continue;
1831    SUnit *PredSU = I->getSUnit();
1832    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1833    // to cover the number of registers defined (they are all live).
1834    if (PredSU->NumRegDefsLeft == 0) {
1835      continue;
1836    }
1837    // FIXME: The ScheduleDAG currently loses information about which of a
1838    // node's values is consumed by each dependence. Consequently, if the node
1839    // defines multiple register classes, we don't know which to pressurize
1840    // here. Instead the following loop consumes the register defs in an
1841    // arbitrary order. At least it handles the common case of clustered loads
1842    // to the same class. For precise liveness, each SDep needs to indicate the
1843    // result number. But that tightly couples the ScheduleDAG with the
1844    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1845    // value type or register class to SDep.
1846    //
1847    // The most important aspect of register tracking is balancing the increase
1848    // here with the reduction further below. Note that this SU may use multiple
1849    // defs in PredSU. They can't be determined here, but we've already
1850    // compensated by reducing NumRegDefsLeft in PredSU during
1851    // ScheduleDAGSDNodes::AddSchedEdges.
1852    --PredSU->NumRegDefsLeft;
1853    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1854    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1855         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1856      if (SkipRegDefs)
1857        continue;
1858      EVT VT = RegDefPos.GetValue();
1859      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1860      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1861      break;
1862    }
1863  }
1864
1865  // We should have this assert, but there may be dead SDNodes that never
1866  // materialize as SUnits, so they don't appear to generate liveness.
1867  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1868  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1869  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1870       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1871    if (SkipRegDefs > 0)
1872      continue;
1873    EVT VT = RegDefPos.GetValue();
1874    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1875    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
1876      // Register pressure tracking is imprecise. This can happen. But we try
1877      // hard not to let it happen because it likely results in poor scheduling.
1878      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1879      RegPressure[RCId] = 0;
1880    }
1881    else {
1882      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1883    }
1884  }
1885  dumpRegPressure();
1886}
1887
1888void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1889  if (!TracksRegPressure)
1890    return;
1891
1892  const SDNode *N = SU->getNode();
1893  if (!N) return;
1894
1895  if (!N->isMachineOpcode()) {
1896    if (N->getOpcode() != ISD::CopyToReg)
1897      return;
1898  } else {
1899    unsigned Opc = N->getMachineOpcode();
1900    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1901        Opc == TargetOpcode::INSERT_SUBREG ||
1902        Opc == TargetOpcode::SUBREG_TO_REG ||
1903        Opc == TargetOpcode::REG_SEQUENCE ||
1904        Opc == TargetOpcode::IMPLICIT_DEF)
1905      return;
1906  }
1907
1908  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1909       I != E; ++I) {
1910    if (I->isCtrl())
1911      continue;
1912    SUnit *PredSU = I->getSUnit();
1913    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1914    // counts data deps.
1915    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1916      continue;
1917    const SDNode *PN = PredSU->getNode();
1918    if (!PN->isMachineOpcode()) {
1919      if (PN->getOpcode() == ISD::CopyFromReg) {
1920        EVT VT = PN->getValueType(0);
1921        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1922        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1923      }
1924      continue;
1925    }
1926    unsigned POpc = PN->getMachineOpcode();
1927    if (POpc == TargetOpcode::IMPLICIT_DEF)
1928      continue;
1929    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1930      EVT VT = PN->getOperand(0).getValueType();
1931      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1932      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1933      continue;
1934    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1935               POpc == TargetOpcode::SUBREG_TO_REG) {
1936      EVT VT = PN->getValueType(0);
1937      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1938      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1939      continue;
1940    }
1941    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1942    for (unsigned i = 0; i != NumDefs; ++i) {
1943      EVT VT = PN->getValueType(i);
1944      if (!PN->hasAnyUseOfValue(i))
1945        continue;
1946      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1947      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1948        // Register pressure tracking is imprecise. This can happen.
1949        RegPressure[RCId] = 0;
1950      else
1951        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1952    }
1953  }
1954
1955  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1956  // may transfer data dependencies to CopyToReg.
1957  if (SU->NumSuccs && N->isMachineOpcode()) {
1958    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1959    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1960      EVT VT = N->getValueType(i);
1961      if (VT == MVT::Glue || VT == MVT::Other)
1962        continue;
1963      if (!N->hasAnyUseOfValue(i))
1964        continue;
1965      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1966      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1967    }
1968  }
1969
1970  dumpRegPressure();
1971}
1972
1973//===----------------------------------------------------------------------===//
1974//           Dynamic Node Priority for Register Pressure Reduction
1975//===----------------------------------------------------------------------===//
1976
1977/// closestSucc - Returns the scheduled cycle of the successor which is
1978/// closest to the current cycle.
1979static unsigned closestSucc(const SUnit *SU) {
1980  unsigned MaxHeight = 0;
1981  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1982       I != E; ++I) {
1983    if (I->isCtrl()) continue;  // ignore chain succs
1984    unsigned Height = I->getSUnit()->getHeight();
1985    // If there are a bunch of CopyToRegs stacked up, they should be considered
1986    // to be at the same position.
1987    if (I->getSUnit()->getNode() &&
1988        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1989      Height = closestSucc(I->getSUnit())+1;
1990    if (Height > MaxHeight)
1991      MaxHeight = Height;
1992  }
1993  return MaxHeight;
1994}
1995
1996/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1997/// for scratch registers, i.e. the number of data dependencies.
1998static unsigned calcMaxScratches(const SUnit *SU) {
1999  unsigned Scratches = 0;
2000  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2001       I != E; ++I) {
2002    if (I->isCtrl()) continue;  // ignore chain preds
2003    Scratches++;
2004  }
2005  return Scratches;
2006}
2007
2008/// hasOnlyLiveOutUses - Return true if SU's only data successors are CopyToReg
2009/// nodes copying to virtual registers. Such a def is probably a liveout with no
2010/// other use, so it should be scheduled closer to the terminator.
2011static bool hasOnlyLiveOutUses(const SUnit *SU) {
2012  bool RetVal = false;
2013  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2014       I != E; ++I) {
2015    if (I->isCtrl()) continue;
2016    const SUnit *SuccSU = I->getSUnit();
2017    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2018      unsigned Reg =
2019        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2020      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2021        RetVal = true;
2022        continue;
2023      }
2024    }
2025    return false;
2026  }
2027  return RetVal;
2028}
2029
2030/// UnitsSharePred - Return true if the two scheduling units share a common
2031/// data predecessor.
2032static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
2033  SmallSet<const SUnit*, 4> Preds;
2034  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
2035       I != E; ++I) {
2036    if (I->isCtrl()) continue;  // ignore chain preds
2037    Preds.insert(I->getSUnit());
2038  }
2039  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
2040       I != E; ++I) {
2041    if (I->isCtrl()) continue;  // ignore chain preds
2042    if (Preds.count(I->getSUnit()))
2043      return true;
2044  }
2045  return false;
2046}
2047
2048// Check for either a dependence (latency) or resource (hazard) stall.
2049//
2050// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2051static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2052  if ((int)SPQ->getCurCycle() < Height) return true;
2053  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2054      != ScheduleHazardRecognizer::NoHazard)
2055    return true;
2056  return false;
2057}
2058
2059// Return -1 if left has higher priority, 1 if right has higher priority.
2060// Return 0 if latency-based priority is equivalent.
2061static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2062                            RegReductionPQBase *SPQ) {
2063  // If the two nodes share an operand and one of them has a single
2064  // use that is a live out copy, favor the one that is live out. Otherwise
2065  // it will be difficult to eliminate the copy if the instruction is a
2066  // loop induction variable update. e.g.
2067  // BB:
2068  // sub r1, r3, #1
2069  // str r0, [r2, r3]
2070  // mov r3, r1
2071  // cmp
2072  // bne BB
2073  bool SharePred = UnitsSharePred(left, right);
2074  // FIXME: Only adjust if BB is a loop back edge.
2075  // FIXME: What's the cost of a copy?
2076  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
2077  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
2078  int LHeight = (int)left->getHeight() - LBonus;
2079  int RHeight = (int)right->getHeight() - RBonus;
2080
2081  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
2082    BUHasStall(left, LHeight, SPQ);
2083  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
2084    BUHasStall(right, RHeight, SPQ);
2085
2086  // If scheduling only one of the nodes will cause a pipeline stall, delay it.
2087  // If scheduling both of them will cause pipeline stalls, sort them
2088  // according to their height.
2089  if (LStall) {
2090    if (!RStall)
2091      return 1;
2092    if (LHeight != RHeight)
2093      return LHeight > RHeight ? 1 : -1;
2094  } else if (RStall)
2095    return -1;
2096
2097  // If either node is scheduling for latency, sort them by height/depth
2098  // and latency.
2099  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2100                     right->SchedulingPref == Sched::Latency)) {
2101    if (DisableSchedCycles) {
2102      if (LHeight != RHeight)
2103        return LHeight > RHeight ? 1 : -1;
2104    }
2105    else {
2106      // If neither instruction stalls (!LStall && !RStall) then their heights
2107      // are already covered, so only their depths matter. We also reach this
2108      // point if both stall but have the same height.
2109      unsigned LDepth = left->getDepth();
2110      unsigned RDepth = right->getDepth();
2111      if (LDepth != RDepth) {
2112        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2113              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2114              << ") depth " << RDepth << "\n");
2115        return LDepth < RDepth ? 1 : -1;
2116      }
2117    }
2118    if (left->Latency != right->Latency)
2119      return left->Latency > right->Latency ? 1 : -1;
2120  }
2121  return 0;
2122}
2123
2124static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2125  unsigned LPriority = SPQ->getNodePriority(left);
2126  unsigned RPriority = SPQ->getNodePriority(right);
2127  if (LPriority != RPriority) {
2128    DEBUG(++FactorCount[FactStatic]);
2129    return LPriority > RPriority;
2130  }
2131  DEBUG(++FactorCount[FactOther]);
2132
2133  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2134  // e.g.
2135  // t1 = op t2, c1
2136  // t3 = op t4, c2
2137  //
2138  // and the following instructions are both ready.
2139  // t2 = op c3
2140  // t4 = op c4
2141  //
2142  // Then schedule t2 = op first.
2143  // i.e.
2144  // t4 = op c4
2145  // t2 = op c3
2146  // t1 = op t2, c1
2147  // t3 = op t4, c2
2148  //
2149  // This creates more short live intervals.
2150  unsigned LDist = closestSucc(left);
2151  unsigned RDist = closestSucc(right);
2152  if (LDist != RDist)
2153    return LDist < RDist;
2154
2155  // How many registers become live when the node is scheduled.
2156  unsigned LScratch = calcMaxScratches(left);
2157  unsigned RScratch = calcMaxScratches(right);
2158  if (LScratch != RScratch)
2159    return LScratch > RScratch;
2160
2161  if (!DisableSchedCycles) {
2162    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2163    if (result != 0)
2164      return result > 0;
2165  }
2166  else {
2167    if (left->getHeight() != right->getHeight())
2168      return left->getHeight() > right->getHeight();
2169
2170    if (left->getDepth() != right->getDepth())
2171      return left->getDepth() < right->getDepth();
2172  }
2173
2174  assert(left->NodeQueueId && right->NodeQueueId &&
2175         "NodeQueueId cannot be zero");
2176  return (left->NodeQueueId > right->NodeQueueId);
2177}
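
// Summary (added commentary): BURRSort falls through a cascade of tie-breakers:
// Sethi-Ullman priority, closest-successor distance, scratch-register estimate,
// then latency (or raw height/depth when DisableSchedCycles is set), and
// finally NodeQueueId so ties are broken deterministically.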
2178
2179// Bottom up
2180bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2181  return BURRSort(left, right, SPQ);
2182}
2183
2184// Source order, otherwise bottom up.
2185bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2186  unsigned LOrder = SPQ->getNodeOrdering(left);
2187  unsigned ROrder = SPQ->getNodeOrdering(right);
2188
2189  // Prefer an ordering where the node with the lower non-zero order number
2190  // gets the higher preference.
2191  if ((LOrder || ROrder) && LOrder != ROrder)
2192    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2193
2194  return BURRSort(left, right, SPQ);
2195}
2196
2197// If the time between now and when the instruction will be ready can cover
2198// the spill code, then avoid adding it to the ready queue. This gives long
2199// stalls highest priority and allows hoisting across calls. It should also
2200// speed up processing the available queue.
2201bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2202  static const unsigned ReadyDelay = 3;
2203
2204  if (SPQ->MayReduceRegPressure(SU)) return true;
2205
2206  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2207
2208  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2209      != ScheduleHazardRecognizer::NoHazard)
2210    return false;
2211
2212  return true;
2213}
2214
2215// Return true if right should be scheduled with higher priority than left.
2216bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2217  if (left->isCall || right->isCall)
2218    // No way to compute latency of calls.
2219    return BURRSort(left, right, SPQ);
2220
2221  bool LHigh = SPQ->HighRegPressure(left);
2222  bool RHigh = SPQ->HighRegPressure(right);
2223  // Avoid causing spills. If register pressure is high, schedule for
2224  // register pressure reduction.
2225  if (LHigh && !RHigh) {
2226    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2227          << right->NodeNum << ")\n");
2228    return true;
2229  }
2230  else if (!LHigh && RHigh) {
2231    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2232          << left->NodeNum << ")\n");
2233    return false;
2234  }
2235  else if (!LHigh && !RHigh) {
2236    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2237    if (result != 0)
2238      return result > 0;
2239  }
2240  return BURRSort(left, right, SPQ);
2241}
2242
2243// Schedule as many instructions in each cycle as possible, so don't make an
2244// instruction available unless it is ready in the current cycle.
2245bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2246  if (SU->getHeight() > CurCycle) return false;
2247
2248  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2249      != ScheduleHazardRecognizer::NoHazard)
2250    return false;
2251
2252  return true;
2253}
2254
2255static bool canEnableCoalescing(SUnit *SU) {
2256  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2257  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2258    // CopyToReg should be close to its uses to facilitate coalescing and
2259    // avoid spilling.
2260    return true;
2261
2262  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2263      Opc == TargetOpcode::SUBREG_TO_REG ||
2264      Opc == TargetOpcode::INSERT_SUBREG)
2265    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2266    // close to their uses to facilitate coalescing.
2267    return true;
2268
2269  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2270    // If SU does not have a register def, schedule it close to its uses
2271    // because it does not lengthen any live ranges.
2272    return true;
2273
2274  return false;
2275}
2276
2277// list-ilp is currently an experimental scheduler that allows various
2278// heuristics to be enabled prior to the normal register reduction logic.
2279bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2280  if (left->isCall || right->isCall)
2281    // No way to compute latency of calls.
2282    return BURRSort(left, right, SPQ);
2283
2284  unsigned LLiveUses = 0, RLiveUses = 0;
2285  int LPDiff = 0, RPDiff = 0;
2286  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2287    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2288    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2289  }
2290  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2291    DEBUG(++FactorCount[FactPressureDiff]);
2292    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2293          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2294    return LPDiff > RPDiff;
2295  }
2296
2297  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2298    bool LReduce = canEnableCoalescing(left);
2299    bool RReduce = canEnableCoalescing(right);
2300    DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
2301    if (LReduce && !RReduce) return false;
2302    if (RReduce && !LReduce) return true;
2303  }
2304
2305  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2306    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2307          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2308    DEBUG(++FactorCount[FactRegUses]);
2309    return LLiveUses < RLiveUses;
2310  }
2311
2312  if (!DisableSchedStalls) {
2313    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2314    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2315    if (LStall != RStall) {
2316      DEBUG(++FactorCount[FactHeight]);
2317      return left->getHeight() > right->getHeight();
2318    }
2319  }
2320
2321  if (!DisableSchedCriticalPath) {
2322    int spread = (int)left->getDepth() - (int)right->getDepth();
2323    if (std::abs(spread) > MaxReorderWindow) {
2324      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2325            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2326            << right->getDepth() << "\n");
2327      DEBUG(++FactorCount[FactDepth]);
2328      return left->getDepth() < right->getDepth();
2329    }
2330  }
2331
2332  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2333    int spread = (int)left->getHeight() - (int)right->getHeight();
2334    if (std::abs(spread) > MaxReorderWindow) {
2335      DEBUG(++FactorCount[FactHeight]);
2336      return left->getHeight() > right->getHeight();
2337    }
2338  }
2339
2340  return BURRSort(left, right, SPQ);
2341}
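
// Summary (added commentary): each heuristic above is gated by its own
// DisableSched* flag and is consulted in order: register pressure difference,
// coalescing opportunity, live-use count, stall avoidance, critical-path depth,
// and height, before deferring to the normal BURRSort register-reduction logic.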
2342
2343//===----------------------------------------------------------------------===//
2344//                    Preschedule for Register Pressure
2345//===----------------------------------------------------------------------===//
2346
2347bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2348  if (SU->isTwoAddress) {
2349    unsigned Opc = SU->getNode()->getMachineOpcode();
2350    const TargetInstrDesc &TID = TII->get(Opc);
2351    unsigned NumRes = TID.getNumDefs();
2352    unsigned NumOps = TID.getNumOperands() - NumRes;
2353    for (unsigned i = 0; i != NumOps; ++i) {
2354      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2355        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2356        if (DU->getNodeId() != -1 &&
2357            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2358          return true;
2359      }
2360    }
2361  }
2362  return false;
2363}
2364
2365/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2366/// physical register defs.
2367static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2368                                  const TargetInstrInfo *TII,
2369                                  const TargetRegisterInfo *TRI) {
2370  SDNode *N = SuccSU->getNode();
2371  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2372  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2373  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2374  for (const SDNode *SUNode = SU->getNode(); SUNode;
2375       SUNode = SUNode->getGluedNode()) {
2376    if (!SUNode->isMachineOpcode())
2377      continue;
2378    const unsigned *SUImpDefs =
2379      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2380    if (!SUImpDefs)
2381      return false;
2382    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2383      EVT VT = N->getValueType(i);
2384      if (VT == MVT::Glue || VT == MVT::Other)
2385        continue;
2386      if (!N->hasAnyUseOfValue(i))
2387        continue;
2388      unsigned Reg = ImpDefs[i - NumDefs];
2389      for (;*SUImpDefs; ++SUImpDefs) {
2390        unsigned SUReg = *SUImpDefs;
2391        if (TRI->regsOverlap(Reg, SUReg))
2392          return true;
2393      }
2394    }
2395  }
2396  return false;
2397}
2398
2399/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2400/// are not handled well by the general register pressure reduction
2401/// heuristics. When presented with code like this:
2402///
2403///      N
2404///    / |
2405///   /  |
2406///  U  store
2407///  |
2408/// ...
2409///
2410/// the heuristics tend to push the store up, but since the
2411/// operand of the store has another use (U), this would increase
2412/// the length of that other use (the U->N edge).
2413///
2414/// This function transforms code like the above to route U's
2415/// dependence through the store when possible, like this:
2416///
2417///      N
2418///      ||
2419///      ||
2420///     store
2421///       |
2422///       U
2423///       |
2424///      ...
2425///
2426/// This results in the store being scheduled immediately
2427/// after N, which shortens the U->N live range, reducing
2428/// register pressure.
2429///
2430void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2431  // Visit all the nodes in topological order, working top-down.
2432  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2433    SUnit *SU = &(*SUnits)[i];
2434    // For now, only look at nodes with no data successors, such as stores.
2435    // These are especially important, due to the heuristics in
2436    // getNodePriority for nodes with no data successors.
2437    if (SU->NumSuccs != 0)
2438      continue;
2439    // For now, only look at nodes with exactly one data predecessor.
2440    if (SU->NumPreds != 1)
2441      continue;
2442    // Avoid prescheduling copies to virtual registers, which don't behave
2443    // like other nodes from the perspective of scheduling heuristics.
2444    if (SDNode *N = SU->getNode())
2445      if (N->getOpcode() == ISD::CopyToReg &&
2446          TargetRegisterInfo::isVirtualRegister
2447            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2448        continue;
2449
2450    // Locate the single data predecessor.
2451    SUnit *PredSU = 0;
2452    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2453         EE = SU->Preds.end(); II != EE; ++II)
2454      if (!II->isCtrl()) {
2455        PredSU = II->getSUnit();
2456        break;
2457      }
2458    assert(PredSU);
2459
2460    // Don't rewrite edges that carry physregs, because that requires additional
2461    // support infrastructure.
2462    if (PredSU->hasPhysRegDefs)
2463      continue;
2464    // Short-circuit the case where SU is PredSU's only data successor.
2465    if (PredSU->NumSuccs == 1)
2466      continue;
2467    // Avoid prescheduling to copies from virtual registers, which don't behave
2468    // like other nodes from the perspective of scheduling heuristics.
2469    if (SDNode *N = SU->getNode())
2470      if (N->getOpcode() == ISD::CopyFromReg &&
2471          TargetRegisterInfo::isVirtualRegister
2472            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2473        continue;
2474
2475    // Perform checks on the successors of PredSU.
2476    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2477         EE = PredSU->Succs.end(); II != EE; ++II) {
2478      SUnit *PredSuccSU = II->getSUnit();
2479      if (PredSuccSU == SU) continue;
2480      // If PredSU has another successor with no data successors, for
2481      // now don't attempt to choose either over the other.
2482      if (PredSuccSU->NumSuccs == 0)
2483        goto outer_loop_continue;
2484      // Don't break physical register dependencies.
2485      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2486        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2487          goto outer_loop_continue;
2488      // Don't introduce graph cycles.
2489      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2490        goto outer_loop_continue;
2491    }
2492
2493    // Ok, the transformation is safe and the heuristics suggest it is
2494    // profitable. Update the graph.
2495    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2496                 << " next to PredSU #" << PredSU->NodeNum
2497                 << " to guide scheduling in the presence of multiple uses\n");
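    // Reroute every other successor edge of PredSU through SU: drop the old
    // dependence of the successor on PredSU, add an equivalent dependence from
    // SU on PredSU, and make the old successor depend on SU instead.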
2498    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2499      SDep Edge = PredSU->Succs[i];
2500      assert(!Edge.isAssignedRegDep());
2501      SUnit *SuccSU = Edge.getSUnit();
2502      if (SuccSU != SU) {
2503        Edge.setSUnit(PredSU);
2504        scheduleDAG->RemovePred(SuccSU, Edge);
2505        scheduleDAG->AddPred(SU, Edge);
2506        Edge.setSUnit(SU);
2507        scheduleDAG->AddPred(SuccSU, Edge);
2508        --i;
2509      }
2510    }
2511  outer_loop_continue:;
2512  }
2513}
2514
2515/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2516/// it as a def&use operand, add a pseudo control edge from it to the other
2517/// node (if it won't create a cycle) so the two-address one will be scheduled
2518/// first (lower in the schedule). If both nodes are two-address, favor the
2519/// one that has a CopyToReg use (more likely to be a loop induction update).
2520/// If both are two-address, but one is commutable while the other is not
2521/// commutable, favor the one that's not commutable.
2522void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2523  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2524    SUnit *SU = &(*SUnits)[i];
2525    if (!SU->isTwoAddress)
2526      continue;
2527
2528    SDNode *Node = SU->getNode();
2529    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2530      continue;
2531
2532    bool isLiveOut = hasOnlyLiveOutUses(SU);
2533    unsigned Opc = Node->getMachineOpcode();
2534    const TargetInstrDesc &TID = TII->get(Opc);
2535    unsigned NumRes = TID.getNumDefs();
2536    unsigned NumOps = TID.getNumOperands() - NumRes;
2537    for (unsigned j = 0; j != NumOps; ++j) {
2538      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2539        continue;
2540      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2541      if (DU->getNodeId() == -1)
2542        continue;
2543      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2544      if (!DUSU) continue;
2545      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2546           E = DUSU->Succs.end(); I != E; ++I) {
2547        if (I->isCtrl()) continue;
2548        SUnit *SuccSU = I->getSUnit();
2549        if (SuccSU == SU)
2550          continue;
2551        // Be conservative. Ignore if nodes aren't at roughly the same
2552        // depth and height.
2553        if (SuccSU->getHeight() < SU->getHeight() &&
2554            (SU->getHeight() - SuccSU->getHeight()) > 1)
2555          continue;
2556        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2557        // constrains whatever is using the copy, instead of the copy
2558        // itself. In the case that the copy is coalesced, this
2559        // preserves the intent of the pseudo two-address heuristics.
2560        while (SuccSU->Succs.size() == 1 &&
2561               SuccSU->getNode()->isMachineOpcode() &&
2562               SuccSU->getNode()->getMachineOpcode() ==
2563                 TargetOpcode::COPY_TO_REGCLASS)
2564          SuccSU = SuccSU->Succs.front().getSUnit();
2565        // Don't constrain non-instruction nodes.
2566        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2567          continue;
2568        // Don't constrain nodes with physical register defs if the
2569        // predecessor can clobber them.
2570        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2571          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2572            continue;
2573        }
2574        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2575        // these may be coalesced away. We want them close to their uses.
2576        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2577        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2578            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2579            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2580          continue;
2581        if ((!canClobber(SuccSU, DUSU) ||
2582             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2583             (!SU->isCommutable && SuccSU->isCommutable)) &&
2584            !scheduleDAG->IsReachable(SuccSU, SU)) {
2585          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2586                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2587          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2588                                        /*Reg=*/0, /*isNormalMemory=*/false,
2589                                        /*isMustAlias=*/false,
2590                                        /*isArtificial=*/true));
2591        }
2592      }
2593    }
2594  }
2595}
2596
2597/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors of
2598/// the successors of the SUnit SU. Stop counting once the provided limit is
2599/// exceeded.
2600static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2601                                                    unsigned Limit) {
2602  unsigned Sum = 0;
2603  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2604       I != E; ++I) {
2605    const SUnit *SuccSU = I->getSUnit();
2606    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2607         EE = SuccSU->Preds.end(); II != EE; ++II) {
2608      SUnit *PredSU = II->getSUnit();
2609      if (!PredSU->isScheduled)
2610        if (++Sum > Limit)
2611          return Sum;
2612    }
2613  }
2614  return Sum;
2615}
2616
2617
2618// Top down
2619bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2620  unsigned LPriority = SPQ->getNodePriority(left);
2621  unsigned RPriority = SPQ->getNodePriority(right);
2622  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2623  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2624  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2625  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2626  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2627  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2628
2629  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2630    return false;
2631  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2632    return true;
2633
2634  if (LIsFloater)
2635    LBonus -= 2;
2636  if (RIsFloater)
2637    RBonus -= 2;
2638  if (left->NumSuccs == 1)
2639    LBonus += 2;
2640  if (right->NumSuccs == 1)
2641    RBonus += 2;
2642
2643  if (LPriority+LBonus != RPriority+RBonus)
2644    return LPriority+LBonus < RPriority+RBonus;
2645
2646  if (left->getDepth() != right->getDepth())
2647    return left->getDepth() < right->getDepth();
2648
2649  if (left->NumSuccsLeft != right->NumSuccsLeft)
2650    return left->NumSuccsLeft > right->NumSuccsLeft;
2651
2652  assert(left->NodeQueueId && right->NodeQueueId &&
2653         "NodeQueueId cannot be zero");
2654  return (left->NodeQueueId > right->NodeQueueId);
2655}
2656
2657//===----------------------------------------------------------------------===//
2658//                         Public Constructor Functions
2659//===----------------------------------------------------------------------===//
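
// Illustrative note (added commentary): every factory below pairs one of the
// RegReductionPriorityQueue typedefs with a ScheduleDAGRRList and then calls
// setScheduleDAG() so the picker can reach back into the DAG, e.g. via
// getHazardRec().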
2660
2661llvm::ScheduleDAGSDNodes *
2662llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2663                                 CodeGenOpt::Level OptLevel) {
2664  const TargetMachine &TM = IS->TM;
2665  const TargetInstrInfo *TII = TM.getInstrInfo();
2666  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2667
2668  BURegReductionPriorityQueue *PQ =
2669    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2670  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2671  PQ->setScheduleDAG(SD);
2672  return SD;
2673}
2674
2675llvm::ScheduleDAGSDNodes *
2676llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2677                                 CodeGenOpt::Level OptLevel) {
2678  const TargetMachine &TM = IS->TM;
2679  const TargetInstrInfo *TII = TM.getInstrInfo();
2680  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2681
2682  TDRegReductionPriorityQueue *PQ =
2683    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2684  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2685  PQ->setScheduleDAG(SD);
2686  return SD;
2687}
2688
2689llvm::ScheduleDAGSDNodes *
2690llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2691                                   CodeGenOpt::Level OptLevel) {
2692  const TargetMachine &TM = IS->TM;
2693  const TargetInstrInfo *TII = TM.getInstrInfo();
2694  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2695
2696  SrcRegReductionPriorityQueue *PQ =
2697    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2698  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2699  PQ->setScheduleDAG(SD);
2700  return SD;
2701}
2702
2703llvm::ScheduleDAGSDNodes *
2704llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2705                                   CodeGenOpt::Level OptLevel) {
2706  const TargetMachine &TM = IS->TM;
2707  const TargetInstrInfo *TII = TM.getInstrInfo();
2708  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2709  const TargetLowering *TLI = &IS->getTargetLowering();
2710
2711  HybridBURRPriorityQueue *PQ =
2712    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2713
2714  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2715  PQ->setScheduleDAG(SD);
2716  return SD;
2717}
2718
2719llvm::ScheduleDAGSDNodes *
2720llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2721                                CodeGenOpt::Level OptLevel) {
2722  const TargetMachine &TM = IS->TM;
2723  const TargetInstrInfo *TII = TM.getInstrInfo();
2724  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2725  const TargetLowering *TLI = &IS->getTargetLowering();
2726
2727  ILPBURRPriorityQueue *PQ =
2728    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2729  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2730  PQ->setScheduleDAG(SD);
2731  return SD;
2732}
2733