ScheduleDAGRRList.cpp revision 92e946630d5f9bb092853b93501387dd216899b9
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> DisableSchedCycles(
70  "disable-sched-cycles", cl::Hidden, cl::init(false),
71  cl::desc("Disable cycle-level precision during preRA scheduling"));
72
73namespace {
74//===----------------------------------------------------------------------===//
75/// ScheduleDAGRRList - The actual register reduction list scheduler
76/// implementation.  This supports both top-down and bottom-up scheduling.
77///
78class ScheduleDAGRRList : public ScheduleDAGSDNodes {
79private:
80  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
81  /// it is top-down.
82  bool isBottomUp;
83
84  /// NeedLatency - True if the scheduler will make use of latency information.
85  ///
86  bool NeedLatency;
87
88  /// AvailableQueue - The priority queue to use for the available SUnits.
89  SchedulingPriorityQueue *AvailableQueue;
90
91  /// PendingQueue - This contains all of the instructions whose operands have
92  /// been issued, but their results are not ready yet (due to the latency of
93  /// the operation).  Once the operands become available, the instruction is
94  /// added to the AvailableQueue.
95  std::vector<SUnit*> PendingQueue;
96
97  /// HazardRec - The hazard recognizer to use.
98  ScheduleHazardRecognizer *HazardRec;
99
100  /// CurCycle - The current scheduler state corresponds to this cycle.
101  unsigned CurCycle;
102
103  /// MinAvailableCycle - Cycle of the soonest available instruction.
104  unsigned MinAvailableCycle;
105
106  /// LiveRegDefs - A set of physical registers and their definitions
107  /// that are "live". These nodes must be scheduled before any other nodes
108  /// that modify the registers can be scheduled.
109  unsigned NumLiveRegs;
110  std::vector<SUnit*> LiveRegDefs;
111  std::vector<SUnit*> LiveRegGens;
112
113  /// Topo - A topological ordering for SUnits which permits fast IsReachable
114  /// and similar queries.
115  ScheduleDAGTopologicalSort Topo;
116
117public:
118  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
119                    SchedulingPriorityQueue *availqueue,
120                    CodeGenOpt::Level OptLevel)
121    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
122      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
123      Topo(SUnits) {
124
125    const TargetMachine &tm = mf.getTarget();
126    if (DisableSchedCycles || !NeedLatency)
127      HazardRec = new ScheduleHazardRecognizer();
128    else
129      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
130  }
131
132  ~ScheduleDAGRRList() {
133    delete HazardRec;
134    delete AvailableQueue;
135  }
136
137  void Schedule();
138
139  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
140
141  /// IsReachable - Checks if SU is reachable from TargetSU.
142  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
143    return Topo.IsReachable(SU, TargetSU);
144  }
145
146  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
147  /// create a cycle.
148  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
149    return Topo.WillCreateCycle(SU, TargetSU);
150  }
151
152  /// AddPred - adds a predecessor edge to SUnit SU.
153  /// Updates the topological ordering if required.
155  void AddPred(SUnit *SU, const SDep &D) {
156    Topo.AddPred(SU, D.getSUnit());
157    SU->addPred(D);
158  }
159
160  /// RemovePred - removes a predecessor edge from SUnit SU.
161  /// Updates the topological ordering if required.
163  void RemovePred(SUnit *SU, const SDep &D) {
164    Topo.RemovePred(SU, D.getSUnit());
165    SU->removePred(D);
166  }
167
168private:
169  bool isReady(SUnit *SU) {
170    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
171      AvailableQueue->isReady(SU);
172  }
173
174  void ReleasePred(SUnit *SU, const SDep *PredEdge);
175  void ReleasePredecessors(SUnit *SU);
176  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
177  void ReleaseSuccessors(SUnit *SU);
178  void ReleasePending();
179  void AdvanceToCycle(unsigned NextCycle);
180  void AdvancePastStalls(SUnit *SU);
181  void EmitNode(SUnit *SU);
182  void ScheduleNodeBottomUp(SUnit*);
183  void CapturePred(SDep *PredEdge);
184  void UnscheduleNodeBottomUp(SUnit*);
185  void RestoreHazardCheckerBottomUp();
186  void BacktrackBottomUp(SUnit*, SUnit*);
187  SUnit *CopyAndMoveSuccessors(SUnit*);
188  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
189                                const TargetRegisterClass*,
190                                const TargetRegisterClass*,
191                                SmallVector<SUnit*, 2>&);
192  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
193
194  SUnit *PickNodeToScheduleBottomUp();
195  void ListScheduleBottomUp();
196
197  void ScheduleNodeTopDown(SUnit*);
198  void ListScheduleTopDown();
199
200
201  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
202  /// Updates the topological ordering if required.
203  SUnit *CreateNewSUnit(SDNode *N) {
204    unsigned NumSUnits = SUnits.size();
205    SUnit *NewNode = NewSUnit(N);
206    // Update the topological ordering.
207    if (NewNode->NodeNum >= NumSUnits)
208      Topo.InitDAGTopologicalSorting();
209    return NewNode;
210  }
211
212  /// CreateClone - Creates a new SUnit from an existing one.
213  /// Updates the topological ordering if required.
214  SUnit *CreateClone(SUnit *N) {
215    unsigned NumSUnits = SUnits.size();
216    SUnit *NewNode = Clone(N);
217    // Update the topological ordering.
218    if (NewNode->NodeNum >= NumSUnits)
219      Topo.InitDAGTopologicalSorting();
220    return NewNode;
221  }
222
223  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
224  /// need actual latency information but the hybrid scheduler does.
225  bool ForceUnitLatencies() const {
226    return !NeedLatency;
227  }
228};
229}  // end anonymous namespace
230
231
232/// Schedule - Schedule the DAG using list scheduling.
233void ScheduleDAGRRList::Schedule() {
234  DEBUG(dbgs()
235        << "********** List Scheduling BB#" << BB->getNumber()
236        << " '" << BB->getName() << "' **********\n");
237
238  CurCycle = 0;
239  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
240  NumLiveRegs = 0;
241  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
242  LiveRegGens.resize(TRI->getNumRegs(), NULL);
243
244  // Build the scheduling graph.
245  BuildSchedGraph(NULL);
246
247  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
248          SUnits[su].dumpAll(this));
249  Topo.InitDAGTopologicalSorting();
250
251  AvailableQueue->initNodes(SUnits);
252
253  HazardRec->Reset();
254
255  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
256  if (isBottomUp)
257    ListScheduleBottomUp();
258  else
259    ListScheduleTopDown();
260
261  AvailableQueue->releaseState();
262}
263
264//===----------------------------------------------------------------------===//
265//  Bottom-Up Scheduling
266//===----------------------------------------------------------------------===//
267
268/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
269/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
270void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
271  SUnit *PredSU = PredEdge->getSUnit();
272
273#ifndef NDEBUG
274  if (PredSU->NumSuccsLeft == 0) {
275    dbgs() << "*** Scheduling failed! ***\n";
276    PredSU->dump(this);
277    dbgs() << " has been released too many times!\n";
278    llvm_unreachable(0);
279  }
280#endif
281  --PredSU->NumSuccsLeft;
282
283  if (!ForceUnitLatencies()) {
284    // Updating predecessor's height. This is now the cycle when the
285    // predecessor can be scheduled without causing a pipeline stall.
286    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
287  }
288
289  // If all the node's successors are scheduled, this node is ready
290  // to be scheduled. Ignore the special EntrySU node.
291  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
292    PredSU->isAvailable = true;
293
294    unsigned Height = PredSU->getHeight();
295    if (Height < MinAvailableCycle)
296      MinAvailableCycle = Height;
297
298    if (isReady(PredSU)) {
299      AvailableQueue->push(PredSU);
300    }
301    // CapturePred and others may have left the node in the pending queue, so
302    // avoid adding it twice.
303    else if (!PredSU->isPending) {
304      PredSU->isPending = true;
305      PendingQueue.push_back(PredSU);
306    }
307  }
308}
309
310/// Call ReleasePred for each predecessor, then update register live def/gen.
311/// Always update LiveRegDefs for a register dependence even if the current SU
312/// also defines the register. This effectively creates one large live range
313/// across a sequence of two-address nodes. This is important because the
314/// entire chain must be scheduled together. Example:
315///
316/// flags = (3) add
317/// flags = (2) addc flags
318/// flags = (1) addc flags
319///
320/// results in
321///
322/// LiveRegDefs[flags] = 3
323/// LiveRegGens[flags] = 1
324///
325/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
326/// interference on flags.
327void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
328  // Bottom up: release predecessors
329  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
330       I != E; ++I) {
331    ReleasePred(SU, &*I);
332    if (I->isAssignedRegDep()) {
333      // This is a physical register dependency and it's impossible or
334      // expensive to copy the register. Make sure nothing that can
335      // clobber the register is scheduled between the predecessor and
336      // this node.
337      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
338      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
339             "interference on register dependence");
340      LiveRegDefs[I->getReg()] = I->getSUnit();
341      if (!LiveRegGens[I->getReg()]) {
342        ++NumLiveRegs;
343        LiveRegGens[I->getReg()] = SU;
344      }
345    }
346  }
347}
348
349/// Check to see if any of the pending instructions are ready to issue.  If
350/// so, add them to the available queue.
351void ScheduleDAGRRList::ReleasePending() {
352  if (DisableSchedCycles) {
353    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
354    return;
355  }
356
357  // If the available queue is empty, it is safe to reset MinAvailableCycle.
358  if (AvailableQueue->empty())
359    MinAvailableCycle = UINT_MAX;
360
361  // Check to see if any of the pending instructions are ready to issue.  If
362  // so, add them to the available queue.
363  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
364    unsigned ReadyCycle =
365      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
366    if (ReadyCycle < MinAvailableCycle)
367      MinAvailableCycle = ReadyCycle;
368
369    if (PendingQueue[i]->isAvailable) {
370      if (!isReady(PendingQueue[i]))
371          continue;
372      AvailableQueue->push(PendingQueue[i]);
373    }
374    PendingQueue[i]->isPending = false;
375    PendingQueue[i] = PendingQueue.back();
376    PendingQueue.pop_back();
377    --i; --e;
378  }
379}
380
381/// Move the scheduler state forward to the specified cycle.
382void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
383  if (NextCycle <= CurCycle)
384    return;
385
386  AvailableQueue->setCurCycle(NextCycle);
387  if (!HazardRec->isEnabled()) {
388    // Bypass lots of virtual calls in case of long latency.
389    CurCycle = NextCycle;
390  }
391  else {
392    for (; CurCycle != NextCycle; ++CurCycle) {
393      if (isBottomUp)
394        HazardRec->RecedeCycle();
395      else
396        HazardRec->AdvanceCycle();
397    }
398  }
399  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
400  // available Q to release pending nodes at least once before popping.
401  ReleasePending();
402}
403
404/// Move the scheduler state forward until the specified node's dependents are
405/// ready and can be scheduled with no resource conflicts.
406void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
407  if (DisableSchedCycles)
408    return;
409
410  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
411
412  // Bump CurCycle to account for latency. We assume the latency of other
413  // available instructions may be hidden by the stall (not a full pipe stall).
414  // This updates the hazard recognizer's cycle before reserving resources for
415  // this instruction.
416  AdvanceToCycle(ReadyCycle);
417
418  // Calls are scheduled in their preceding cycle, so don't conflict with
419  // hazards from instructions after the call. EmitNode will reset the
420  // scoreboard state before emitting the call.
421  if (isBottomUp && SU->isCall)
422    return;
423
424  // FIXME: For resource conflicts in very long non-pipelined stages, we
425  // should probably skip ahead here to avoid useless scoreboard checks.
426  int Stalls = 0;
427  while (true) {
428    ScheduleHazardRecognizer::HazardType HT =
429      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
430
431    if (HT == ScheduleHazardRecognizer::NoHazard)
432      break;
433
434    ++Stalls;
435  }
436  AdvanceToCycle(CurCycle + Stalls);
437}
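// Illustrative sketch (not part of the original code; cycle counts are
// hypothetical): suppose SU's functional unit is still reserved for two more
// cycles below the current position when scheduling bottom-up. The stall loop
// above then observes:
//   HazardRec->getHazardType(SU,  0) -> Hazard    // ++Stalls == 1
//   HazardRec->getHazardType(SU, -1) -> Hazard    // ++Stalls == 2
//   HazardRec->getHazardType(SU, -2) -> NoHazard  // break
// and finishes with AdvanceToCycle(CurCycle + 2). The negative offsets reflect
// that a bottom-up schedule recedes through cycles rather than advancing.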
438
439/// Record this SUnit in the HazardRecognizer.
440/// Does not update CurCycle.
441void ScheduleDAGRRList::EmitNode(SUnit *SU) {
442  if (!HazardRec->isEnabled())
443    return;
444
445  // Check for phys reg copy.
446  if (!SU->getNode())
447    return;
448
449  switch (SU->getNode()->getOpcode()) {
450  default:
451    assert(SU->getNode()->isMachineOpcode() &&
452           "This target-independent node should not be scheduled.");
453    break;
454  case ISD::MERGE_VALUES:
455  case ISD::TokenFactor:
456  case ISD::CopyToReg:
457  case ISD::CopyFromReg:
458  case ISD::EH_LABEL:
459    // Noops don't affect the scoreboard state. Copies are likely to be
460    // removed.
461    return;
462  case ISD::INLINEASM:
463    // For inline asm, clear the pipeline state.
464    HazardRec->Reset();
465    return;
466  }
467  if (isBottomUp && SU->isCall) {
468    // Calls are scheduled with their preceding instructions. For bottom-up
469    // scheduling, clear the pipeline state before emitting.
470    HazardRec->Reset();
471  }
472
473  HazardRec->EmitInstruction(SU);
474
475  if (!isBottomUp && SU->isCall) {
476    HazardRec->Reset();
477  }
478}
479
480/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
481/// count of its predecessors. If a predecessor pending count is zero, add it to
482/// the Available queue.
483void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
484  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
485  DEBUG(SU->dump(this));
486
487#ifndef NDEBUG
488  if (CurCycle < SU->getHeight())
489    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
490#endif
491
492  // FIXME: Do not modify node height. It may interfere with
493  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
494  // node, its ready cycle can aid heuristics, and after scheduling it can
495  // indicate the scheduled cycle.
496  SU->setHeightToAtLeast(CurCycle);
497
498  // Reserve resources for the scheduled instruction.
499  EmitNode(SU);
500
501  Sequence.push_back(SU);
502
503  AvailableQueue->ScheduledNode(SU);
504
505  // Update liveness of predecessors before successors to avoid treating a
506  // two-address node as a live range def.
507  ReleasePredecessors(SU);
508
509  // Release all the implicit physical register defs that are live.
510  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
511       I != E; ++I) {
512    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
513    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
514      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
515      --NumLiveRegs;
516      LiveRegDefs[I->getReg()] = NULL;
517      LiveRegGens[I->getReg()] = NULL;
518    }
519  }
520
521  SU->isScheduled = true;
522
523  // Conditions under which the scheduler should eagerly advance the cycle:
524  // (1) No available instructions
525  // (2) All pipelines full, so available instructions must have hazards.
526  //
527  // If HazardRec is disabled, count each inst as one cycle.
528  if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
529      || AvailableQueue->empty())
530    AdvanceToCycle(CurCycle + 1);
531}
532
533/// CapturePred - This does the opposite of ReleasePred. Since SU is being
534/// unscheduled, increase the succ left count of its predecessors. Remove
535/// them from AvailableQueue if necessary.
536void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
537  SUnit *PredSU = PredEdge->getSUnit();
538  if (PredSU->isAvailable) {
539    PredSU->isAvailable = false;
540    if (!PredSU->isPending)
541      AvailableQueue->remove(PredSU);
542  }
543
544  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
545  ++PredSU->NumSuccsLeft;
546}
547
548/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
549/// and its predecessors' state to reflect the change.
550void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
551  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
552  DEBUG(SU->dump(this));
553
554  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
555       I != E; ++I) {
556    CapturePred(&*I);
557    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
558      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
559      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
560             "Physical register dependency violated?");
561      --NumLiveRegs;
562      LiveRegDefs[I->getReg()] = NULL;
563      LiveRegGens[I->getReg()] = NULL;
564    }
565  }
566
567  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
568       I != E; ++I) {
569    if (I->isAssignedRegDep()) {
570      if (!LiveRegDefs[I->getReg()]) {
571        ++NumLiveRegs;
572      }
573      // This becomes the nearest def. Note that an earlier def may still be
574      // pending if this is a two-address node.
575      LiveRegDefs[I->getReg()] = SU;
576      if (LiveRegGens[I->getReg()] == NULL ||
577          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
578        LiveRegGens[I->getReg()] = I->getSUnit();
579    }
580  }
581  if (SU->getHeight() < MinAvailableCycle)
582    MinAvailableCycle = SU->getHeight();
583
584  SU->setHeightDirty();
585  SU->isScheduled = false;
586  SU->isAvailable = true;
587  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
588    // Don't make available until backtracking is complete.
589    SU->isPending = true;
590    PendingQueue.push_back(SU);
591  }
592  else {
593    AvailableQueue->push(SU);
594  }
595  AvailableQueue->UnscheduledNode(SU);
596}
597
598/// After backtracking, the hazard checker needs to be restored to a state
599/// corresponding to the current cycle.
600void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
601  HazardRec->Reset();
602
603  unsigned LookAhead = std::min((unsigned)Sequence.size(),
604                                HazardRec->getMaxLookAhead());
605  if (LookAhead == 0)
606    return;
607
608  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
609  unsigned HazardCycle = (*I)->getHeight();
610  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
611    SUnit *SU = *I;
612    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
613      HazardRec->RecedeCycle();
614    }
615    EmitNode(SU);
616  }
617}
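// Example (illustrative only; heights are hypothetical): with MaxLookAhead == 2
// and the last two entries of Sequence having heights 7 and 9, the loop above
// starts HazardCycle at 7, re-emits the first of the two SUnits, calls
// RecedeCycle twice to advance HazardCycle to 9, then re-emits the second,
// leaving the scoreboard in the state it had at the current cycle before
// backtracking.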
618
619/// BacktrackBottomUp - Backtrack scheduling, unscheduling nodes up to and
620/// including BtSU, in order to make it possible to schedule SU.
621void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
622  SUnit *OldSU = Sequence.back();
623  while (true) {
624    Sequence.pop_back();
625    if (SU->isSucc(OldSU))
626      // Don't try to remove SU from AvailableQueue.
627      SU->isAvailable = false;
628    // FIXME: use ready cycle instead of height
629    CurCycle = OldSU->getHeight();
630    UnscheduleNodeBottomUp(OldSU);
631    AvailableQueue->setCurCycle(CurCycle);
632    if (OldSU == BtSU)
633      break;
634    OldSU = Sequence.back();
635  }
636
637  assert(!SU->isSucc(OldSU) && "Something is wrong!");
638
639  RestoreHazardCheckerBottomUp();
640
641  ReleasePending();
642
643  ++NumBacktracks;
644}
645
646static bool isOperandOf(const SUnit *SU, SDNode *N) {
647  for (const SDNode *SUNode = SU->getNode(); SUNode;
648       SUNode = SUNode->getGluedNode()) {
649    if (SUNode->isOperandOf(N))
650      return true;
651  }
652  return false;
653}
654
655/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
656/// successors to the newly created node.
657SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
658  SDNode *N = SU->getNode();
659  if (!N)
660    return NULL;
661
662  if (SU->getNode()->getGluedNode())
663    return NULL;
664
665  SUnit *NewSU;
666  bool TryUnfold = false;
667  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
668    EVT VT = N->getValueType(i);
669    if (VT == MVT::Glue)
670      return NULL;
671    else if (VT == MVT::Other)
672      TryUnfold = true;
673  }
674  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
675    const SDValue &Op = N->getOperand(i);
676    EVT VT = Op.getNode()->getValueType(Op.getResNo());
677    if (VT == MVT::Glue)
678      return NULL;
679  }
680
681  if (TryUnfold) {
682    SmallVector<SDNode*, 2> NewNodes;
683    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
684      return NULL;
685
686    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
687    assert(NewNodes.size() == 2 && "Expected a load folding node!");
688
689    N = NewNodes[1];
690    SDNode *LoadNode = NewNodes[0];
691    unsigned NumVals = N->getNumValues();
692    unsigned OldNumVals = SU->getNode()->getNumValues();
693    for (unsigned i = 0; i != NumVals; ++i)
694      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
695    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
696                                   SDValue(LoadNode, 1));
697
698    // LoadNode may already exist. This can happen when there is another
699    // load from the same location that produces the same type of value
700    // but has different alignment or volatility.
701    bool isNewLoad = true;
702    SUnit *LoadSU;
703    if (LoadNode->getNodeId() != -1) {
704      LoadSU = &SUnits[LoadNode->getNodeId()];
705      isNewLoad = false;
706    } else {
707      LoadSU = CreateNewSUnit(LoadNode);
708      LoadNode->setNodeId(LoadSU->NodeNum);
709
710      InitNumRegDefsLeft(LoadSU);
711      ComputeLatency(LoadSU);
712    }
713
714    SUnit *NewSU = CreateNewSUnit(N);
715    assert(N->getNodeId() == -1 && "Node already inserted!");
716    N->setNodeId(NewSU->NodeNum);
717
718    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
719    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
720      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
721        NewSU->isTwoAddress = true;
722        break;
723      }
724    }
725    if (TID.isCommutable())
726      NewSU->isCommutable = true;
727
728    InitNumRegDefsLeft(NewSU);
729    ComputeLatency(NewSU);
730
731    // Record all the edges to and from the old SU, by category.
732    SmallVector<SDep, 4> ChainPreds;
733    SmallVector<SDep, 4> ChainSuccs;
734    SmallVector<SDep, 4> LoadPreds;
735    SmallVector<SDep, 4> NodePreds;
736    SmallVector<SDep, 4> NodeSuccs;
737    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
738         I != E; ++I) {
739      if (I->isCtrl())
740        ChainPreds.push_back(*I);
741      else if (isOperandOf(I->getSUnit(), LoadNode))
742        LoadPreds.push_back(*I);
743      else
744        NodePreds.push_back(*I);
745    }
746    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
747         I != E; ++I) {
748      if (I->isCtrl())
749        ChainSuccs.push_back(*I);
750      else
751        NodeSuccs.push_back(*I);
752    }
753
754    // Now assign edges to the newly-created nodes.
755    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
756      const SDep &Pred = ChainPreds[i];
757      RemovePred(SU, Pred);
758      if (isNewLoad)
759        AddPred(LoadSU, Pred);
760    }
761    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
762      const SDep &Pred = LoadPreds[i];
763      RemovePred(SU, Pred);
764      if (isNewLoad)
765        AddPred(LoadSU, Pred);
766    }
767    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
768      const SDep &Pred = NodePreds[i];
769      RemovePred(SU, Pred);
770      AddPred(NewSU, Pred);
771    }
772    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
773      SDep D = NodeSuccs[i];
774      SUnit *SuccDep = D.getSUnit();
775      D.setSUnit(SU);
776      RemovePred(SuccDep, D);
777      D.setSUnit(NewSU);
778      AddPred(SuccDep, D);
779      // Balance register pressure.
780      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
781          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
782        --NewSU->NumRegDefsLeft;
783    }
784    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
785      SDep D = ChainSuccs[i];
786      SUnit *SuccDep = D.getSUnit();
787      D.setSUnit(SU);
788      RemovePred(SuccDep, D);
789      if (isNewLoad) {
790        D.setSUnit(LoadSU);
791        AddPred(SuccDep, D);
792      }
793    }
794
795    // Add a data dependency to reflect that NewSU reads the value defined
796    // by LoadSU.
797    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
798
799    if (isNewLoad)
800      AvailableQueue->addNode(LoadSU);
801    AvailableQueue->addNode(NewSU);
802
803    ++NumUnfolds;
804
805    if (NewSU->NumSuccsLeft == 0) {
806      NewSU->isAvailable = true;
807      return NewSU;
808    }
809    SU = NewSU;
810  }
811
812  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
813  NewSU = CreateClone(SU);
814
815  // New SUnit has the exact same predecessors.
816  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
817       I != E; ++I)
818    if (!I->isArtificial())
819      AddPred(NewSU, *I);
820
821  // Only copy scheduled successors. Cut them from old node's successor
822  // list and move them over.
823  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
824  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
825       I != E; ++I) {
826    if (I->isArtificial())
827      continue;
828    SUnit *SuccSU = I->getSUnit();
829    if (SuccSU->isScheduled) {
830      SDep D = *I;
831      D.setSUnit(NewSU);
832      AddPred(SuccSU, D);
833      D.setSUnit(SU);
834      DelDeps.push_back(std::make_pair(SuccSU, D));
835    }
836  }
837  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
838    RemovePred(DelDeps[i].first, DelDeps[i].second);
839
840  AvailableQueue->updateNode(SU);
841  AvailableQueue->addNode(NewSU);
842
843  ++NumDups;
844  return NewSU;
845}
846
847/// InsertCopiesAndMoveSuccs - Insert register copies and move all
848/// scheduled successors of the given SUnit to the last copy.
849void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
850                                               const TargetRegisterClass *DestRC,
851                                               const TargetRegisterClass *SrcRC,
852                                               SmallVector<SUnit*, 2> &Copies) {
853  SUnit *CopyFromSU = CreateNewSUnit(NULL);
854  CopyFromSU->CopySrcRC = SrcRC;
855  CopyFromSU->CopyDstRC = DestRC;
856
857  SUnit *CopyToSU = CreateNewSUnit(NULL);
858  CopyToSU->CopySrcRC = DestRC;
859  CopyToSU->CopyDstRC = SrcRC;
860
861  // Only copy scheduled successors. Cut them from old node's successor
862  // list and move them over.
863  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
864  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
865       I != E; ++I) {
866    if (I->isArtificial())
867      continue;
868    SUnit *SuccSU = I->getSUnit();
869    if (SuccSU->isScheduled) {
870      SDep D = *I;
871      D.setSUnit(CopyToSU);
872      AddPred(SuccSU, D);
873      DelDeps.push_back(std::make_pair(SuccSU, *I));
874    }
875  }
876  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
877    RemovePred(DelDeps[i].first, DelDeps[i].second);
878
879  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
880  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
881
882  AvailableQueue->updateNode(SU);
883  AvailableQueue->addNode(CopyFromSU);
884  AvailableQueue->addNode(CopyToSU);
885  Copies.push_back(CopyFromSU);
886  Copies.push_back(CopyToSU);
887
888  ++NumPRCopies;
889}
890
891/// getPhysicalRegisterVT - Returns the ValueType of the physical register
892/// definition of the specified node.
893/// FIXME: Move to SelectionDAG?
894static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
895                                 const TargetInstrInfo *TII) {
896  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
897  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
898  unsigned NumRes = TID.getNumDefs();
899  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
900    if (Reg == *ImpDef)
901      break;
902    ++NumRes;
903  }
904  return N->getValueType(NumRes);
905}
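// Example (hypothetical registers, for illustration only): for a machine node
// whose TargetInstrDesc has one explicit def and implicit defs {EFLAGS, EAX},
// the node's results are assumed to be laid out as
//   value 0: the explicit def
//   value 1: EFLAGS
//   value 2: EAX
// so getPhysicalRegisterVT(N, EAX, TII) skips past EFLAGS and returns
// N->getValueType(2).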
906
907/// CheckForLiveRegDef - Record in LRegs (and RegAdded) any "live" registers
908/// that the specified register def of the specified SUnit would clobber.
909static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
910                               std::vector<SUnit*> &LiveRegDefs,
911                               SmallSet<unsigned, 4> &RegAdded,
912                               SmallVector<unsigned, 4> &LRegs,
913                               const TargetRegisterInfo *TRI) {
914  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
915
916    // Check if this alias of Reg is live.
917    if (!LiveRegDefs[*AliasI]) continue;
918
919    // Allow multiple uses of the same def.
920    if (LiveRegDefs[*AliasI] == SU) continue;

922    // Add the overlapping register to the set of interfering live regs.
923    if (RegAdded.insert(*AliasI))
924      LRegs.push_back(*AliasI);
925  }
926}
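// Illustrative example (register names are hypothetical): if LiveRegDefs[AX]
// points at some other SUnit and SU implicitly defines EAX, the
// TRI->getOverlaps(EAX) walk visits AX, finds a live def that is neither null
// nor SU itself, and records AX in LRegs, so the scheduler delays SU instead
// of clobbering the pending AX value.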
927
928/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
929/// scheduling of the given node to satisfy live physical register dependencies.
930/// If the specific node is the last one that's available to schedule, do
931/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
932bool ScheduleDAGRRList::
933DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
934  if (NumLiveRegs == 0)
935    return false;
936
937  SmallSet<unsigned, 4> RegAdded;
938  // If this node would clobber any "live" register, then it's not ready.
939  //
940  // If SU is the currently live definition of the same register that it uses,
941  // then we are free to schedule it.
942  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
943       I != E; ++I) {
944    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
945      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
946                         RegAdded, LRegs, TRI);
947  }
948
949  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
950    if (Node->getOpcode() == ISD::INLINEASM) {
951      // Inline asm can clobber physical defs.
952      unsigned NumOps = Node->getNumOperands();
953      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
954        --NumOps;  // Ignore the glue operand.
955
956      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
957        unsigned Flags =
958          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
959        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
960
961        ++i; // Skip the ID value.
962        if (InlineAsm::isRegDefKind(Flags) ||
963            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
964          // Check for def of register or earlyclobber register.
965          for (; NumVals; --NumVals, ++i) {
966            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
967            if (TargetRegisterInfo::isPhysicalRegister(Reg))
968              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
969          }
970        } else
971          i += NumVals;
972      }
973      continue;
974    }
975
976    if (!Node->isMachineOpcode())
977      continue;
978    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
979    if (!TID.ImplicitDefs)
980      continue;
981    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
982      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
983  }
984
985  return !LRegs.empty();
986}
987
988/// Return a node that can be scheduled in this cycle. Requirements:
989/// (1) Ready: latency has been satisfied
990/// (2) No Hazards: resources are available
991/// (3) No Interferences: may unschedule to break register interferences.
992SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
993  SmallVector<SUnit*, 4> Interferences;
994  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
995
996  SUnit *CurSU = AvailableQueue->pop();
997  while (CurSU) {
998    SmallVector<unsigned, 4> LRegs;
999    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1000      break;
1001    LRegsMap.insert(std::make_pair(CurSU, LRegs));
1002
1003    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
1004    Interferences.push_back(CurSU);
1005    CurSU = AvailableQueue->pop();
1006  }
1007  if (CurSU) {
1008    // Add the nodes that aren't ready back onto the available list.
1009    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1010      Interferences[i]->isPending = false;
1011      assert(Interferences[i]->isAvailable && "must still be available");
1012      AvailableQueue->push(Interferences[i]);
1013    }
1014    return CurSU;
1015  }
1016
1017  // All candidates are delayed due to live physical reg dependencies.
1018  // Try backtracking, code duplication, or inserting cross class copies
1019  // to resolve it.
1020  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1021    SUnit *TrySU = Interferences[i];
1022    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1023
1024    // Try unscheduling up to the point where it's safe to schedule
1025    // this node.
1026    SUnit *BtSU = NULL;
1027    unsigned LiveCycle = UINT_MAX;
1028    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1029      unsigned Reg = LRegs[j];
1030      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1031        BtSU = LiveRegGens[Reg];
1032        LiveCycle = BtSU->getHeight();
1033      }
1034    }
1035    if (!WillCreateCycle(TrySU, BtSU))  {
1036      BacktrackBottomUp(TrySU, BtSU);
1037
1038      // Force the current node to be scheduled before the node that
1039      // requires the physical reg dep.
1040      if (BtSU->isAvailable) {
1041        BtSU->isAvailable = false;
1042        if (!BtSU->isPending)
1043          AvailableQueue->remove(BtSU);
1044      }
1045      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1046                          /*Reg=*/0, /*isNormalMemory=*/false,
1047                          /*isMustAlias=*/false, /*isArtificial=*/true));
1048
1049      // If one or more successors have been unscheduled, then the current
1050      // node is no longer available. Schedule a successor that's now
1051      // available instead.
1052      if (!TrySU->isAvailable) {
1053        CurSU = AvailableQueue->pop();
1054      }
1055      else {
1056        CurSU = TrySU;
1057        TrySU->isPending = false;
1058        Interferences.erase(Interferences.begin()+i);
1059      }
1060      break;
1061    }
1062  }
1063
1064  if (!CurSU) {
1065    // Can't backtrack. If it's too expensive to copy the value, then try to
1066    // duplicate the nodes that produce these "too expensive to copy"
1067    // values to break the dependency. In case even that doesn't work,
1068    // insert cross class copies.
1069    // If it's not too expensive, i.e. cost != -1, issue copies.
1070    SUnit *TrySU = Interferences[0];
1071    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1072    assert(LRegs.size() == 1 && "Can't handle this yet!");
1073    unsigned Reg = LRegs[0];
1074    SUnit *LRDef = LiveRegDefs[Reg];
1075    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1076    const TargetRegisterClass *RC =
1077      TRI->getMinimalPhysRegClass(Reg, VT);
1078    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1079
1080    // If the cross copy register class is null, then it must be possible to
1081    // copy the value directly. Do not try to duplicate the def.
1082    SUnit *NewDef = 0;
1083    if (DestRC)
1084      NewDef = CopyAndMoveSuccessors(LRDef);
1085    else
1086      DestRC = RC;
1087    if (!NewDef) {
1088      // Issue copies, these can be expensive cross register class copies.
1089      SmallVector<SUnit*, 2> Copies;
1090      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1091      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1092            << " to SU #" << Copies.front()->NodeNum << "\n");
1093      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1094                          /*Reg=*/0, /*isNormalMemory=*/false,
1095                          /*isMustAlias=*/false,
1096                          /*isArtificial=*/true));
1097      NewDef = Copies.back();
1098    }
1099
1100    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1101          << " to SU #" << TrySU->NodeNum << "\n");
1102    LiveRegDefs[Reg] = NewDef;
1103    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1104                         /*Reg=*/0, /*isNormalMemory=*/false,
1105                         /*isMustAlias=*/false,
1106                         /*isArtificial=*/true));
1107    TrySU->isAvailable = false;
1108    CurSU = NewDef;
1109  }
1110
1111  assert(CurSU && "Unable to resolve live physical register dependencies!");
1112
1113  // Add the nodes that aren't ready back onto the available list.
1114  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1115    Interferences[i]->isPending = false;
1116    // May no longer be available due to backtracking.
1117    if (Interferences[i]->isAvailable) {
1118      AvailableQueue->push(Interferences[i]);
1119    }
1120  }
1121  return CurSU;
1122}
1123
1124/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1125/// schedulers.
1126void ScheduleDAGRRList::ListScheduleBottomUp() {
1127  // Release any predecessors of the special Exit node.
1128  ReleasePredecessors(&ExitSU);
1129
1130  // Add root to Available queue.
1131  if (!SUnits.empty()) {
1132    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1133    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1134    RootSU->isAvailable = true;
1135    AvailableQueue->push(RootSU);
1136  }
1137
1138  // While Available queue is not empty, grab the node with the highest
1139  // priority. If it is not ready put it back.  Schedule the node.
1140  Sequence.reserve(SUnits.size());
1141  while (!AvailableQueue->empty()) {
1142    DEBUG(dbgs() << "\n*** Examining Available\n";
1143          AvailableQueue->dump(this));
1144
1145    // Pick the best node to schedule taking all constraints into
1146    // consideration.
1147    SUnit *SU = PickNodeToScheduleBottomUp();
1148
1149    AdvancePastStalls(SU);
1150
1151    ScheduleNodeBottomUp(SU);
1152
1153    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1154      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1155      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1156      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1157    }
1158  }
1159
1160  // Reverse the order if it is bottom up.
1161  std::reverse(Sequence.begin(), Sequence.end());
1162
1163#ifndef NDEBUG
1164  VerifySchedule(isBottomUp);
1165#endif
1166}
1167
1168//===----------------------------------------------------------------------===//
1169//  Top-Down Scheduling
1170//===----------------------------------------------------------------------===//
1171
1172/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1173/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1174void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1175  SUnit *SuccSU = SuccEdge->getSUnit();
1176
1177#ifndef NDEBUG
1178  if (SuccSU->NumPredsLeft == 0) {
1179    dbgs() << "*** Scheduling failed! ***\n";
1180    SuccSU->dump(this);
1181    dbgs() << " has been released too many times!\n";
1182    llvm_unreachable(0);
1183  }
1184#endif
1185  --SuccSU->NumPredsLeft;
1186
1187  // If all the node's predecessors are scheduled, this node is ready
1188  // to be scheduled. Ignore the special ExitSU node.
1189  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1190    SuccSU->isAvailable = true;
1191    AvailableQueue->push(SuccSU);
1192  }
1193}
1194
1195void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1196  // Top down: release successors
1197  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1198       I != E; ++I) {
1199    assert(!I->isAssignedRegDep() &&
1200           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1201
1202    ReleaseSucc(SU, &*I);
1203  }
1204}
1205
1206/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1207/// count of its successors. If a successor pending count is zero, add it to
1208/// the Available queue.
1209void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1210  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1211  DEBUG(SU->dump(this));
1212
1213  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1214  SU->setDepthToAtLeast(CurCycle);
1215  Sequence.push_back(SU);
1216
1217  ReleaseSuccessors(SU);
1218  SU->isScheduled = true;
1219  AvailableQueue->ScheduledNode(SU);
1220}
1221
1222/// ListScheduleTopDown - The main loop of list scheduling for top-down
1223/// schedulers.
1224void ScheduleDAGRRList::ListScheduleTopDown() {
1225  AvailableQueue->setCurCycle(CurCycle);
1226
1227  // Release any successors of the special Entry node.
1228  ReleaseSuccessors(&EntrySU);
1229
1230  // All leaves to Available queue.
1231  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1232    // It is available if it has no predecessors.
1233    if (SUnits[i].Preds.empty()) {
1234      AvailableQueue->push(&SUnits[i]);
1235      SUnits[i].isAvailable = true;
1236    }
1237  }
1238
1239  // While Available queue is not empty, grab the node with the highest
1240  // priority. If it is not ready put it back.  Schedule the node.
1241  Sequence.reserve(SUnits.size());
1242  while (!AvailableQueue->empty()) {
1243    SUnit *CurSU = AvailableQueue->pop();
1244
1245    if (CurSU)
1246      ScheduleNodeTopDown(CurSU);
1247    ++CurCycle;
1248    AvailableQueue->setCurCycle(CurCycle);
1249  }
1250
1251#ifndef NDEBUG
1252  VerifySchedule(isBottomUp);
1253#endif
1254}
1255
1256
1257//===----------------------------------------------------------------------===//
1258//                RegReductionPriorityQueue Definition
1259//===----------------------------------------------------------------------===//
1260//
1261// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1262// to reduce register pressure.
1263//
1264namespace {
1265class RegReductionPQBase;
1266
1267struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1268  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1269};
1270
1271// bu_ls_rr_sort - Priority function for bottom up register pressure
1272// reduction scheduler.
1273struct bu_ls_rr_sort : public queue_sort {
1274  enum {
1275    IsBottomUp = true,
1276    HasReadyFilter = false
1277  };
1278
1279  RegReductionPQBase *SPQ;
1280  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1281  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1282
1283  bool operator()(SUnit* left, SUnit* right) const;
1284};
1285
1286// td_ls_rr_sort - Priority function for top down register pressure reduction
1287// scheduler.
1288struct td_ls_rr_sort : public queue_sort {
1289  enum {
1290    IsBottomUp = false,
1291    HasReadyFilter = false
1292  };
1293
1294  RegReductionPQBase *SPQ;
1295  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1296  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1297
1298  bool operator()(const SUnit* left, const SUnit* right) const;
1299};
1300
1301// src_ls_rr_sort - Priority function for source order scheduler.
1302struct src_ls_rr_sort : public queue_sort {
1303  enum {
1304    IsBottomUp = true,
1305    HasReadyFilter = false
1306  };
1307
1308  RegReductionPQBase *SPQ;
1309  src_ls_rr_sort(RegReductionPQBase *spq)
1310    : SPQ(spq) {}
1311  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1312    : SPQ(RHS.SPQ) {}
1313
1314  bool operator()(SUnit* left, SUnit* right) const;
1315};
1316
1317// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1318struct hybrid_ls_rr_sort : public queue_sort {
1319  enum {
1320    IsBottomUp = true,
1321    HasReadyFilter = true
1322  };
1323
1324  RegReductionPQBase *SPQ;
1325  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1326    : SPQ(spq) {}
1327  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1328    : SPQ(RHS.SPQ) {}
1329
1330  bool isReady(SUnit *SU, unsigned CurCycle) const;
1331
1332  bool operator()(SUnit* left, SUnit* right) const;
1333};
1334
1335// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1336// scheduler.
1337struct ilp_ls_rr_sort : public queue_sort {
1338  enum {
1339    IsBottomUp = true,
1340    HasReadyFilter = true
1341  };
1342
1343  RegReductionPQBase *SPQ;
1344  ilp_ls_rr_sort(RegReductionPQBase *spq)
1345    : SPQ(spq) {}
1346  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1347    : SPQ(RHS.SPQ) {}
1348
1349  bool isReady(SUnit *SU, unsigned CurCycle) const;
1350
1351  bool operator()(SUnit* left, SUnit* right) const;
1352};
1353
1354class RegReductionPQBase : public SchedulingPriorityQueue {
1355protected:
1356  std::vector<SUnit*> Queue;
1357  unsigned CurQueueId;
1358  bool TracksRegPressure;
1359
1360  // SUnits - The SUnits for the current graph.
1361  std::vector<SUnit> *SUnits;
1362
1363  MachineFunction &MF;
1364  const TargetInstrInfo *TII;
1365  const TargetRegisterInfo *TRI;
1366  const TargetLowering *TLI;
1367  ScheduleDAGRRList *scheduleDAG;
1368
1369  // SethiUllmanNumbers - The SethiUllman number for each node.
1370  std::vector<unsigned> SethiUllmanNumbers;
1371
1372  /// RegPressure - Tracking current reg pressure per register class.
1373  ///
1374  std::vector<unsigned> RegPressure;
1375
1376  /// RegLimit - Tracking the number of allocatable registers per register
1377  /// class.
1378  std::vector<unsigned> RegLimit;
1379
1380public:
1381  RegReductionPQBase(MachineFunction &mf,
1382                     bool hasReadyFilter,
1383                     bool tracksrp,
1384                     const TargetInstrInfo *tii,
1385                     const TargetRegisterInfo *tri,
1386                     const TargetLowering *tli)
1387    : SchedulingPriorityQueue(hasReadyFilter),
1388      CurQueueId(0), TracksRegPressure(tracksrp),
1389      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1390    if (TracksRegPressure) {
1391      unsigned NumRC = TRI->getNumRegClasses();
1392      RegLimit.resize(NumRC);
1393      RegPressure.resize(NumRC);
1394      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1395      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1396      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1397             E = TRI->regclass_end(); I != E; ++I)
1398        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1399    }
1400  }
1401
1402  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1403    scheduleDAG = scheduleDag;
1404  }
1405
1406  ScheduleHazardRecognizer* getHazardRec() {
1407    return scheduleDAG->getHazardRec();
1408  }
1409
1410  void initNodes(std::vector<SUnit> &sunits);
1411
1412  void addNode(const SUnit *SU);
1413
1414  void updateNode(const SUnit *SU);
1415
1416  void releaseState() {
1417    SUnits = 0;
1418    SethiUllmanNumbers.clear();
1419    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1420  }
1421
1422  unsigned getNodePriority(const SUnit *SU) const;
1423
1424  unsigned getNodeOrdering(const SUnit *SU) const {
1425    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1426  }
1427
1428  bool empty() const { return Queue.empty(); }
1429
1430  void push(SUnit *U) {
1431    assert(!U->NodeQueueId && "Node in the queue already");
1432    U->NodeQueueId = ++CurQueueId;
1433    Queue.push_back(U);
1434  }
1435
1436  void remove(SUnit *SU) {
1437    assert(!Queue.empty() && "Queue is empty!");
1438    assert(SU->NodeQueueId != 0 && "Not in queue!");
1439    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1440                                                 SU);
1441    if (I != prior(Queue.end()))
1442      std::swap(*I, Queue.back());
1443    Queue.pop_back();
1444    SU->NodeQueueId = 0;
1445  }
1446
1447  bool tracksRegPressure() const { return TracksRegPressure; }
1448
1449  void dumpRegPressure() const;
1450
1451  bool HighRegPressure(const SUnit *SU) const;
1452
1453  bool MayReduceRegPressure(SUnit *SU);
1454
1455  void ScheduledNode(SUnit *SU);
1456
1457  void UnscheduledNode(SUnit *SU);
1458
1459protected:
1460  bool canClobber(const SUnit *SU, const SUnit *Op);
1461  void AddPseudoTwoAddrDeps();
1462  void PrescheduleNodesWithMultipleUses();
1463  void CalculateSethiUllmanNumbers();
1464};
1465
1466template<class SF>
1467class RegReductionPriorityQueue : public RegReductionPQBase {
1468  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1469    std::vector<SUnit *>::iterator Best = Q.begin();
1470    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1471           E = Q.end(); I != E; ++I)
1472      if (Picker(*Best, *I))
1473        Best = I;
1474    SUnit *V = *Best;
1475    if (Best != prior(Q.end()))
1476      std::swap(*Best, Q.back());
1477    Q.pop_back();
1478    return V;
1479  }
1480
1481  SF Picker;
1482
1483public:
1484  RegReductionPriorityQueue(MachineFunction &mf,
1485                            bool tracksrp,
1486                            const TargetInstrInfo *tii,
1487                            const TargetRegisterInfo *tri,
1488                            const TargetLowering *tli)
1489    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1490      Picker(this) {}
1491
1492  bool isBottomUp() const { return SF::IsBottomUp; }
1493
1494  bool isReady(SUnit *U) const {
1495    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1496  }
1497
1498  SUnit *pop() {
1499    if (Queue.empty()) return NULL;
1500
1501    SUnit *V = popFromQueue(Queue, Picker);
1502    V->NodeQueueId = 0;
1503    return V;
1504  }
1505
1506  void dump(ScheduleDAG *DAG) const {
1507    // Emulate pop() without clobbering NodeQueueIds.
1508    std::vector<SUnit*> DumpQueue = Queue;
1509    SF DumpPicker = Picker;
1510    while (!DumpQueue.empty()) {
1511      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1512      if (isBottomUp())
1513        dbgs() << "Height " << SU->getHeight() << ": ";
1514      else
1515        dbgs() << "Depth " << SU->getDepth() << ": ";
1516      SU->dump(DAG);
1517    }
1518  }
1519};
1520
1521typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1522BURegReductionPriorityQueue;
1523
1524typedef RegReductionPriorityQueue<td_ls_rr_sort>
1525TDRegReductionPriorityQueue;
1526
1527typedef RegReductionPriorityQueue<src_ls_rr_sort>
1528SrcRegReductionPriorityQueue;
1529
1530typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1531HybridBURRPriorityQueue;
1532
1533typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1534ILPBURRPriorityQueue;
1535} // end anonymous namespace
1536
1537//===----------------------------------------------------------------------===//
1538//           Static Node Priority for Register Pressure Reduction
1539//===----------------------------------------------------------------------===//
1540
1541/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1542/// Smaller number is the higher priority.
1543static unsigned
1544CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1545  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1546  if (SethiUllmanNumber != 0)
1547    return SethiUllmanNumber;
1548
1549  unsigned Extra = 0;
1550  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1551       I != E; ++I) {
1552    if (I->isCtrl()) continue;  // ignore chain preds
1553    SUnit *PredSU = I->getSUnit();
1554    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1555    if (PredSethiUllman > SethiUllmanNumber) {
1556      SethiUllmanNumber = PredSethiUllman;
1557      Extra = 0;
1558    } else if (PredSethiUllman == SethiUllmanNumber)
1559      ++Extra;
1560  }
1561
1562  SethiUllmanNumber += Extra;
1563
1564  if (SethiUllmanNumber == 0)
1565    SethiUllmanNumber = 1;
1566
1567  return SethiUllmanNumber;
1568}
1569
1570/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1571/// scheduling units.
1572void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1573  SethiUllmanNumbers.assign(SUnits->size(), 0);
1574
1575  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1576    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1577}
1578
1579void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
1580  SUnits = &sunits;
1581  // Add pseudo dependency edges for two-address nodes.
1582  AddPseudoTwoAddrDeps();
1583  // Reroute edges to nodes with multiple uses.
1584  if (!TracksRegPressure)
1585    PrescheduleNodesWithMultipleUses();
1586  // Calculate node priorities.
1587  CalculateSethiUllmanNumbers();
1588}
1589
1590void RegReductionPQBase::addNode(const SUnit *SU) {
1591  unsigned SUSize = SethiUllmanNumbers.size();
1592  if (SUnits->size() > SUSize)
1593    SethiUllmanNumbers.resize(SUSize*2, 0);
1594  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1595}
1596
1597void RegReductionPQBase::updateNode(const SUnit *SU) {
1598  SethiUllmanNumbers[SU->NodeNum] = 0;
1599  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1600}
1601
1602// Lower priority means schedule further down. For bottom-up scheduling, lower
1603// priority SUs are scheduled before higher priority SUs.
1604unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1605  assert(SU->NodeNum < SethiUllmanNumbers.size());
1606  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1607  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1608    // CopyToReg should be close to its uses to facilitate coalescing and
1609    // avoid spilling.
1610    return 0;
1611  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1612      Opc == TargetOpcode::SUBREG_TO_REG ||
1613      Opc == TargetOpcode::INSERT_SUBREG)
1614    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1615    // close to their uses to facilitate coalescing.
1616    return 0;
1617  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1618    // If SU does not have a register use, i.e. it doesn't produce a value
1619    // that would be consumed (e.g. store), then it terminates a chain of
1620    // computation.  Give it a large SethiUllman number so it will be
1621    // scheduled right before its predecessors, so that it doesn't lengthen
1622    // their live ranges.
1623    return 0xffff;
1624  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1625    // If SU does not have a register def, schedule it close to its uses
1626    // because it does not lengthen any live ranges.
1627    return 0;
1628  return SethiUllmanNumbers[SU->NodeNum];
1629}
1630
1631//===----------------------------------------------------------------------===//
1632//                     Register Pressure Tracking
1633//===----------------------------------------------------------------------===//
1634
1635void RegReductionPQBase::dumpRegPressure() const {
1636  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1637         E = TRI->regclass_end(); I != E; ++I) {
1638    const TargetRegisterClass *RC = *I;
1639    unsigned Id = RC->getID();
1640    unsigned RP = RegPressure[Id];
1641    if (!RP) continue;
1642    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1643          << '\n');
1644  }
1645}
1646
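// HighRegPressure - Return true if register pressure is at or near the limit
// for any register class in which SU's data predecessors still have live
// defs: for each such def, the check is whether the tracked pressure plus
// that def's cost reaches the limit for the def's register class.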
1647bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1648  if (!TLI)
1649    return false;
1650
1651  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1652       I != E; ++I) {
1653    if (I->isCtrl())
1654      continue;
1655    SUnit *PredSU = I->getSUnit();
1656    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1657    // to cover the number of registers defined (they are all live).
1658    if (PredSU->NumRegDefsLeft == 0) {
1659      continue;
1660    }
1661    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1662         RegDefPos.IsValid(); RegDefPos.Advance()) {
1663      EVT VT = RegDefPos.GetValue();
1664      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1665      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1666      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1667        return true;
1668    }
1669  }
1670  return false;
1671}
1672
1673bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
1674  const SDNode *N = SU->getNode();
1675
1676  if (!N->isMachineOpcode() || !SU->NumSuccs)
1677    return false;
1678
1679  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1680  for (unsigned i = 0; i != NumDefs; ++i) {
1681    EVT VT = N->getValueType(i);
1682    if (!N->hasAnyUseOfValue(i))
1683      continue;
1684    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1685    if (RegPressure[RCId] >= RegLimit[RCId])
1686      return true;
1687  }
1688  return false;
1689}
1690
1691void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1692  if (!TracksRegPressure)
1693    return;
1694
1695  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1696       I != E; ++I) {
1697    if (I->isCtrl())
1698      continue;
1699    SUnit *PredSU = I->getSUnit();
1700    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1701    // to cover the number of registers defined (they are all live).
1702    if (PredSU->NumRegDefsLeft == 0) {
1703      continue;
1704    }
1705    // FIXME: The ScheduleDAG currently loses information about which of a
1706    // node's values is consumed by each dependence. Consequently, if the node
1707    // defines multiple register classes, we don't know which to pressurize
1708    // here. Instead the following loop consumes the register defs in an
1709    // arbitrary order. At least it handles the common case of clustered loads
1710    // to the same class. For precise liveness, each SDep needs to indicate the
1711    // result number. But that tightly couples the ScheduleDAG with the
1712    // SelectionDAG making updates tricky. A simpler hack would be to attach a
1713    // value type or register class to SDep.
1714    //
1715    // The most important aspect of register tracking is balancing the increase
1716    // here with the reduction further below. Note that this SU may use multiple
1717    // defs in PredSU. They can't be determined here, but we've already
1718    // compensated by reducing NumRegDefsLeft in PredSU during
1719    // ScheduleDAGSDNodes::AddSchedEdges.
1720    --PredSU->NumRegDefsLeft;
1721    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
1722    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1723         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1724      if (SkipRegDefs)
1725        continue;
1726      EVT VT = RegDefPos.GetValue();
1727      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1728      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1729      break;
1730    }
1731  }
1732
1733  // We should have this assert, but there may be dead SDNodes that never
1734  // materialize as SUnits, so they don't appear to generate liveness.
1735  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
1736  int SkipRegDefs = (int)SU->NumRegDefsLeft;
1737  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
1738       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
1739    if (SkipRegDefs > 0)
1740      continue;
1741    EVT VT = RegDefPos.GetValue();
1742    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1743    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
1744      // Register pressure tracking is imprecise. This can happen. But we try
1745      // hard not to let it happen because it likely results in poor scheduling.
1746      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
1747      RegPressure[RCId] = 0;
1748    }
1749    else {
1750      RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1751    }
1752  }
1753  dumpRegPressure();
1754}
1755
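// UnscheduledNode - Adjust the tracked register pressure when the scheduler
// backtracks over SU, roughly undoing the effect that scheduling SU had on
// the pressure counters.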
1756void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1757  if (!TracksRegPressure)
1758    return;
1759
1760  const SDNode *N = SU->getNode();
1761  if (!N->isMachineOpcode()) {
1762    if (N->getOpcode() != ISD::CopyToReg)
1763      return;
1764  } else {
1765    unsigned Opc = N->getMachineOpcode();
1766    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1767        Opc == TargetOpcode::INSERT_SUBREG ||
1768        Opc == TargetOpcode::SUBREG_TO_REG ||
1769        Opc == TargetOpcode::REG_SEQUENCE ||
1770        Opc == TargetOpcode::IMPLICIT_DEF)
1771      return;
1772  }
1773
1774  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1775       I != E; ++I) {
1776    if (I->isCtrl())
1777      continue;
1778    SUnit *PredSU = I->getSUnit();
1779    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1780    // counts data deps.
1781    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1782      continue;
1783    const SDNode *PN = PredSU->getNode();
1784    if (!PN->isMachineOpcode()) {
1785      if (PN->getOpcode() == ISD::CopyFromReg) {
1786        EVT VT = PN->getValueType(0);
1787        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1788        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1789      }
1790      continue;
1791    }
1792    unsigned POpc = PN->getMachineOpcode();
1793    if (POpc == TargetOpcode::IMPLICIT_DEF)
1794      continue;
1795    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1796      EVT VT = PN->getOperand(0).getValueType();
1797      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1798      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1799      continue;
1800    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1801               POpc == TargetOpcode::SUBREG_TO_REG) {
1802      EVT VT = PN->getValueType(0);
1803      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1804      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1805      continue;
1806    }
1807    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1808    for (unsigned i = 0; i != NumDefs; ++i) {
1809      EVT VT = PN->getValueType(i);
1810      if (!PN->hasAnyUseOfValue(i))
1811        continue;
1812      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1813      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1814        // Register pressure tracking is imprecise. This can happen.
1815        RegPressure[RCId] = 0;
1816      else
1817        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1818    }
1819  }
1820
1821  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1822  // may transfer data dependencies to CopyToReg.
1823  if (SU->NumSuccs && N->isMachineOpcode()) {
1824    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1825    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1826      EVT VT = N->getValueType(i);
1827      if (VT == MVT::Glue || VT == MVT::Other)
1828        continue;
1829      if (!N->hasAnyUseOfValue(i))
1830        continue;
1831      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1832      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1833    }
1834  }
1835
1836  dumpRegPressure();
1837}
1838
1839//===----------------------------------------------------------------------===//
1840//           Dynamic Node Priority for Register Pressure Reduction
1841//===----------------------------------------------------------------------===//
1842
1843/// closestSucc - Returns the scheduled cycle of the successor which is
1844/// closest to the current cycle.
1845static unsigned closestSucc(const SUnit *SU) {
1846  unsigned MaxHeight = 0;
1847  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1848       I != E; ++I) {
1849    if (I->isCtrl()) continue;  // ignore chain succs
1850    unsigned Height = I->getSUnit()->getHeight();
1851    // If there are a bunch of CopyToRegs stacked up, they should be considered
1852    // to be at the same position.
1853    if (I->getSUnit()->getNode() &&
1854        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1855      Height = closestSucc(I->getSUnit())+1;
1856    if (Height > MaxHeight)
1857      MaxHeight = Height;
1858  }
1859  return MaxHeight;
1860}
1861
1862/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1863/// for scratch registers, i.e. the number of data dependencies.
1864static unsigned calcMaxScratches(const SUnit *SU) {
1865  unsigned Scratches = 0;
1866  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1867       I != E; ++I) {
1868    if (I->isCtrl()) continue;  // ignore chain preds
1869    Scratches++;
1870  }
1871  return Scratches;
1872}
1873
1874/// hasOnlyLiveOutUses - Return true if SU has data successors and all of
1875/// them are CopyToReg nodes to virtual registers. Such a def is probably a
1876/// liveout with no other use; it should be scheduled closer to the terminator.
1877static bool hasOnlyLiveOutUses(const SUnit *SU) {
1878  bool RetVal = false;
1879  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1880       I != E; ++I) {
1881    if (I->isCtrl()) continue;
1882    const SUnit *SuccSU = I->getSUnit();
1883    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1884      unsigned Reg =
1885        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1886      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1887        RetVal = true;
1888        continue;
1889      }
1890    }
1891    return false;
1892  }
1893  return RetVal;
1894}
1895
1896/// UnitsSharePred - Return true if the two scheduling units share a common
1897/// data predecessor.
1898static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1899  SmallSet<const SUnit*, 4> Preds;
1900  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1901       I != E; ++I) {
1902    if (I->isCtrl()) continue;  // ignore chain preds
1903    Preds.insert(I->getSUnit());
1904  }
1905  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1906       I != E; ++I) {
1907    if (I->isCtrl()) continue;  // ignore chain preds
1908    if (Preds.count(I->getSUnit()))
1909      return true;
1910  }
1911  return false;
1912}
1913
1914// Check for either a dependence (latency) or resource (hazard) stall.
1915//
1916// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
1917static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
1918  if ((int)SPQ->getCurCycle() < Height) return true;
1919  if (SPQ->getHazardRec()->getHazardType(SU, 0)
1920      != ScheduleHazardRecognizer::NoHazard)
1921    return true;
1922  return false;
1923}
1924
1925// Return -1 if left has higher priority, 1 if right has higher priority.
1926// Return 0 if latency-based priority is equivalent.
1927static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
1928                            RegReductionPQBase *SPQ) {
1929  // If the two nodes share an operand and one of them has a single
1930  // use that is a live out copy, favor the one that is live out. Otherwise
1931  // it will be difficult to eliminate the copy if the instruction is a
1932  // loop induction variable update. e.g.
1933  // BB:
1934  // sub r1, r3, #1
1935  // str r0, [r2, r3]
1936  // mov r3, r1
1937  // cmp
1938  // bne BB
1939  bool SharePred = UnitsSharePred(left, right);
1940  // FIXME: Only adjust if BB is a loop back edge.
1941  // FIXME: What's the cost of a copy?
1942  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1943  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1944  int LHeight = (int)left->getHeight() - LBonus;
1945  int RHeight = (int)right->getHeight() - RBonus;
1946
1947  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
1948    BUHasStall(left, LHeight, SPQ);
1949  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
1950    BUHasStall(right, RHeight, SPQ);
1951
1952  // If scheduling one of the nodes will cause a pipeline stall, delay it.
1953  // If scheduling both of them will cause pipeline stalls, sort them
1954  // according to their height.
1955  if (LStall) {
1956    if (!RStall)
1957      return 1;
1958    if (LHeight != RHeight)
1959      return LHeight > RHeight ? 1 : -1;
1960  } else if (RStall)
1961    return -1;
1962
1963  // If either node is scheduling for latency, sort them by height/depth
1964  // and latency.
1965  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
1966                     right->SchedulingPref == Sched::Latency)) {
1967    if (DisableSchedCycles) {
1968      if (LHeight != RHeight)
1969        return LHeight > RHeight ? 1 : -1;
1970    }
1971    else {
1972      // If neither instruction stalls (!LStall && !RStall) then
1973      // its height is already covered so only its depth matters. We also reach
1974      // this if both stall but have the same height.
1975      unsigned LDepth = left->getDepth();
1976      unsigned RDepth = right->getDepth();
1977      if (LDepth != RDepth) {
1978        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
1979              << ") depth " << LDepth << " vs SU (" << right->NodeNum
1980              << ") depth " << RDepth << "\n");
1981        return LDepth < RDepth ? 1 : -1;
1982      }
1983    }
1984    if (left->Latency != right->Latency)
1985      return left->Latency > right->Latency ? 1 : -1;
1986  }
1987  return 0;
1988}
1989
1990static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
1991  unsigned LPriority = SPQ->getNodePriority(left);
1992  unsigned RPriority = SPQ->getNodePriority(right);
1993  if (LPriority != RPriority)
1994    return LPriority > RPriority;
1995
1996  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
1997  // e.g.
1998  // t1 = op t2, c1
1999  // t3 = op t4, c2
2000  //
2001  // and the following instructions are both ready.
2002  // t2 = op c3
2003  // t4 = op c4
2004  //
2005  // Then schedule t2 = op first.
2006  // i.e.
2007  // t4 = op c4
2008  // t2 = op c3
2009  // t1 = op t2, c1
2010  // t3 = op t4, c2
2011  //
2012  // This creates more short live intervals.
2013  unsigned LDist = closestSucc(left);
2014  unsigned RDist = closestSucc(right);
2015  if (LDist != RDist)
2016    return LDist < RDist;
2017
2018  // How many registers become live when the node is scheduled.
2019  unsigned LScratch = calcMaxScratches(left);
2020  unsigned RScratch = calcMaxScratches(right);
2021  if (LScratch != RScratch)
2022    return LScratch > RScratch;
2023
2024  if (!DisableSchedCycles) {
2025    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2026    if (result != 0)
2027      return result > 0;
2028  }
2029  else {
2030    if (left->getHeight() != right->getHeight())
2031      return left->getHeight() > right->getHeight();
2032
2033    if (left->getDepth() != right->getDepth())
2034      return left->getDepth() < right->getDepth();
2035  }
2036
2037  assert(left->NodeQueueId && right->NodeQueueId &&
2038         "NodeQueueId cannot be zero");
2039  return (left->NodeQueueId > right->NodeQueueId);
2040}
2041
2042// Bottom up
2043bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2044  return BURRSort(left, right, SPQ);
2045}
2046
2047// Source order, otherwise bottom up.
2048bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2049  unsigned LOrder = SPQ->getNodeOrdering(left);
2050  unsigned ROrder = SPQ->getNodeOrdering(right);
2051
2052  // Prefer an ordering where the lower the non-zero order number, the higher
2053  // the preference.
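  // For example (illustrative): with LOrder == 5 and ROrder == 9 the
  // comparison below returns true, so the bottom-up scheduler pops the
  // order-9 node first and the order-5 node ends up earlier in the final
  // program order, as desired.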
2054  if ((LOrder || ROrder) && LOrder != ROrder)
2055    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2056
2057  return BURRSort(left, right, SPQ);
2058}
2059
2060// If the time between now and when the instruction will be ready can cover
2061// the spill code, then avoid adding it to the ready queue. This gives long
2062// stalls highest priority and allows hoisting across calls. It should also
2063// speed up processing the available queue.
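// For example (illustrative): with the ReadyDelay of 3 used below, an SUnit
// is kept out of the available queue while its height exceeds CurCycle + 3,
// or while the hazard recognizer still reports a hazard that far ahead,
// unless scheduling it may reduce register pressure.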
2064bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2065  static const unsigned ReadyDelay = 3;
2066
2067  if (SPQ->MayReduceRegPressure(SU)) return true;
2068
2069  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2070
2071  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2072      != ScheduleHazardRecognizer::NoHazard)
2073    return false;
2074
2075  return true;
2076}
2077
2078// Return true if right should be scheduled with higher priority than left.
2079bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2080  if (left->isCall || right->isCall)
2081    // No way to compute latency of calls.
2082    return BURRSort(left, right, SPQ);
2083
2084  bool LHigh = SPQ->HighRegPressure(left);
2085  bool RHigh = SPQ->HighRegPressure(right);
2086  // Avoid causing spills. If register pressure is high, schedule for
2087  // register pressure reduction.
2088  if (LHigh && !RHigh) {
2089    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2090          << right->NodeNum << ")\n");
2091    return true;
2092  }
2093  else if (!LHigh && RHigh) {
2094    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2095          << left->NodeNum << ")\n");
2096    return false;
2097  }
2098  else if (!LHigh && !RHigh) {
2099    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2100    if (result != 0)
2101      return result > 0;
2102  }
2103  return BURRSort(left, right, SPQ);
2104}
2105
2106// Schedule as many instructions in each cycle as possible. So don't make an
2107// instruction available unless it is ready in the current cycle.
2108bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2109  if (SU->getHeight() > CurCycle) return false;
2110
2111  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2112      != ScheduleHazardRecognizer::NoHazard)
2113    return false;
2114
2115  return SU->getHeight() <= CurCycle;
2116}
2117
2118bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2119  if (left->isCall || right->isCall)
2120    // No way to compute latency of calls.
2121    return BURRSort(left, right, SPQ);
2122
2123  bool LHigh = SPQ->HighRegPressure(left);
2124  bool RHigh = SPQ->HighRegPressure(right);
2125  // Avoid causing spills. If register pressure is high, schedule for
2126  // register pressure reduction.
2127  if (LHigh && !RHigh)
2128    return true;
2129  else if (!LHigh && RHigh)
2130    return false;
2131  else if (!LHigh && !RHigh) {
2132    // Low register pressure situation, schedule to maximize instruction level
2133    // parallelism.
2134    if (left->NumPreds > right->NumPreds)
2135      return false;
2136    else if (left->NumPreds < right->NumPreds)
2137      return true;
2138  }
2139
2140  return BURRSort(left, right, SPQ);
2141}
2142
2143//===----------------------------------------------------------------------===//
2144//                    Preschedule for Register Pressure
2145//===----------------------------------------------------------------------===//
2146
2147bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2148  if (SU->isTwoAddress) {
2149    unsigned Opc = SU->getNode()->getMachineOpcode();
2150    const TargetInstrDesc &TID = TII->get(Opc);
2151    unsigned NumRes = TID.getNumDefs();
2152    unsigned NumOps = TID.getNumOperands() - NumRes;
2153    for (unsigned i = 0; i != NumOps; ++i) {
2154      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2155        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2156        if (DU->getNodeId() != -1 &&
2157            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2158          return true;
2159      }
2160    }
2161  }
2162  return false;
2163}
2164
2165/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2166/// physical register defs.
2167static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2168                                  const TargetInstrInfo *TII,
2169                                  const TargetRegisterInfo *TRI) {
2170  SDNode *N = SuccSU->getNode();
2171  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2172  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2173  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2174  for (const SDNode *SUNode = SU->getNode(); SUNode;
2175       SUNode = SUNode->getGluedNode()) {
2176    if (!SUNode->isMachineOpcode())
2177      continue;
2178    const unsigned *SUImpDefs =
2179      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2180    if (!SUImpDefs)
2181      return false;
2182    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2183      EVT VT = N->getValueType(i);
2184      if (VT == MVT::Glue || VT == MVT::Other)
2185        continue;
2186      if (!N->hasAnyUseOfValue(i))
2187        continue;
2188      unsigned Reg = ImpDefs[i - NumDefs];
2189      for (;*SUImpDefs; ++SUImpDefs) {
2190        unsigned SUReg = *SUImpDefs;
2191        if (TRI->regsOverlap(Reg, SUReg))
2192          return true;
2193      }
2194    }
2195  }
2196  return false;
2197}
2198
2199/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2200/// are not handled well by the general register pressure reduction
2201/// heuristics. When presented with code like this:
2202///
2203///      N
2204///    / |
2205///   /  |
2206///  U  store
2207///  |
2208/// ...
2209///
2210/// the heuristics tend to push the store up, but since the
2211/// operand of the store has another use (U), this would increase
2212/// the length of that other use (the U->N edge).
2213///
2214/// This function transforms code like the above to route U's
2215/// dependence through the store when possible, like this:
2216///
2217///      N
2218///      ||
2219///      ||
2220///     store
2221///       |
2222///       U
2223///       |
2224///      ...
2225///
2226/// This results in the store being scheduled immediately
2227/// after N, which shortens the U->N live range, reducing
2228/// register pressure.
2229///
2230void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2231  // Visit all the nodes in topological order, working top-down.
2232  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2233    SUnit *SU = &(*SUnits)[i];
2234    // For now, only look at nodes with no data successors, such as stores.
2235    // These are especially important, due to the heuristics in
2236    // getNodePriority for nodes with no data successors.
2237    if (SU->NumSuccs != 0)
2238      continue;
2239    // For now, only look at nodes with exactly one data predecessor.
2240    if (SU->NumPreds != 1)
2241      continue;
2242    // Avoid prescheduling copies to virtual registers, which don't behave
2243    // like other nodes from the perspective of scheduling heuristics.
2244    if (SDNode *N = SU->getNode())
2245      if (N->getOpcode() == ISD::CopyToReg &&
2246          TargetRegisterInfo::isVirtualRegister
2247            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2248        continue;
2249
2250    // Locate the single data predecessor.
2251    SUnit *PredSU = 0;
2252    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2253         EE = SU->Preds.end(); II != EE; ++II)
2254      if (!II->isCtrl()) {
2255        PredSU = II->getSUnit();
2256        break;
2257      }
2258    assert(PredSU);
2259
2260    // Don't rewrite edges that carry physregs, because that requires additional
2261    // support infrastructure.
2262    if (PredSU->hasPhysRegDefs)
2263      continue;
2264    // Short-circuit the case where SU is PredSU's only data successor.
2265    if (PredSU->NumSuccs == 1)
2266      continue;
2267    // Avoid prescheduling to copies from virtual registers, which don't behave
2268    // like other nodes from the perspective of scheduling heuristics.
2269    if (SDNode *N = SU->getNode())
2270      if (N->getOpcode() == ISD::CopyFromReg &&
2271          TargetRegisterInfo::isVirtualRegister
2272            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2273        continue;
2274
2275    // Perform checks on the successors of PredSU.
2276    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2277         EE = PredSU->Succs.end(); II != EE; ++II) {
2278      SUnit *PredSuccSU = II->getSUnit();
2279      if (PredSuccSU == SU) continue;
2280      // If PredSU has another successor with no data successors, for
2281      // now don't attempt to choose either over the other.
2282      if (PredSuccSU->NumSuccs == 0)
2283        goto outer_loop_continue;
2284      // Don't break physical register dependencies.
2285      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2286        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2287          goto outer_loop_continue;
2288      // Don't introduce graph cycles.
2289      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2290        goto outer_loop_continue;
2291    }
2292
2293    // Ok, the transformation is safe and the heuristics suggest it is
2294    // profitable. Update the graph.
2295    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2296                 << " next to PredSU #" << PredSU->NodeNum
2297                 << " to guide scheduling in the presence of multiple uses\n");
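    // Reroute each of PredSU's other successor edges through SU: the edge
    // from PredSU to SuccSU is removed and replaced by an edge from PredSU
    // to SU plus an edge from SU to SuccSU, so SuccSU now depends on SU.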
2298    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2299      SDep Edge = PredSU->Succs[i];
2300      assert(!Edge.isAssignedRegDep());
2301      SUnit *SuccSU = Edge.getSUnit();
2302      if (SuccSU != SU) {
2303        Edge.setSUnit(PredSU);
2304        scheduleDAG->RemovePred(SuccSU, Edge);
2305        scheduleDAG->AddPred(SU, Edge);
2306        Edge.setSUnit(SU);
2307        scheduleDAG->AddPred(SuccSU, Edge);
2308        --i;
2309      }
2310    }
2311  outer_loop_continue:;
2312  }
2313}
2314
2315/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2316/// it as a def&use operand, add a pseudo control edge from it to the other
2317/// node (if it won't create a cycle) so the two-address one will be scheduled
2318/// first (lower in the schedule). If both nodes are two-address, favor the
2319/// one that has a CopyToReg use (more likely to be a loop induction update).
2320/// If both are two-address, but one is commutable while the other is not
2321/// commutable, favor the one that's not commutable.
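///
/// For example (illustrative): if a two-address node rewrites a register that
/// a separate compare also reads, the artificial edge makes the compare a
/// predecessor of the two-address node, so the compare lands earlier in the
/// final schedule and reads the register before it is clobbered.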
2322void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2323  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2324    SUnit *SU = &(*SUnits)[i];
2325    if (!SU->isTwoAddress)
2326      continue;
2327
2328    SDNode *Node = SU->getNode();
2329    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2330      continue;
2331
2332    bool isLiveOut = hasOnlyLiveOutUses(SU);
2333    unsigned Opc = Node->getMachineOpcode();
2334    const TargetInstrDesc &TID = TII->get(Opc);
2335    unsigned NumRes = TID.getNumDefs();
2336    unsigned NumOps = TID.getNumOperands() - NumRes;
2337    for (unsigned j = 0; j != NumOps; ++j) {
2338      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2339        continue;
2340      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2341      if (DU->getNodeId() == -1)
2342        continue;
2343      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2344      if (!DUSU) continue;
2345      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2346           E = DUSU->Succs.end(); I != E; ++I) {
2347        if (I->isCtrl()) continue;
2348        SUnit *SuccSU = I->getSUnit();
2349        if (SuccSU == SU)
2350          continue;
2351        // Be conservative. Ignore if nodes aren't at roughly the same
2352        // depth and height.
2353        if (SuccSU->getHeight() < SU->getHeight() &&
2354            (SU->getHeight() - SuccSU->getHeight()) > 1)
2355          continue;
2356        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2357        // constrains whatever is using the copy, instead of the copy
2358        // itself. In the case that the copy is coalesced, this
2359        // preserves the intent of the pseudo two-address heuristics.
2360        while (SuccSU->Succs.size() == 1 &&
2361               SuccSU->getNode()->isMachineOpcode() &&
2362               SuccSU->getNode()->getMachineOpcode() ==
2363                 TargetOpcode::COPY_TO_REGCLASS)
2364          SuccSU = SuccSU->Succs.front().getSUnit();
2365        // Don't constrain non-instruction nodes.
2366        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2367          continue;
2368        // Don't constrain nodes with physical register defs if the
2369        // predecessor can clobber them.
2370        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2371          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2372            continue;
2373        }
2374        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2375        // these may be coalesced away. We want them close to their uses.
2376        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2377        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2378            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2379            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2380          continue;
2381        if ((!canClobber(SuccSU, DUSU) ||
2382             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2383             (!SU->isCommutable && SuccSU->isCommutable)) &&
2384            !scheduleDAG->IsReachable(SuccSU, SU)) {
2385          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2386                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2387          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2388                                        /*Reg=*/0, /*isNormalMemory=*/false,
2389                                        /*isMustAlias=*/false,
2390                                        /*isArtificial=*/true));
2391        }
2392      }
2393    }
2394  }
2395}
2396
2397/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors
2398/// of the successors of the SUnit SU. Stop when the provided limit is
2399/// exceeded.
2400static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2401                                                    unsigned Limit) {
2402  unsigned Sum = 0;
2403  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2404       I != E; ++I) {
2405    const SUnit *SuccSU = I->getSUnit();
2406    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2407         EE = SuccSU->Preds.end(); II != EE; ++II) {
2408      SUnit *PredSU = II->getSUnit();
2409      if (!PredSU->isScheduled)
2410        if (++Sum > Limit)
2411          return Sum;
2412    }
2413  }
2414  return Sum;
2415}
2416
2417
2418// Top down
2419bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2420  unsigned LPriority = SPQ->getNodePriority(left);
2421  unsigned RPriority = SPQ->getNodePriority(right);
2422  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2423  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2424  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2425  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2426  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2427  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2428
2429  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2430    return false;
2431  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2432    return true;
2433
2434  if (LIsFloater)
2435    LBonus -= 2;
2436  if (RIsFloater)
2437    RBonus -= 2;
2438  if (left->NumSuccs == 1)
2439    LBonus += 2;
2440  if (right->NumSuccs == 1)
2441    RBonus += 2;
2442
2443  if (LPriority+LBonus != RPriority+RBonus)
2444    return LPriority+LBonus < RPriority+RBonus;
2445
2446  if (left->getDepth() != right->getDepth())
2447    return left->getDepth() < right->getDepth();
2448
2449  if (left->NumSuccsLeft != right->NumSuccsLeft)
2450    return left->NumSuccsLeft > right->NumSuccsLeft;
2451
2452  assert(left->NodeQueueId && right->NodeQueueId &&
2453         "NodeQueueId cannot be zero");
2454  return (left->NodeQueueId > right->NodeQueueId);
2455}
2456
2457//===----------------------------------------------------------------------===//
2458//                         Public Constructor Functions
2459//===----------------------------------------------------------------------===//
2460
2461llvm::ScheduleDAGSDNodes *
2462llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2463                                 CodeGenOpt::Level OptLevel) {
2464  const TargetMachine &TM = IS->TM;
2465  const TargetInstrInfo *TII = TM.getInstrInfo();
2466  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2467
2468  BURegReductionPriorityQueue *PQ =
2469    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2470  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2471  PQ->setScheduleDAG(SD);
2472  return SD;
2473}
2474
2475llvm::ScheduleDAGSDNodes *
2476llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2477                                 CodeGenOpt::Level OptLevel) {
2478  const TargetMachine &TM = IS->TM;
2479  const TargetInstrInfo *TII = TM.getInstrInfo();
2480  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2481
2482  TDRegReductionPriorityQueue *PQ =
2483    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2484  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2485  PQ->setScheduleDAG(SD);
2486  return SD;
2487}
2488
2489llvm::ScheduleDAGSDNodes *
2490llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2491                                   CodeGenOpt::Level OptLevel) {
2492  const TargetMachine &TM = IS->TM;
2493  const TargetInstrInfo *TII = TM.getInstrInfo();
2494  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2495
2496  SrcRegReductionPriorityQueue *PQ =
2497    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2498  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2499  PQ->setScheduleDAG(SD);
2500  return SD;
2501}
2502
2503llvm::ScheduleDAGSDNodes *
2504llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2505                                   CodeGenOpt::Level OptLevel) {
2506  const TargetMachine &TM = IS->TM;
2507  const TargetInstrInfo *TII = TM.getInstrInfo();
2508  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2509  const TargetLowering *TLI = &IS->getTargetLowering();
2510
2511  HybridBURRPriorityQueue *PQ =
2512    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2513
2514  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2515  PQ->setScheduleDAG(SD);
2516  return SD;
2517}
2518
2519llvm::ScheduleDAGSDNodes *
2520llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2521                                CodeGenOpt::Level OptLevel) {
2522  const TargetMachine &TM = IS->TM;
2523  const TargetInstrInfo *TII = TM.getInstrInfo();
2524  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2525  const TargetLowering *TLI = &IS->getTargetLowering();
2526
2527  ILPBURRPriorityQueue *PQ =
2528    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2529  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2530  PQ->setScheduleDAG(SD);
2531  return SD;
2532}
2533