ScheduleDAGRRList.cpp revision d1dace8aea073716daf0055ad07fde1164b2a472
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> DisableSchedCycles(
70  "disable-sched-cycles", cl::Hidden, cl::init(false),
71  cl::desc("Disable cycle-level precision during preRA scheduling"));
72
73namespace {
74//===----------------------------------------------------------------------===//
75/// ScheduleDAGRRList - The actual register reduction list scheduler
76/// implementation.  This supports both top-down and bottom-up scheduling.
77///
78class ScheduleDAGRRList : public ScheduleDAGSDNodes {
79private:
80  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
81  /// it is top-down.
82  bool isBottomUp;
83
84  /// NeedLatency - True if the scheduler will make use of latency information.
85  ///
86  bool NeedLatency;
87
88  /// AvailableQueue - The priority queue to use for the available SUnits.
89  SchedulingPriorityQueue *AvailableQueue;
90
91  /// PendingQueue - This contains all of the instructions whose operands have
92  /// been issued, but their results are not ready yet (due to the latency of
93/// the operation).  Once the operands become available, the instruction is
94  /// added to the AvailableQueue.
95  std::vector<SUnit*> PendingQueue;
96
97  /// HazardRec - The hazard recognizer to use.
98  ScheduleHazardRecognizer *HazardRec;
99
100  /// CurCycle - The current scheduler state corresponds to this cycle.
101  unsigned CurCycle;
102
103  /// MinAvailableCycle - Cycle of the soonest available instruction.
104  unsigned MinAvailableCycle;
105
106  /// LiveRegDefs - A set of physical registers and their defining nodes
107  /// that are "live". These nodes must be scheduled before any other nodes
108  /// that modify the registers can be scheduled.
109  unsigned NumLiveRegs;
110  std::vector<SUnit*> LiveRegDefs;
111  std::vector<SUnit*> LiveRegGens;
112
113  /// Topo - A topological ordering for SUnits which permits fast IsReachable
114  /// and similar queries.
115  ScheduleDAGTopologicalSort Topo;
116
117public:
118  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
119                    SchedulingPriorityQueue *availqueue,
120                    CodeGenOpt::Level OptLevel)
121    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
122      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
123      Topo(SUnits) {
124
125    const TargetMachine &tm = mf.getTarget();
126    if (DisableSchedCycles || !NeedLatency)
127      HazardRec = new ScheduleHazardRecognizer();
128    else
129      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
130  }
131
132  ~ScheduleDAGRRList() {
133    delete HazardRec;
134    delete AvailableQueue;
135  }
136
137  void Schedule();
138
139  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
140
141  /// IsReachable - Checks if SU is reachable from TargetSU.
142  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
143    return Topo.IsReachable(SU, TargetSU);
144  }
145
146  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
147  /// create a cycle.
148  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
149    return Topo.WillCreateCycle(SU, TargetSU);
150  }
151
152  /// AddPred - adds a predecessor edge to SUnit SU.
153  /// The edge is added to SU's predecessor list.
154  /// Updates the topological ordering if required.
155  void AddPred(SUnit *SU, const SDep &D) {
156    Topo.AddPred(SU, D.getSUnit());
157    SU->addPred(D);
158  }
159
160  /// RemovePred - removes a predecessor edge from SUnit SU.
161  /// This returns true if an edge was removed.
162  /// Updates the topological ordering if required.
163  void RemovePred(SUnit *SU, const SDep &D) {
164    Topo.RemovePred(SU, D.getSUnit());
165    SU->removePred(D);
166  }
167
168private:
169  bool isReady(SUnit *SU) {
170    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
171      AvailableQueue->isReady(SU);
172  }
173
174  void ReleasePred(SUnit *SU, const SDep *PredEdge);
175  void ReleasePredecessors(SUnit *SU);
176  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
177  void ReleaseSuccessors(SUnit *SU);
178  void ReleasePending();
179  void AdvanceToCycle(unsigned NextCycle);
180  void AdvancePastStalls(SUnit *SU);
181  void EmitNode(SUnit *SU);
182  void ScheduleNodeBottomUp(SUnit*);
183  void CapturePred(SDep *PredEdge);
184  void UnscheduleNodeBottomUp(SUnit*);
185  void RestoreHazardCheckerBottomUp();
186  void BacktrackBottomUp(SUnit*, SUnit*);
187  SUnit *CopyAndMoveSuccessors(SUnit*);
188  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
189                                const TargetRegisterClass*,
190                                const TargetRegisterClass*,
191                                SmallVector<SUnit*, 2>&);
192  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
193
194  SUnit *PickNodeToScheduleBottomUp();
195  void ListScheduleBottomUp();
196
197  void ScheduleNodeTopDown(SUnit*);
198  void ListScheduleTopDown();
199
200
201  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
202  /// Updates the topological ordering if required.
203  SUnit *CreateNewSUnit(SDNode *N) {
204    unsigned NumSUnits = SUnits.size();
205    SUnit *NewNode = NewSUnit(N);
206    // Update the topological ordering.
207    if (NewNode->NodeNum >= NumSUnits)
208      Topo.InitDAGTopologicalSorting();
209    return NewNode;
210  }
211
212  /// CreateClone - Creates a new SUnit from an existing one.
213  /// Updates the topological ordering if required.
214  SUnit *CreateClone(SUnit *N) {
215    unsigned NumSUnits = SUnits.size();
216    SUnit *NewNode = Clone(N);
217    // Update the topological ordering.
218    if (NewNode->NodeNum >= NumSUnits)
219      Topo.InitDAGTopologicalSorting();
220    return NewNode;
221  }
222
223  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
224  /// need actual latency information but the hybrid scheduler does.
225  bool ForceUnitLatencies() const {
226    return !NeedLatency;
227  }
228};
229}  // end anonymous namespace
230
231
232/// Schedule - Schedule the DAG using list scheduling.
233void ScheduleDAGRRList::Schedule() {
234  DEBUG(dbgs()
235        << "********** List Scheduling BB#" << BB->getNumber()
236        << " '" << BB->getName() << "' **********\n");
237
238  CurCycle = 0;
239  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
240  NumLiveRegs = 0;
241  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
242  LiveRegGens.resize(TRI->getNumRegs(), NULL);
243
244  // Build the scheduling graph.
245  BuildSchedGraph(NULL);
246
247  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
248          SUnits[su].dumpAll(this));
249  Topo.InitDAGTopologicalSorting();
250
251  AvailableQueue->initNodes(SUnits);
252
253  HazardRec->Reset();
254
255  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
256  if (isBottomUp)
257    ListScheduleBottomUp();
258  else
259    ListScheduleTopDown();
260
261  AvailableQueue->releaseState();
262}
263
264//===----------------------------------------------------------------------===//
265//  Bottom-Up Scheduling
266//===----------------------------------------------------------------------===//
267
268/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
269/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
270void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
271  SUnit *PredSU = PredEdge->getSUnit();
272
273#ifndef NDEBUG
274  if (PredSU->NumSuccsLeft == 0) {
275    dbgs() << "*** Scheduling failed! ***\n";
276    PredSU->dump(this);
277    dbgs() << " has been released too many times!\n";
278    llvm_unreachable(0);
279  }
280#endif
281  --PredSU->NumSuccsLeft;
282
283  if (!ForceUnitLatencies()) {
284    // Updating predecessor's height. This is now the cycle when the
285    // predecessor can be scheduled without causing a pipeline stall.
286    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
287  }
288
289  // If all the node's successors are scheduled, this node is ready
290  // to be scheduled. Ignore the special EntrySU node.
291  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
292    PredSU->isAvailable = true;
293
294    unsigned Height = PredSU->getHeight();
295    if (Height < MinAvailableCycle)
296      MinAvailableCycle = Height;
297
298    if (isReady(PredSU)) {
299      AvailableQueue->push(PredSU);
300    }
301    // CapturePred and others may have left the node in the pending queue; avoid
302    // adding it twice.
303    else if (!PredSU->isPending) {
304      PredSU->isPending = true;
305      PendingQueue.push_back(PredSU);
306    }
307  }
308}
309
310/// Call ReleasePred for each predecessor, then update register live def/gen.
311/// Always update LiveRegDefs for a register dependence even if the current SU
312/// also defines the register. This effectively creates one large live range
313/// across a sequence of two-address nodes. This is important because the
314/// entire chain must be scheduled together. Example:
315///
316/// flags = (3) add
317/// flags = (2) addc flags
318/// flags = (1) addc flags
319///
320/// results in
321///
322/// LiveRegDefs[flags] = 3
323/// LiveRegGens[flags] = 1
324///
325/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
326/// interference on flags.
327void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
328  // Bottom up: release predecessors
329  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
330       I != E; ++I) {
331    ReleasePred(SU, &*I);
332    if (I->isAssignedRegDep()) {
333      // This is a physical register dependency and it's impossible or
334      // expensive to copy the register. Make sure nothing that can
335      // clobber the register is scheduled between the predecessor and
336      // this node.
337      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
338      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
339             "interference on register dependence");
340      LiveRegDefs[I->getReg()] = I->getSUnit();
341      if (!LiveRegGens[I->getReg()]) {
342        ++NumLiveRegs;
343        LiveRegGens[I->getReg()] = SU;
344      }
345    }
346  }
347}
348
349/// Check to see if any of the pending instructions are ready to issue.  If
350/// so, add them to the available queue.
351void ScheduleDAGRRList::ReleasePending() {
352  if (DisableSchedCycles) {
353    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
354    return;
355  }
356
357  // If the available queue is empty, it is safe to reset MinAvailableCycle.
358  if (AvailableQueue->empty())
359    MinAvailableCycle = UINT_MAX;
360
361  // Check to see if any of the pending instructions are ready to issue.  If
362  // so, add them to the available queue.
363  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
364    unsigned ReadyCycle =
365      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
366    if (ReadyCycle < MinAvailableCycle)
367      MinAvailableCycle = ReadyCycle;
368
369    if (PendingQueue[i]->isAvailable) {
370      if (!isReady(PendingQueue[i]))
371          continue;
372      AvailableQueue->push(PendingQueue[i]);
373    }
374    PendingQueue[i]->isPending = false;
375    PendingQueue[i] = PendingQueue.back();
376    PendingQueue.pop_back();
377    --i; --e;
378  }
379}
380
381/// Move the scheduler state forward by the specified number of Cycles.
382void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
383  if (NextCycle <= CurCycle)
384    return;
385
386  AvailableQueue->setCurCycle(NextCycle);
387  if (!HazardRec->isEnabled()) {
388    // Bypass lots of virtual calls in case of long latency.
389    CurCycle = NextCycle;
390  }
391  else {
392    for (; CurCycle != NextCycle; ++CurCycle) {
393      if (isBottomUp)
394        HazardRec->RecedeCycle();
395      else
396        HazardRec->AdvanceCycle();
397    }
398  }
399  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
400  // available Q to release pending nodes at least once before popping.
401  ReleasePending();
402}
403
404/// Move the scheduler state forward until the specified node's dependents are
405/// ready and can be scheduled with no resource conflicts.
406void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
407  if (DisableSchedCycles)
408    return;
409
410  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
411
412  // Bump CurCycle to account for latency. We assume the latency of other
413  // available instructions may be hidden by the stall (not a full pipe stall).
414  // This updates the hazard recognizer's cycle before reserving resources for
415  // this instruction.
416  AdvanceToCycle(ReadyCycle);
417
418  // Calls are scheduled in their preceding cycle, so don't conflict with
419  // hazards from instructions after the call. EmitNode will reset the
420  // scoreboard state before emitting the call.
421  if (isBottomUp && SU->isCall)
422    return;
423
424  // FIXME: For resource conflicts in very long non-pipelined stages, we
425  // should probably skip ahead here to avoid useless scoreboard checks.
426  int Stalls = 0;
427  while (true) {
428    ScheduleHazardRecognizer::HazardType HT =
429      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
430
431    if (HT == ScheduleHazardRecognizer::NoHazard)
432      break;
433
434    ++Stalls;
435  }
436  AdvanceToCycle(CurCycle + Stalls);
437}
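// Illustrative note on the loop above: bottom-up scheduling probes the hazard
// recognizer with negative stall counts because the scoreboard is walked
// backwards in time. For example, if SU becomes ready at height 7 while
// CurCycle is 5, AdvanceToCycle(7) runs first, then getHazardType(SU, -Stalls)
// is queried for Stalls = 0, 1, 2, ... until it reports NoHazard, and the
// cycle is finally advanced by the number of stall cycles found.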
438
439/// Record this SUnit in the HazardRecognizer.
440/// Does not update CurCycle.
441void ScheduleDAGRRList::EmitNode(SUnit *SU) {
442  if (!HazardRec->isEnabled())
443    return;
444
445  // Check for phys reg copy.
446  if (!SU->getNode())
447    return;
448
449  switch (SU->getNode()->getOpcode()) {
450  default:
451    assert(SU->getNode()->isMachineOpcode() &&
452           "This target-independent node should not be scheduled.");
453    break;
454  case ISD::MERGE_VALUES:
455  case ISD::TokenFactor:
456  case ISD::CopyToReg:
457  case ISD::CopyFromReg:
458  case ISD::EH_LABEL:
459    // Noops don't affect the scoreboard state. Copies are likely to be
460    // removed.
461    return;
462  case ISD::INLINEASM:
463    // For inline asm, clear the pipeline state.
464    HazardRec->Reset();
465    return;
466  }
467  if (isBottomUp && SU->isCall) {
468    // Calls are scheduled with their preceding instructions. For bottom-up
469    // scheduling, clear the pipeline state before emitting.
470    HazardRec->Reset();
471  }
472
473  HazardRec->EmitInstruction(SU);
474
475  if (!isBottomUp && SU->isCall) {
476    HazardRec->Reset();
477  }
478}
479
480/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
481/// count of its predecessors. If a predecessor pending count is zero, add it to
482/// the Available queue.
483void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
484  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
485  DEBUG(SU->dump(this));
486
487#ifndef NDEBUG
488  if (CurCycle < SU->getHeight())
489    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
490#endif
491
492  // FIXME: Do not modify node height. It may interfere with
493  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
494  // node, its ready cycle can aid heuristics, and after scheduling it can
495  // indicate the scheduled cycle.
496  SU->setHeightToAtLeast(CurCycle);
497
498  // Reserve resources for the scheduled instruction.
499  EmitNode(SU);
500
501  Sequence.push_back(SU);
502
503  AvailableQueue->ScheduledNode(SU);
504
505  // Update liveness of predecessors before successors to avoid treating a
506  // two-address node as a live range def.
507  ReleasePredecessors(SU);
508
509  // Release all the implicit physical register defs that are live.
510  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
511       I != E; ++I) {
512    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
513    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
514      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
515      --NumLiveRegs;
516      LiveRegDefs[I->getReg()] = NULL;
517      LiveRegGens[I->getReg()] = NULL;
518    }
519  }
520
521  SU->isScheduled = true;
522
523  // Conditions under which the scheduler should eagerly advance the cycle:
524  // (1) No available instructions
525  // (2) All pipelines full, so available instructions must have hazards.
526  //
527  // If HazardRec is disabled, count each inst as one cycle.
528  if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
529      || AvailableQueue->empty())
530    AdvanceToCycle(CurCycle + 1);
531}
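// Illustrative note: with the hazard recognizer disabled, the condition above
// degenerates to a one-instruction-per-cycle model, i.e. scheduling a node at
// cycle N is always followed by AdvanceToCycle(N + 1). With a real scoreboard,
// the cycle advances eagerly only when the issue limit is hit or nothing else
// is available in the current cycle.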
532
533/// CapturePred - This does the opposite of ReleasePred. Since SU is being
534/// unscheduled, increase the succ left count of its predecessors. Remove
535/// them from AvailableQueue if necessary.
536void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
537  SUnit *PredSU = PredEdge->getSUnit();
538  if (PredSU->isAvailable) {
539    PredSU->isAvailable = false;
540    if (!PredSU->isPending)
541      AvailableQueue->remove(PredSU);
542  }
543
544  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
545  ++PredSU->NumSuccsLeft;
546}
547
548/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
549/// and its predecessors' states to reflect the change.
550void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
551  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
552  DEBUG(SU->dump(this));
553
554  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
555       I != E; ++I) {
556    CapturePred(&*I);
557    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
558      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
559      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
560             "Physical register dependency violated?");
561      --NumLiveRegs;
562      LiveRegDefs[I->getReg()] = NULL;
563      LiveRegGens[I->getReg()] = NULL;
564    }
565  }
566
567  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
568       I != E; ++I) {
569    if (I->isAssignedRegDep()) {
570      if (!LiveRegDefs[I->getReg()]) {
571        ++NumLiveRegs;
572      }
573      // This becomes the nearest def. Note that an earlier def may still be
574      // pending if this is a two-address node.
575      LiveRegDefs[I->getReg()] = SU;
576      if (LiveRegGens[I->getReg()] == NULL ||
577          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
578        LiveRegGens[I->getReg()] = I->getSUnit();
579    }
580  }
581  if (SU->getHeight() < MinAvailableCycle)
582    MinAvailableCycle = SU->getHeight();
583
584  SU->setHeightDirty();
585  SU->isScheduled = false;
586  SU->isAvailable = true;
587  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
588    // Don't make available until backtracking is complete.
589    SU->isPending = true;
590    PendingQueue.push_back(SU);
591  }
592  else {
593    AvailableQueue->push(SU);
594  }
595  AvailableQueue->UnscheduledNode(SU);
596}
597
598/// After backtracking, the hazard checker needs to be restored to a state
599/// corresponding to the current cycle.
600void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
601  HazardRec->Reset();
602
603  unsigned LookAhead = std::min((unsigned)Sequence.size(),
604                                HazardRec->getMaxLookAhead());
605  if (LookAhead == 0)
606    return;
607
608  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
609  unsigned HazardCycle = (*I)->getHeight();
610  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
611    SUnit *SU = *I;
612    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
613      HazardRec->RecedeCycle();
614    }
615    EmitNode(SU);
616  }
617}
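// Illustrative note: only the last getMaxLookAhead() entries of Sequence can
// still affect the scoreboard, so the loop above replays just that suffix.
// E.g. with a lookahead of 4 and Sequence = {A, B, C, D, E, F}, nodes C, D, E
// and F are re-emitted, receding the scoreboard cycle whenever a node's height
// exceeds the current HazardCycle.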
618
619/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
620/// including BtSU so that SU can be scheduled.
621void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
622  SUnit *OldSU = Sequence.back();
623  while (true) {
624    Sequence.pop_back();
625    if (SU->isSucc(OldSU))
626      // Don't try to remove SU from AvailableQueue.
627      SU->isAvailable = false;
628    // FIXME: use ready cycle instead of height
629    CurCycle = OldSU->getHeight();
630    UnscheduleNodeBottomUp(OldSU);
631    AvailableQueue->setCurCycle(CurCycle);
632    if (OldSU == BtSU)
633      break;
634    OldSU = Sequence.back();
635  }
636
637  assert(!SU->isSucc(OldSU) && "Something is wrong!");
638
639  RestoreHazardCheckerBottomUp();
640
641  ReleasePending();
642
643  ++NumBacktracks;
644}
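// Illustrative note: backtracking pops scheduled nodes off Sequence one at a
// time, unscheduling each (which returns it to the available or pending
// queue), until BtSU itself has been unscheduled. The caller can then schedule
// SU ahead of BtSU, typically after adding an artificial ordering edge between
// the two (see PickNodeToScheduleBottomUp).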
645
646static bool isOperandOf(const SUnit *SU, SDNode *N) {
647  for (const SDNode *SUNode = SU->getNode(); SUNode;
648       SUNode = SUNode->getGluedNode()) {
649    if (SUNode->isOperandOf(N))
650      return true;
651  }
652  return false;
653}
654
655/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
656/// successors to the newly created node.
657SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
658  SDNode *N = SU->getNode();
659  if (!N)
660    return NULL;
661
662  if (SU->getNode()->getGluedNode())
663    return NULL;
664
665  SUnit *NewSU;
666  bool TryUnfold = false;
667  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
668    EVT VT = N->getValueType(i);
669    if (VT == MVT::Glue)
670      return NULL;
671    else if (VT == MVT::Other)
672      TryUnfold = true;
673  }
674  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
675    const SDValue &Op = N->getOperand(i);
676    EVT VT = Op.getNode()->getValueType(Op.getResNo());
677    if (VT == MVT::Glue)
678      return NULL;
679  }
680
681  if (TryUnfold) {
682    SmallVector<SDNode*, 2> NewNodes;
683    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
684      return NULL;
685
686    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
687    assert(NewNodes.size() == 2 && "Expected a load folding node!");
688
689    N = NewNodes[1];
690    SDNode *LoadNode = NewNodes[0];
691    unsigned NumVals = N->getNumValues();
692    unsigned OldNumVals = SU->getNode()->getNumValues();
693    for (unsigned i = 0; i != NumVals; ++i)
694      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
695    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
696                                   SDValue(LoadNode, 1));
697
698    // LoadNode may already exist. This can happen when there is another
699    // load from the same location that produces the same type of value
700    // but has different alignment or volatility.
701    bool isNewLoad = true;
702    SUnit *LoadSU;
703    if (LoadNode->getNodeId() != -1) {
704      LoadSU = &SUnits[LoadNode->getNodeId()];
705      isNewLoad = false;
706    } else {
707      LoadSU = CreateNewSUnit(LoadNode);
708      LoadNode->setNodeId(LoadSU->NodeNum);
709      ComputeLatency(LoadSU);
710    }
711
712    SUnit *NewSU = CreateNewSUnit(N);
713    assert(N->getNodeId() == -1 && "Node already inserted!");
714    N->setNodeId(NewSU->NodeNum);
715
716    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
717    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
718      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
719        NewSU->isTwoAddress = true;
720        break;
721      }
722    }
723    if (TID.isCommutable())
724      NewSU->isCommutable = true;
725    ComputeLatency(NewSU);
726
727    // Record all the edges to and from the old SU, by category.
728    SmallVector<SDep, 4> ChainPreds;
729    SmallVector<SDep, 4> ChainSuccs;
730    SmallVector<SDep, 4> LoadPreds;
731    SmallVector<SDep, 4> NodePreds;
732    SmallVector<SDep, 4> NodeSuccs;
733    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
734         I != E; ++I) {
735      if (I->isCtrl())
736        ChainPreds.push_back(*I);
737      else if (isOperandOf(I->getSUnit(), LoadNode))
738        LoadPreds.push_back(*I);
739      else
740        NodePreds.push_back(*I);
741    }
742    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
743         I != E; ++I) {
744      if (I->isCtrl())
745        ChainSuccs.push_back(*I);
746      else
747        NodeSuccs.push_back(*I);
748    }
749
750    // Now assign edges to the newly-created nodes.
751    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
752      const SDep &Pred = ChainPreds[i];
753      RemovePred(SU, Pred);
754      if (isNewLoad)
755        AddPred(LoadSU, Pred);
756    }
757    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
758      const SDep &Pred = LoadPreds[i];
759      RemovePred(SU, Pred);
760      if (isNewLoad)
761        AddPred(LoadSU, Pred);
762    }
763    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
764      const SDep &Pred = NodePreds[i];
765      RemovePred(SU, Pred);
766      AddPred(NewSU, Pred);
767    }
768    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
769      SDep D = NodeSuccs[i];
770      SUnit *SuccDep = D.getSUnit();
771      D.setSUnit(SU);
772      RemovePred(SuccDep, D);
773      D.setSUnit(NewSU);
774      AddPred(SuccDep, D);
775    }
776    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
777      SDep D = ChainSuccs[i];
778      SUnit *SuccDep = D.getSUnit();
779      D.setSUnit(SU);
780      RemovePred(SuccDep, D);
781      if (isNewLoad) {
782        D.setSUnit(LoadSU);
783        AddPred(SuccDep, D);
784      }
785    }
786
787    // Add a data dependency to reflect that NewSU reads the value defined
788    // by LoadSU.
789    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
790
791    if (isNewLoad)
792      AvailableQueue->addNode(LoadSU);
793    AvailableQueue->addNode(NewSU);
794
795    ++NumUnfolds;
796
797    if (NewSU->NumSuccsLeft == 0) {
798      NewSU->isAvailable = true;
799      return NewSU;
800    }
801    SU = NewSU;
802  }
803
804  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
805  NewSU = CreateClone(SU);
806
807  // New SUnit has the exact same predecessors.
808  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
809       I != E; ++I)
810    if (!I->isArtificial())
811      AddPred(NewSU, *I);
812
813  // Only copy scheduled successors. Cut them from old node's successor
814  // list and move them over.
815  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
816  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
817       I != E; ++I) {
818    if (I->isArtificial())
819      continue;
820    SUnit *SuccSU = I->getSUnit();
821    if (SuccSU->isScheduled) {
822      SDep D = *I;
823      D.setSUnit(NewSU);
824      AddPred(SuccSU, D);
825      D.setSUnit(SU);
826      DelDeps.push_back(std::make_pair(SuccSU, D));
827    }
828  }
829  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
830    RemovePred(DelDeps[i].first, DelDeps[i].second);
831
832  AvailableQueue->updateNode(SU);
833  AvailableQueue->addNode(NewSU);
834
835  ++NumDups;
836  return NewSU;
837}
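// Illustrative sketch of the unfolding path above, using x86-style opcodes
// purely for illustration:
//
//   r1 = ADD32rm r0, [mem]        // SU: add with a folded load
//
// is unfolded into
//
//   t  = MOV32rm [mem]            // LoadSU
//   r1 = ADD32rr r0, t            // NewSU
//
// Chain edges and load-feeding data edges are rerouted to LoadSU, the
// remaining data edges are rerouted to NewSU, and a new data edge from LoadSU
// to NewSU models the loaded value.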
838
839/// InsertCopiesAndMoveSuccs - Insert register copies and move all
840/// scheduled successors of the given SUnit to the last copy.
841void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
842                                               const TargetRegisterClass *DestRC,
843                                               const TargetRegisterClass *SrcRC,
844                                               SmallVector<SUnit*, 2> &Copies) {
845  SUnit *CopyFromSU = CreateNewSUnit(NULL);
846  CopyFromSU->CopySrcRC = SrcRC;
847  CopyFromSU->CopyDstRC = DestRC;
848
849  SUnit *CopyToSU = CreateNewSUnit(NULL);
850  CopyToSU->CopySrcRC = DestRC;
851  CopyToSU->CopyDstRC = SrcRC;
852
853  // Only copy scheduled successors. Cut them from old node's successor
854  // list and move them over.
855  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
856  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
857       I != E; ++I) {
858    if (I->isArtificial())
859      continue;
860    SUnit *SuccSU = I->getSUnit();
861    if (SuccSU->isScheduled) {
862      SDep D = *I;
863      D.setSUnit(CopyToSU);
864      AddPred(SuccSU, D);
865      DelDeps.push_back(std::make_pair(SuccSU, *I));
866    }
867  }
868  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
869    RemovePred(DelDeps[i].first, DelDeps[i].second);
870
871  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
872  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
873
874  AvailableQueue->updateNode(SU);
875  AvailableQueue->addNode(CopyFromSU);
876  AvailableQueue->addNode(CopyToSU);
877  Copies.push_back(CopyFromSU);
878  Copies.push_back(CopyToSU);
879
880  ++NumPRCopies;
881}
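// Illustrative note: after InsertCopiesAndMoveSuccs, the value SU produces in
// physical register Reg reaches its already-scheduled users through a pair of
// cross-class copies:
//
//   SU --(data, Reg)--> CopyFromSU --(data)--> CopyToSU --> scheduled succs
//
// CopyFromSU copies the value out of the problematic register class and
// CopyToSU copies it back, breaking the physical register interference at the
// cost of two extra copies (counted by NumPRCopies).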
882
883/// getPhysicalRegisterVT - Returns the ValueType of the physical register
884/// definition of the specified node.
885/// FIXME: Move to SelectionDAG?
886static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
887                                 const TargetInstrInfo *TII) {
888  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
889  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
890  unsigned NumRes = TID.getNumDefs();
891  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
892    if (Reg == *ImpDef)
893      break;
894    ++NumRes;
895  }
896  return N->getValueType(NumRes);
897}
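// Illustrative note: a machine node's result list holds its explicit defs
// first, followed by its implicit defs in the order listed in the instruction
// description. So for a node with one explicit def and an implicit def list
// starting with EFLAGS, the EFLAGS def has type N->getValueType(1).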
898
899/// CheckForLiveRegDef - Add interfering registers to LRegs if the specified
900/// register def of the specified SUnit clobbers any "live" registers.
901static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
902                               std::vector<SUnit*> &LiveRegDefs,
903                               SmallSet<unsigned, 4> &RegAdded,
904                               SmallVector<unsigned, 4> &LRegs,
905                               const TargetRegisterInfo *TRI) {
906  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
907
908    // Check if the overlapping register is live.
909    if (!LiveRegDefs[*AliasI]) continue;
910
911    // Allow multiple uses of the same def.
912    if (LiveRegDefs[*AliasI] == SU) continue;
913
914    // Add the overlapping register to the set of interfering live regs.
915    if (RegAdded.insert(*AliasI))
916      LRegs.push_back(*AliasI);
917  }
918}
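// Illustrative note: the loop above considers every register that overlaps
// Reg, not just Reg itself. E.g. on x86 a def of AL interferes with a live
// def of AX or EAX, so those aliases are also candidates for LRegs.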
919
920/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
921/// scheduling of the given node to satisfy live physical register dependencies.
922/// If the specified node is the last one that's available to schedule, do
923/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
924bool ScheduleDAGRRList::
925DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
926  if (NumLiveRegs == 0)
927    return false;
928
929  SmallSet<unsigned, 4> RegAdded;
930  // If this node would clobber any "live" register, then it's not ready.
931  //
932  // If SU is the currently live definition of the same register that it uses,
933  // then we are free to schedule it.
934  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
935       I != E; ++I) {
936    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
937      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
938                         RegAdded, LRegs, TRI);
939  }
940
941  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
942    if (Node->getOpcode() == ISD::INLINEASM) {
943      // Inline asm can clobber physical defs.
944      unsigned NumOps = Node->getNumOperands();
945      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
946        --NumOps;  // Ignore the glue operand.
947
948      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
949        unsigned Flags =
950          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
951        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
952
953        ++i; // Skip the ID value.
954        if (InlineAsm::isRegDefKind(Flags) ||
955            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
956          // Check for def of register or earlyclobber register.
957          for (; NumVals; --NumVals, ++i) {
958            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
959            if (TargetRegisterInfo::isPhysicalRegister(Reg))
960              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
961          }
962        } else
963          i += NumVals;
964      }
965      continue;
966    }
967
968    if (!Node->isMachineOpcode())
969      continue;
970    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
971    if (!TID.ImplicitDefs)
972      continue;
973    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
974      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
975  }
976
977  return !LRegs.empty();
978}
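// Illustrative example: suppose a conditional branch that reads EFLAGS has
// already been scheduled (bottom-up), but the compare that defines EFLAGS has
// not. EFLAGS is then "live": LiveRegDefs[EFLAGS] is the compare and
// LiveRegGens[EFLAGS] is the branch. A candidate SU that implicitly defines
// EFLAGS would clobber that live range, so EFLAGS lands in LRegs and the
// caller must backtrack, duplicate, or insert copies before SU can issue.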
979
980/// Return a node that can be scheduled in this cycle. Requirements:
981/// (1) Ready: latency has been satisfied
982/// (2) No Hazards: resources are available
983/// (3) No Interferences: may unschedule to break register interferences.
984SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
985  SmallVector<SUnit*, 4> Interferences;
986  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
987
988  SUnit *CurSU = AvailableQueue->pop();
989  while (CurSU) {
990    SmallVector<unsigned, 4> LRegs;
991    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
992      break;
993    LRegsMap.insert(std::make_pair(CurSU, LRegs));
994
995    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
996    Interferences.push_back(CurSU);
997    CurSU = AvailableQueue->pop();
998  }
999  if (CurSU) {
1000    // Add the nodes that aren't ready back onto the available list.
1001    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1002      Interferences[i]->isPending = false;
1003      assert(Interferences[i]->isAvailable && "must still be available");
1004      AvailableQueue->push(Interferences[i]);
1005    }
1006    return CurSU;
1007  }
1008
1009  // All candidates are delayed due to live physical reg dependencies.
1010  // Try backtracking, code duplication, or inserting cross class copies
1011  // to resolve it.
1012  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1013    SUnit *TrySU = Interferences[i];
1014    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1015
1016    // Try unscheduling up to the point where it's safe to schedule
1017    // this node.
1018    SUnit *BtSU = NULL;
1019    unsigned LiveCycle = UINT_MAX;
1020    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1021      unsigned Reg = LRegs[j];
1022      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1023        BtSU = LiveRegGens[Reg];
1024        LiveCycle = BtSU->getHeight();
1025      }
1026    }
1027    if (!WillCreateCycle(TrySU, BtSU))  {
1028      BacktrackBottomUp(TrySU, BtSU);
1029
1030      // Force the current node to be scheduled before the node that
1031      // requires the physical reg dep.
1032      if (BtSU->isAvailable) {
1033        BtSU->isAvailable = false;
1034        if (!BtSU->isPending)
1035          AvailableQueue->remove(BtSU);
1036      }
1037      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1038                          /*Reg=*/0, /*isNormalMemory=*/false,
1039                          /*isMustAlias=*/false, /*isArtificial=*/true));
1040
1041      // If one or more successors have been unscheduled, then the current
1042      // node is no longer available. Schedule a successor that's now
1043      // available instead.
1044      if (!TrySU->isAvailable) {
1045        CurSU = AvailableQueue->pop();
1046      }
1047      else {
1048        CurSU = TrySU;
1049        TrySU->isPending = false;
1050        Interferences.erase(Interferences.begin()+i);
1051      }
1052      break;
1053    }
1054  }
1055
1056  if (!CurSU) {
1057    // Can't backtrack. If it's too expensive to copy the value, then try to
1058    // duplicate the nodes that produce these "too expensive to copy"
1059    // values to break the dependency. In case even that doesn't work,
1060    // insert cross class copies.
1061    // If it's not too expensive, i.e. cost != -1, issue copies.
1062    SUnit *TrySU = Interferences[0];
1063    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1064    assert(LRegs.size() == 1 && "Can't handle this yet!");
1065    unsigned Reg = LRegs[0];
1066    SUnit *LRDef = LiveRegDefs[Reg];
1067    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1068    const TargetRegisterClass *RC =
1069      TRI->getMinimalPhysRegClass(Reg, VT);
1070    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1071
1072    // If the cross copy register class is null, then it must be possible to
1073    // copy the value directly. Do not try to duplicate the def.
1074    SUnit *NewDef = 0;
1075    if (DestRC)
1076      NewDef = CopyAndMoveSuccessors(LRDef);
1077    else
1078      DestRC = RC;
1079    if (!NewDef) {
1080      // Issue copies, these can be expensive cross register class copies.
1081      SmallVector<SUnit*, 2> Copies;
1082      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1083      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1084            << " to SU #" << Copies.front()->NodeNum << "\n");
1085      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1086                          /*Reg=*/0, /*isNormalMemory=*/false,
1087                          /*isMustAlias=*/false,
1088                          /*isArtificial=*/true));
1089      NewDef = Copies.back();
1090    }
1091
1092    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1093          << " to SU #" << TrySU->NodeNum << "\n");
1094    LiveRegDefs[Reg] = NewDef;
1095    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1096                         /*Reg=*/0, /*isNormalMemory=*/false,
1097                         /*isMustAlias=*/false,
1098                         /*isArtificial=*/true));
1099    TrySU->isAvailable = false;
1100    CurSU = NewDef;
1101  }
1102
1103  assert(CurSU && "Unable to resolve live physical register dependencies!");
1104
1105  // Add the nodes that aren't ready back onto the available list.
1106  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1107    Interferences[i]->isPending = false;
1108    // May no longer be available due to backtracking.
1109    if (Interferences[i]->isAvailable) {
1110      AvailableQueue->push(Interferences[i]);
1111    }
1112  }
1113  return CurSU;
1114}
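// Illustrative note: the interference resolution above is attempted in order
// of increasing cost. BacktrackBottomUp merely unschedules and reorders nodes;
// CopyAndMoveSuccessors duplicates the defining node so each use gets its own
// def; InsertCopiesAndMoveSuccs is the last resort and emits a pair of
// cross-register-class copies.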
1115
1116/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1117/// schedulers.
1118void ScheduleDAGRRList::ListScheduleBottomUp() {
1119  // Release any predecessors of the special Exit node.
1120  ReleasePredecessors(&ExitSU);
1121
1122  // Add root to Available queue.
1123  if (!SUnits.empty()) {
1124    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1125    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1126    RootSU->isAvailable = true;
1127    AvailableQueue->push(RootSU);
1128  }
1129
1130  // While Available queue is not empty, grab the node with the highest
1131  // priority. If it is not ready put it back.  Schedule the node.
1132  Sequence.reserve(SUnits.size());
1133  while (!AvailableQueue->empty()) {
1134    DEBUG(dbgs() << "\n*** Examining Available\n";
1135          AvailableQueue->dump(this));
1136
1137    // Pick the best node to schedule taking all constraints into
1138    // consideration.
1139    SUnit *SU = PickNodeToScheduleBottomUp();
1140
1141    AdvancePastStalls(SU);
1142
1143    ScheduleNodeBottomUp(SU);
1144
1145    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1146      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1147      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1148      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1149    }
1150  }
1151
1152  // Reverse the order if it is bottom up.
1153  std::reverse(Sequence.begin(), Sequence.end());
1154
1155#ifndef NDEBUG
1156  VerifySchedule(isBottomUp);
1157#endif
1158}
1159
1160//===----------------------------------------------------------------------===//
1161//  Top-Down Scheduling
1162//===----------------------------------------------------------------------===//
1163
1164/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1165/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1166void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1167  SUnit *SuccSU = SuccEdge->getSUnit();
1168
1169#ifndef NDEBUG
1170  if (SuccSU->NumPredsLeft == 0) {
1171    dbgs() << "*** Scheduling failed! ***\n";
1172    SuccSU->dump(this);
1173    dbgs() << " has been released too many times!\n";
1174    llvm_unreachable(0);
1175  }
1176#endif
1177  --SuccSU->NumPredsLeft;
1178
1179  // If all the node's predecessors are scheduled, this node is ready
1180  // to be scheduled. Ignore the special ExitSU node.
1181  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1182    SuccSU->isAvailable = true;
1183    AvailableQueue->push(SuccSU);
1184  }
1185}
1186
1187void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1188  // Top down: release successors
1189  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1190       I != E; ++I) {
1191    assert(!I->isAssignedRegDep() &&
1192           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1193
1194    ReleaseSucc(SU, &*I);
1195  }
1196}
1197
1198/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1199/// count of its successors. If a successor pending count is zero, add it to
1200/// the Available queue.
1201void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1202  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1203  DEBUG(SU->dump(this));
1204
1205  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1206  SU->setDepthToAtLeast(CurCycle);
1207  Sequence.push_back(SU);
1208
1209  ReleaseSuccessors(SU);
1210  SU->isScheduled = true;
1211  AvailableQueue->ScheduledNode(SU);
1212}
1213
1214/// ListScheduleTopDown - The main loop of list scheduling for top-down
1215/// schedulers.
1216void ScheduleDAGRRList::ListScheduleTopDown() {
1217  AvailableQueue->setCurCycle(CurCycle);
1218
1219  // Release any successors of the special Entry node.
1220  ReleaseSuccessors(&EntrySU);
1221
1222  // All leaves to Available queue.
1223  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1224    // It is available if it has no predecessors.
1225    if (SUnits[i].Preds.empty()) {
1226      AvailableQueue->push(&SUnits[i]);
1227      SUnits[i].isAvailable = true;
1228    }
1229  }
1230
1231  // While Available queue is not empty, grab the node with the highest
1232  // priority. If it is not ready put it back.  Schedule the node.
1233  Sequence.reserve(SUnits.size());
1234  while (!AvailableQueue->empty()) {
1235    SUnit *CurSU = AvailableQueue->pop();
1236
1237    if (CurSU)
1238      ScheduleNodeTopDown(CurSU);
1239    ++CurCycle;
1240    AvailableQueue->setCurCycle(CurCycle);
1241  }
1242
1243#ifndef NDEBUG
1244  VerifySchedule(isBottomUp);
1245#endif
1246}
1247
1248
1249//===----------------------------------------------------------------------===//
1250//                RegReductionPriorityQueue Definition
1251//===----------------------------------------------------------------------===//
1252//
1253// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1254// to reduce register pressure.
1255//
1256namespace {
1257class RegReductionPQBase;
1258
1259struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1260  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1261};
1262
1263/// bu_ls_rr_sort - Priority function for bottom up register pressure
1264/// reduction scheduler.
1265struct bu_ls_rr_sort : public queue_sort {
1266  enum {
1267    IsBottomUp = true,
1268    HasReadyFilter = false
1269  };
1270
1271  RegReductionPQBase *SPQ;
1272  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1273  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1274
1275  bool operator()(SUnit* left, SUnit* right) const;
1276};
1277
1278// td_ls_rr_sort - Priority function for top down register pressure reduction
1279// scheduler.
1280struct td_ls_rr_sort : public queue_sort {
1281  enum {
1282    IsBottomUp = false,
1283    HasReadyFilter = false
1284  };
1285
1286  RegReductionPQBase *SPQ;
1287  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1288  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1289
1290  bool operator()(const SUnit* left, const SUnit* right) const;
1291};
1292
1293// src_ls_rr_sort - Priority function for source order scheduler.
1294struct src_ls_rr_sort : public queue_sort {
1295  enum {
1296    IsBottomUp = true,
1297    HasReadyFilter = false
1298  };
1299
1300  RegReductionPQBase *SPQ;
1301  src_ls_rr_sort(RegReductionPQBase *spq)
1302    : SPQ(spq) {}
1303  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1304    : SPQ(RHS.SPQ) {}
1305
1306  bool operator()(SUnit* left, SUnit* right) const;
1307};
1308
1309// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1310struct hybrid_ls_rr_sort : public queue_sort {
1311  enum {
1312    IsBottomUp = true,
1313    HasReadyFilter = true
1314  };
1315
1316  RegReductionPQBase *SPQ;
1317  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1318    : SPQ(spq) {}
1319  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1320    : SPQ(RHS.SPQ) {}
1321
1322  bool isReady(SUnit *SU, unsigned CurCycle) const;
1323
1324  bool operator()(SUnit* left, SUnit* right) const;
1325};
1326
1327// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1328// scheduler.
1329struct ilp_ls_rr_sort : public queue_sort {
1330  enum {
1331    IsBottomUp = true,
1332    HasReadyFilter = true
1333  };
1334
1335  RegReductionPQBase *SPQ;
1336  ilp_ls_rr_sort(RegReductionPQBase *spq)
1337    : SPQ(spq) {}
1338  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1339    : SPQ(RHS.SPQ) {}
1340
1341  bool isReady(SUnit *SU, unsigned CurCycle) const;
1342
1343  bool operator()(SUnit* left, SUnit* right) const;
1344};
1345
1346class RegReductionPQBase : public SchedulingPriorityQueue {
1347protected:
1348  std::vector<SUnit*> Queue;
1349  unsigned CurQueueId;
1350  bool TracksRegPressure;
1351
1352  // SUnits - The SUnits for the current graph.
1353  std::vector<SUnit> *SUnits;
1354
1355  MachineFunction &MF;
1356  const TargetInstrInfo *TII;
1357  const TargetRegisterInfo *TRI;
1358  const TargetLowering *TLI;
1359  ScheduleDAGRRList *scheduleDAG;
1360
1361  // SethiUllmanNumbers - The SethiUllman number for each node.
1362  std::vector<unsigned> SethiUllmanNumbers;
1363
1364  /// RegPressure - Tracking current reg pressure per register class.
1365  ///
1366  std::vector<unsigned> RegPressure;
1367
1368  /// RegLimit - Tracking the number of allocatable registers per register
1369  /// class.
1370  std::vector<unsigned> RegLimit;
1371
1372public:
1373  RegReductionPQBase(MachineFunction &mf,
1374                     bool hasReadyFilter,
1375                     bool tracksrp,
1376                     const TargetInstrInfo *tii,
1377                     const TargetRegisterInfo *tri,
1378                     const TargetLowering *tli)
1379    : SchedulingPriorityQueue(hasReadyFilter),
1380      CurQueueId(0), TracksRegPressure(tracksrp),
1381      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1382    if (TracksRegPressure) {
1383      unsigned NumRC = TRI->getNumRegClasses();
1384      RegLimit.resize(NumRC);
1385      RegPressure.resize(NumRC);
1386      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1387      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1388      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1389             E = TRI->regclass_end(); I != E; ++I)
1390        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1391    }
1392  }
1393
1394  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1395    scheduleDAG = scheduleDag;
1396  }
1397
1398  ScheduleHazardRecognizer* getHazardRec() {
1399    return scheduleDAG->getHazardRec();
1400  }
1401
1402  void initNodes(std::vector<SUnit> &sunits);
1403
1404  void addNode(const SUnit *SU);
1405
1406  void updateNode(const SUnit *SU);
1407
1408  void releaseState() {
1409    SUnits = 0;
1410    SethiUllmanNumbers.clear();
1411    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1412  }
1413
1414  unsigned getNodePriority(const SUnit *SU) const;
1415
1416  unsigned getNodeOrdering(const SUnit *SU) const {
1417    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1418  }
1419
1420  bool empty() const { return Queue.empty(); }
1421
1422  void push(SUnit *U) {
1423    assert(!U->NodeQueueId && "Node in the queue already");
1424    U->NodeQueueId = ++CurQueueId;
1425    Queue.push_back(U);
1426  }
1427
1428  void remove(SUnit *SU) {
1429    assert(!Queue.empty() && "Queue is empty!");
1430    assert(SU->NodeQueueId != 0 && "Not in queue!");
1431    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1432                                                 SU);
1433    if (I != prior(Queue.end()))
1434      std::swap(*I, Queue.back());
1435    Queue.pop_back();
1436    SU->NodeQueueId = 0;
1437  }
1438
1439  void dumpRegPressure() const;
1440
1441  bool HighRegPressure(const SUnit *SU) const;
1442
1443  bool MayReduceRegPressure(SUnit *SU);
1444
1445  void ScheduledNode(SUnit *SU);
1446
1447  void UnscheduledNode(SUnit *SU);
1448
1449protected:
1450  bool canClobber(const SUnit *SU, const SUnit *Op);
1451  void AddPseudoTwoAddrDeps();
1452  void PrescheduleNodesWithMultipleUses();
1453  void CalculateSethiUllmanNumbers();
1454};
1455
1456template<class SF>
1457class RegReductionPriorityQueue : public RegReductionPQBase {
1458  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1459    std::vector<SUnit *>::iterator Best = Q.begin();
1460    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1461           E = Q.end(); I != E; ++I)
1462      if (Picker(*Best, *I))
1463        Best = I;
1464    SUnit *V = *Best;
1465    if (Best != prior(Q.end()))
1466      std::swap(*Best, Q.back());
1467    Q.pop_back();
1468    return V;
1469  }
1470
1471  SF Picker;
1472
1473public:
1474  RegReductionPriorityQueue(MachineFunction &mf,
1475                            bool tracksrp,
1476                            const TargetInstrInfo *tii,
1477                            const TargetRegisterInfo *tri,
1478                            const TargetLowering *tli)
1479    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1480      Picker(this) {}
1481
1482  bool isBottomUp() const { return SF::IsBottomUp; }
1483
1484  bool isReady(SUnit *U) const {
1485    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1486  }
1487
1488  SUnit *pop() {
1489    if (Queue.empty()) return NULL;
1490
1491    SUnit *V = popFromQueue(Queue, Picker);
1492    V->NodeQueueId = 0;
1493    return V;
1494  }
1495
1496  void dump(ScheduleDAG *DAG) const {
1497    // Emulate pop() without clobbering NodeQueueIds.
1498    std::vector<SUnit*> DumpQueue = Queue;
1499    SF DumpPicker = Picker;
1500    while (!DumpQueue.empty()) {
1501      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1502      if (isBottomUp())
1503        dbgs() << "Height " << SU->getHeight() << ": ";
1504      else
1505        dbgs() << "Depth " << SU->getDepth() << ": ";
1506      SU->dump(DAG);
1507    }
1508  }
1509};
1510
1511typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1512BURegReductionPriorityQueue;
1513
1514typedef RegReductionPriorityQueue<td_ls_rr_sort>
1515TDRegReductionPriorityQueue;
1516
1517typedef RegReductionPriorityQueue<src_ls_rr_sort>
1518SrcRegReductionPriorityQueue;
1519
1520typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1521HybridBURRPriorityQueue;
1522
1523typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1524ILPBURRPriorityQueue;
1525} // end anonymous namespace
1526
1527//===----------------------------------------------------------------------===//
1528//           Static Node Priority for Register Pressure Reduction
1529//===----------------------------------------------------------------------===//
1530
1531/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1532/// Smaller number is the higher priority.
1533static unsigned
1534CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1535  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1536  if (SethiUllmanNumber != 0)
1537    return SethiUllmanNumber;
1538
1539  unsigned Extra = 0;
1540  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1541       I != E; ++I) {
1542    if (I->isCtrl()) continue;  // ignore chain preds
1543    SUnit *PredSU = I->getSUnit();
1544    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1545    if (PredSethiUllman > SethiUllmanNumber) {
1546      SethiUllmanNumber = PredSethiUllman;
1547      Extra = 0;
1548    } else if (PredSethiUllman == SethiUllmanNumber)
1549      ++Extra;
1550  }
1551
1552  SethiUllmanNumber += Extra;
1553
1554  if (SethiUllmanNumber == 0)
1555    SethiUllmanNumber = 1;
1556
1557  return SethiUllmanNumber;
1558}
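// Illustrative example of the computation above (chain preds ignored): a node
// with no data predecessors ends up with the minimum number 1. A node whose
// data predecessors carry numbers {2, 2, 1} takes the maximum (2), counts one
// tie (Extra = 1), and ends up with 2 + 1 = 3. Larger numbers thus mark
// subtrees that need more registers to evaluate.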
1559
1560/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1561/// scheduling units.
1562void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1563  SethiUllmanNumbers.assign(SUnits->size(), 0);
1564
1565  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1566    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1567}
1568
1569void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
1570  SUnits = &sunits;
1571  // Add pseudo dependency edges for two-address nodes.
1572  AddPseudoTwoAddrDeps();
1573  // Reroute edges to nodes with multiple uses.
1574  PrescheduleNodesWithMultipleUses();
1575  // Calculate node priorities.
1576  CalculateSethiUllmanNumbers();
1577}
1578
1579void RegReductionPQBase::addNode(const SUnit *SU) {
1580  unsigned SUSize = SethiUllmanNumbers.size();
1581  if (SUnits->size() > SUSize)
1582    SethiUllmanNumbers.resize(SUSize*2, 0);
1583  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1584}
1585
1586void RegReductionPQBase::updateNode(const SUnit *SU) {
1587  SethiUllmanNumbers[SU->NodeNum] = 0;
1588  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1589}
1590
1591// Lower priority means schedule further down. For bottom-up scheduling, lower
1592// priority SUs are scheduled before higher priority SUs.
1593unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1594  assert(SU->NodeNum < SethiUllmanNumbers.size());
1595  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1596  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1597    // CopyToReg should be close to its uses to facilitate coalescing and
1598    // avoid spilling.
1599    return 0;
1600  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1601      Opc == TargetOpcode::SUBREG_TO_REG ||
1602      Opc == TargetOpcode::INSERT_SUBREG)
1603    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1604    // close to their uses to facilitate coalescing.
1605    return 0;
1606  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1607    // If SU does not have a register use, i.e. it doesn't produce a value
1608    // that would be consumed (e.g. store), then it terminates a chain of
1609    // computation.  Give it a large Sethi-Ullman number so it will be
1610    // scheduled right before its predecessors, so that it doesn't lengthen
1611    // their live ranges.
1612    return 0xffff;
1613  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1614    // If SU does not have a register def, schedule it close to its uses
1615    // because it does not lengthen any live ranges.
1616    return 0;
1617  return SethiUllmanNumbers[SU->NodeNum];
1618}
1619
1620//===----------------------------------------------------------------------===//
1621//                     Register Pressure Tracking
1622//===----------------------------------------------------------------------===//
1623
1624void RegReductionPQBase::dumpRegPressure() const {
1625  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1626         E = TRI->regclass_end(); I != E; ++I) {
1627    const TargetRegisterClass *RC = *I;
1628    unsigned Id = RC->getID();
1629    unsigned RP = RegPressure[Id];
1630    if (!RP) continue;
1631    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1632          << '\n');
1633  }
1634}
1635
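// HighRegPressure - Return true if scheduling SU could push some register
// class over its limit.  Only data predecessors none of whose successors have
// been scheduled yet are counted, since scheduling SU is what would make
// their defs live; a predecessor that already has a scheduled successor is
// live across SU anyway and adds no new pressure.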
1636bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1637  if (!TLI)
1638    return false;
1639
1640  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1641       I != E; ++I) {
1642    if (I->isCtrl())
1643      continue;
1644    SUnit *PredSU = I->getSUnit();
1645    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1646    // counts data deps.  To be more precise, we could maintain a
1647    // NumDataSuccsLeft count.
1648    if (PredSU->NumSuccsLeft != PredSU->Succs.size()) {
1649      DEBUG(dbgs() << "  SU(" << PredSU->NodeNum << ") live across SU("
1650            << SU->NodeNum << ")\n");
1651      continue;
1652    }
1653    const SDNode *PN = PredSU->getNode();
1654    if (!PN->isMachineOpcode()) {
1655      if (PN->getOpcode() == ISD::CopyFromReg) {
1656        EVT VT = PN->getValueType(0);
1657        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1658        unsigned Cost = TLI->getRepRegClassCostFor(VT);
1659        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1660          return true;
1661      }
1662      continue;
1663    }
1664    unsigned POpc = PN->getMachineOpcode();
1665    if (POpc == TargetOpcode::IMPLICIT_DEF)
1666      continue;
1667    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1668      EVT VT = PN->getOperand(0).getValueType();
1669      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1670      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1671      // Check if this increases register pressure of the specific register
1672      // class to the point where it would cause spills.
1673      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1674        return true;
1675      continue;
1676    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1677               POpc == TargetOpcode::SUBREG_TO_REG) {
1678      EVT VT = PN->getValueType(0);
1679      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1680      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1681      // Check if this increases register pressure of the specific register
1682      // class to the point where it would cause spills.
1683      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1684        return true;
1685      continue;
1686    }
1687    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1688    for (unsigned i = 0; i != NumDefs; ++i) {
1689      EVT VT = PN->getValueType(i);
1690      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1691      if (RegPressure[RCId] >= RegLimit[RCId])
1692        return true; // Reg pressure already high.
1693      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1694      if (!PN->hasAnyUseOfValue(i))
1695        continue;
1696      // Check if this increases register pressure of the specific register
1697      // class to the point where it would cause spills.
1698      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1699        return true;
1700    }
1701  }
1702
1703  return false;
1704}
1705
1706bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
1707  const SDNode *N = SU->getNode();
1708
1709  if (!N->isMachineOpcode() || !SU->NumSuccs)
1710    return false;
1711
1712  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1713  for (unsigned i = 0; i != NumDefs; ++i) {
1714    EVT VT = N->getValueType(i);
1715    if (!N->hasAnyUseOfValue(i))
1716      continue;
1717    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1718    if (RegPressure[RCId] >= RegLimit[RCId])
1719      return true;
1720  }
1721  return false;
1722}
1723
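// ScheduledNode - Bottom-up register pressure update: when SU is scheduled,
// the values produced by its not-yet-live data predecessors become live, so
// their representative register class costs are added to RegPressure, while
// SU's own defs have reached their defining node and their costs are
// subtracted.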
1724void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1725  if (!TracksRegPressure)
1726    return;
1727
1728  const SDNode *N = SU->getNode();
1729  if (!N->isMachineOpcode()) {
1730    if (N->getOpcode() != ISD::CopyToReg)
1731      return;
1732  } else {
1733    unsigned Opc = N->getMachineOpcode();
1734    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1735        Opc == TargetOpcode::INSERT_SUBREG ||
1736        Opc == TargetOpcode::SUBREG_TO_REG ||
1737        Opc == TargetOpcode::REG_SEQUENCE ||
1738        Opc == TargetOpcode::IMPLICIT_DEF)
1739      return;
1740  }
1741
1742  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1743       I != E; ++I) {
1744    if (I->isCtrl())
1745      continue;
1746    SUnit *PredSU = I->getSUnit();
1747    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1748    // counts data deps.
1749    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1750      continue;
1751    const SDNode *PN = PredSU->getNode();
1752    if (!PN->isMachineOpcode()) {
1753      if (PN->getOpcode() == ISD::CopyFromReg) {
1754        EVT VT = PN->getValueType(0);
1755        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1756        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1757      }
1758      continue;
1759    }
1760    unsigned POpc = PN->getMachineOpcode();
1761    if (POpc == TargetOpcode::IMPLICIT_DEF)
1762      continue;
1763    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1764      EVT VT = PN->getOperand(0).getValueType();
1765      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1766      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1767      continue;
1768    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1769               POpc == TargetOpcode::SUBREG_TO_REG) {
1770      EVT VT = PN->getValueType(0);
1771      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1772      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1773      continue;
1774    }
1775    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1776    for (unsigned i = 0; i != NumDefs; ++i) {
1777      EVT VT = PN->getValueType(i);
1778      if (!PN->hasAnyUseOfValue(i))
1779        continue;
1780      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1781      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1782    }
1783  }
1784
1785  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1786  // may transfer data dependencies to CopyToReg.
1787  if (SU->NumSuccs && N->isMachineOpcode()) {
1788    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1789    for (unsigned i = 0; i != NumDefs; ++i) {
1790      EVT VT = N->getValueType(i);
1791      if (!N->hasAnyUseOfValue(i))
1792        continue;
1793      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1794      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1795        // Register pressure tracking is imprecise. This can happen.
1796        RegPressure[RCId] = 0;
1797      else
1798        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1799    }
1800  }
1801
1802  dumpRegPressure();
1803}
1804
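// UnscheduledNode - Undo the register pressure bookkeeping performed by
// ScheduledNode; this is used when the scheduler backtracks over SU.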
1805void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1806  if (!TracksRegPressure)
1807    return;
1808
1809  const SDNode *N = SU->getNode();
1810  if (!N->isMachineOpcode()) {
1811    if (N->getOpcode() != ISD::CopyToReg)
1812      return;
1813  } else {
1814    unsigned Opc = N->getMachineOpcode();
1815    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1816        Opc == TargetOpcode::INSERT_SUBREG ||
1817        Opc == TargetOpcode::SUBREG_TO_REG ||
1818        Opc == TargetOpcode::REG_SEQUENCE ||
1819        Opc == TargetOpcode::IMPLICIT_DEF)
1820      return;
1821  }
1822
1823  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1824       I != E; ++I) {
1825    if (I->isCtrl())
1826      continue;
1827    SUnit *PredSU = I->getSUnit();
1828    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
1829    // counts data deps.
1830    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
1831      continue;
1832    const SDNode *PN = PredSU->getNode();
1833    if (!PN->isMachineOpcode()) {
1834      if (PN->getOpcode() == ISD::CopyFromReg) {
1835        EVT VT = PN->getValueType(0);
1836        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1837        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1838      }
1839      continue;
1840    }
1841    unsigned POpc = PN->getMachineOpcode();
1842    if (POpc == TargetOpcode::IMPLICIT_DEF)
1843      continue;
1844    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1845      EVT VT = PN->getOperand(0).getValueType();
1846      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1847      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1848      continue;
1849    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1850               POpc == TargetOpcode::SUBREG_TO_REG) {
1851      EVT VT = PN->getValueType(0);
1852      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1853      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1854      continue;
1855    }
1856    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1857    for (unsigned i = 0; i != NumDefs; ++i) {
1858      EVT VT = PN->getValueType(i);
1859      if (!PN->hasAnyUseOfValue(i))
1860        continue;
1861      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1862      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1863        // Register pressure tracking is imprecise. This can happen.
1864        RegPressure[RCId] = 0;
1865      else
1866        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1867    }
1868  }
1869
1870  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1871  // may transfer data dependencies to CopyToReg.
1872  if (SU->NumSuccs && N->isMachineOpcode()) {
1873    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1874    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1875      EVT VT = N->getValueType(i);
1876      if (VT == MVT::Glue || VT == MVT::Other)
1877        continue;
1878      if (!N->hasAnyUseOfValue(i))
1879        continue;
1880      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1881      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1882    }
1883  }
1884
1885  dumpRegPressure();
1886}
1887
1888//===----------------------------------------------------------------------===//
1889//           Dynamic Node Priority for Register Pressure Reduction
1890//===----------------------------------------------------------------------===//
1891
1892/// closestSucc - Returns the scheduled cycle of the successor which is
1893/// closest to the current cycle.
1894static unsigned closestSucc(const SUnit *SU) {
1895  unsigned MaxHeight = 0;
1896  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1897       I != E; ++I) {
1898    if (I->isCtrl()) continue;  // ignore chain succs
1899    unsigned Height = I->getSUnit()->getHeight();
1900    // If there are a bunch of CopyToRegs stacked up, they should be considered
1901    // to be at the same position.
1902    if (I->getSUnit()->getNode() &&
1903        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1904      Height = closestSucc(I->getSUnit())+1;
1905    if (Height > MaxHeight)
1906      MaxHeight = Height;
1907  }
1908  return MaxHeight;
1909}
1910
1911/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1912/// for scratch registers, i.e. the number of data dependencies.
1913static unsigned calcMaxScratches(const SUnit *SU) {
1914  unsigned Scratches = 0;
1915  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1916       I != E; ++I) {
1917    if (I->isCtrl()) continue;  // ignore chain preds
1918    Scratches++;
1919  }
1920  return Scratches;
1921}
1922
1923/// hasOnlyLiveOutUses - Return true if SU's only value successors are CopyToReg
1924/// nodes copying into a virtual register. Such a def is probably a liveout with
1925/// no other use, so it should be scheduled closer to the terminator.
1926static bool hasOnlyLiveOutUses(const SUnit *SU) {
1927  bool RetVal = false;
1928  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1929       I != E; ++I) {
1930    if (I->isCtrl()) continue;
1931    const SUnit *SuccSU = I->getSUnit();
1932    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1933      unsigned Reg =
1934        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1935      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1936        RetVal = true;
1937        continue;
1938      }
1939    }
1940    return false;
1941  }
1942  return RetVal;
1943}
1944
1945/// UnitsSharePred - Return true if the two scheduling units share a common
1946/// data predecessor.
1947static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1948  SmallSet<const SUnit*, 4> Preds;
1949  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1950       I != E; ++I) {
1951    if (I->isCtrl()) continue;  // ignore chain preds
1952    Preds.insert(I->getSUnit());
1953  }
1954  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1955       I != E; ++I) {
1956    if (I->isCtrl()) continue;  // ignore chain preds
1957    if (Preds.count(I->getSUnit()))
1958      return true;
1959  }
1960  return false;
1961}
1962
1963// Check for either a dependence (latency) or resource (hazard) stall.
1964//
1965// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
1966static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
1967  if ((int)SPQ->getCurCycle() < Height) return true;
1968  if (SPQ->getHazardRec()->getHazardType(SU, 0)
1969      != ScheduleHazardRecognizer::NoHazard)
1970    return true;
1971  return false;
1972}
1973
1974// Return -1 if left has higher priority, 1 if right has higher priority.
1975// Return 0 if latency-based priority is equivalent.
1976static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
1977                            RegReductionPQBase *SPQ) {
1978  // If the two nodes share an operand and one of them has a single
1979  // use that is a live out copy, favor the one that is live out. Otherwise
1980  // it will be difficult to eliminate the copy if the instruction is a
1981  // loop induction variable update. e.g.
1982  // BB:
1983  // sub r1, r3, #1
1984  // str r0, [r2, r3]
1985  // mov r3, r1
1986  // cmp
1987  // bne BB
1988  bool SharePred = UnitsSharePred(left, right);
1989  // FIXME: Only adjust if BB is a loop back edge.
1990  // FIXME: What's the cost of a copy?
1991  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1992  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1993  int LHeight = (int)left->getHeight() - LBonus;
1994  int RHeight = (int)right->getHeight() - RBonus;
1995
1996  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
1997    BUHasStall(left, LHeight, SPQ);
1998  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
1999    BUHasStall(right, RHeight, SPQ);
2000
2001  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2002  // If scheduling both of them will cause stalls, sort them according to
2003  // their height.
2004  if (LStall) {
2005    if (!RStall)
2006      return 1;
2007    if (LHeight != RHeight)
2008      return LHeight > RHeight ? 1 : -1;
2009  } else if (RStall)
2010    return -1;
2011
2012  // If either node is scheduling for latency, sort them by height/depth
2013  // and latency.
2014  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2015                     right->SchedulingPref == Sched::Latency)) {
2016    if (DisableSchedCycles) {
2017      if (LHeight != RHeight)
2018        return LHeight > RHeight ? 1 : -1;
2019    }
2020    else {
2021      // If neither instruction stalls (!LStall && !RStall) then
2022      // its height is already covered so only its depth matters. We also reach
2023      // this if both stall but have the same height.
2024      unsigned LDepth = left->getDepth();
2025      unsigned RDepth = right->getDepth();
2026      if (LDepth != RDepth) {
2027        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2028              << ") depth " << LDepth << " vs SU (" << right->NodeNum
2029              << ") depth " << RDepth << "\n");
2030        return LDepth < RDepth ? 1 : -1;
2031      }
2032    }
2033    if (left->Latency != right->Latency)
2034      return left->Latency > right->Latency ? 1 : -1;
2035  }
2036  return 0;
2037}
2038
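// BURRSort - Common bottom-up tie-breaking order: Sethi-Ullman priority
// first, then distance to the closest successor, then the scratch-register
// estimate, then latency (or raw height/depth when DisableSchedCycles is
// set), and finally NodeQueueId as the deterministic last resort.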
2039static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2040  unsigned LPriority = SPQ->getNodePriority(left);
2041  unsigned RPriority = SPQ->getNodePriority(right);
2042  if (LPriority != RPriority)
2043    return LPriority > RPriority;
2044
2045  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2046  // e.g.
2047  // t1 = op t2, c1
2048  // t3 = op t4, c2
2049  //
2050  // and the following instructions are both ready.
2051  // t2 = op c3
2052  // t4 = op c4
2053  //
2054  // Then schedule t2 = op first.
2055  // i.e.
2056  // t4 = op c4
2057  // t2 = op c3
2058  // t1 = op t2, c1
2059  // t3 = op t4, c2
2060  //
2061  // This creates more short live intervals.
2062  unsigned LDist = closestSucc(left);
2063  unsigned RDist = closestSucc(right);
2064  if (LDist != RDist)
2065    return LDist < RDist;
2066
2067  // How many registers become live when the node is scheduled.
2068  unsigned LScratch = calcMaxScratches(left);
2069  unsigned RScratch = calcMaxScratches(right);
2070  if (LScratch != RScratch)
2071    return LScratch > RScratch;
2072
2073  if (!DisableSchedCycles) {
2074    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2075    if (result != 0)
2076      return result > 0;
2077  }
2078  else {
2079    if (left->getHeight() != right->getHeight())
2080      return left->getHeight() > right->getHeight();
2081
2082    if (left->getDepth() != right->getDepth())
2083      return left->getDepth() < right->getDepth();
2084  }
2085
2086  assert(left->NodeQueueId && right->NodeQueueId &&
2087         "NodeQueueId cannot be zero");
2088  return (left->NodeQueueId > right->NodeQueueId);
2089}
2090
2091// Bottom up
2092bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2093  return BURRSort(left, right, SPQ);
2094}
2095
2096// Source order, otherwise bottom up.
2097bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2098  unsigned LOrder = SPQ->getNodeOrdering(left);
2099  unsigned ROrder = SPQ->getNodeOrdering(right);
2100
2101  // A lower non-zero source order number means a higher preference; an order
2102  // of zero means the node has no recorded source order.
2103  if ((LOrder || ROrder) && LOrder != ROrder)
2104    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2105
2106  return BURRSort(left, right, SPQ);
2107}
2108
2109// If the time between now and when the instruction will be ready can cover
2110// the spill code, then avoid adding it to the ready queue. This gives long
2111// stalls highest priority and allows hoisting across calls. It should also
2112// speed up processing the available queue.
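// E.g. with ReadyDelay == 3, at CurCycle 10 an SU of height 14 stays out of
// the available queue (unless it may reduce register pressure), while an SU
// of height 13 is checked against the hazard recognizer and admitted only if
// no hazard is reported.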
2113bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2114  static const unsigned ReadyDelay = 3;
2115
2116  if (SPQ->MayReduceRegPressure(SU)) return true;
2117
2118  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2119
2120  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2121      != ScheduleHazardRecognizer::NoHazard)
2122    return false;
2123
2124  return true;
2125}
2126
2127// Return true if right should be scheduled with higher priority than left.
2128bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2129  if (left->isCall || right->isCall)
2130    // No way to compute latency of calls.
2131    return BURRSort(left, right, SPQ);
2132
2133  bool LHigh = SPQ->HighRegPressure(left);
2134  bool RHigh = SPQ->HighRegPressure(right);
2135  // Avoid causing spills. If register pressure is high, schedule for
2136  // register pressure reduction.
2137  if (LHigh && !RHigh) {
2138    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2139          << right->NodeNum << ")\n");
2140    return true;
2141  }
2142  else if (!LHigh && RHigh) {
2143    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2144          << left->NodeNum << ")\n");
2145    return false;
2146  }
2147  else if (!LHigh && !RHigh) {
2148    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2149    if (result != 0)
2150      return result > 0;
2151  }
2152  return BURRSort(left, right, SPQ);
2153}
2154
2155// Schedule as many instructions in each cycle as possible. So don't make an
2156// instruction available unless it is ready in the current cycle.
2157bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2158  if (SU->getHeight() > CurCycle) return false;
2159
2160  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2161      != ScheduleHazardRecognizer::NoHazard)
2162    return false;
2163
2164  return true;
2165}
2166
2167bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2168  if (left->isCall || right->isCall)
2169    // No way to compute latency of calls.
2170    return BURRSort(left, right, SPQ);
2171
2172  bool LHigh = SPQ->HighRegPressure(left);
2173  bool RHigh = SPQ->HighRegPressure(right);
2174  // Avoid causing spills. If register pressure is high, schedule for
2175  // register pressure reduction.
2176  if (LHigh && !RHigh)
2177    return true;
2178  else if (!LHigh && RHigh)
2179    return false;
2180  else if (!LHigh && !RHigh) {
2181    // Low register pressure situation, schedule to maximize instruction level
2182    // parallelism.
2183    if (left->NumPreds > right->NumPreds)
2184      return false;
2185    else if (left->NumPreds < right->NumPreds)
2186      return true;
2187  }
2188
2189  return BURRSort(left, right, SPQ);
2190}
2191
2192//===----------------------------------------------------------------------===//
2193//                    Preschedule for Register Pressure
2194//===----------------------------------------------------------------------===//
2195
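// canClobber - Return true if SU is a two-address instruction that uses one
// of the values defined by Op as a tied def&use operand.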
2196bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2197  if (SU->isTwoAddress) {
2198    unsigned Opc = SU->getNode()->getMachineOpcode();
2199    const TargetInstrDesc &TID = TII->get(Opc);
2200    unsigned NumRes = TID.getNumDefs();
2201    unsigned NumOps = TID.getNumOperands() - NumRes;
2202    for (unsigned i = 0; i != NumOps; ++i) {
2203      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2204        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2205        if (DU->getNodeId() != -1 &&
2206            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2207          return true;
2208      }
2209    }
2210  }
2211  return false;
2212}
2213
2214/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2215/// physical register defs.
2216static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2217                                  const TargetInstrInfo *TII,
2218                                  const TargetRegisterInfo *TRI) {
2219  SDNode *N = SuccSU->getNode();
2220  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2221  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2222  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2223  for (const SDNode *SUNode = SU->getNode(); SUNode;
2224       SUNode = SUNode->getGluedNode()) {
2225    if (!SUNode->isMachineOpcode())
2226      continue;
2227    const unsigned *SUImpDefs =
2228      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2229    if (!SUImpDefs)
2230      return false;
2231    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2232      EVT VT = N->getValueType(i);
2233      if (VT == MVT::Glue || VT == MVT::Other)
2234        continue;
2235      if (!N->hasAnyUseOfValue(i))
2236        continue;
2237      unsigned Reg = ImpDefs[i - NumDefs];
2238      for (;*SUImpDefs; ++SUImpDefs) {
2239        unsigned SUReg = *SUImpDefs;
2240        if (TRI->regsOverlap(Reg, SUReg))
2241          return true;
2242      }
2243    }
2244  }
2245  return false;
2246}
2247
2248/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2249/// are not handled well by the general register pressure reduction
2250/// heuristics. When presented with code like this:
2251///
2252///      N
2253///    / |
2254///   /  |
2255///  U  store
2256///  |
2257/// ...
2258///
2259/// the heuristics tend to push the store up, but since the
2260/// operand of the store has another use (U), this would increase
2261/// the length of that other use (the U->N edge).
2262///
2263/// This function transforms code like the above to route U's
2264/// dependence through the store when possible, like this:
2265///
2266///      N
2267///      ||
2268///      ||
2269///     store
2270///       |
2271///       U
2272///       |
2273///      ...
2274///
2275/// This results in the store being scheduled immediately
2276/// after N, which shortens the U->N live range, reducing
2277/// register pressure.
2278///
2279void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2280  // Visit all the nodes in topological order, working top-down.
2281  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2282    SUnit *SU = &(*SUnits)[i];
2283    // For now, only look at nodes with no data successors, such as stores.
2284    // These are especially important, due to the heuristics in
2285    // getNodePriority for nodes with no data successors.
2286    if (SU->NumSuccs != 0)
2287      continue;
2288    // For now, only look at nodes with exactly one data predecessor.
2289    if (SU->NumPreds != 1)
2290      continue;
2291    // Avoid prescheduling copies to virtual registers, which don't behave
2292    // like other nodes from the perspective of scheduling heuristics.
2293    if (SDNode *N = SU->getNode())
2294      if (N->getOpcode() == ISD::CopyToReg &&
2295          TargetRegisterInfo::isVirtualRegister
2296            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2297        continue;
2298
2299    // Locate the single data predecessor.
2300    SUnit *PredSU = 0;
2301    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2302         EE = SU->Preds.end(); II != EE; ++II)
2303      if (!II->isCtrl()) {
2304        PredSU = II->getSUnit();
2305        break;
2306      }
2307    assert(PredSU);
2308
2309    // Don't rewrite edges that carry physregs, because that requires additional
2310    // support infrastructure.
2311    if (PredSU->hasPhysRegDefs)
2312      continue;
2313    // Short-circuit the case where SU is PredSU's only data successor.
2314    if (PredSU->NumSuccs == 1)
2315      continue;
2316    // Avoid prescheduling to copies from virtual registers, which don't behave
2317    // like other nodes from the perspective of scheduling heuristics.
2318    if (SDNode *N = SU->getNode())
2319      if (N->getOpcode() == ISD::CopyFromReg &&
2320          TargetRegisterInfo::isVirtualRegister
2321            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2322        continue;
2323
2324    // Perform checks on the successors of PredSU.
2325    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2326         EE = PredSU->Succs.end(); II != EE; ++II) {
2327      SUnit *PredSuccSU = II->getSUnit();
2328      if (PredSuccSU == SU) continue;
2329      // If PredSU has another successor with no data successors, for
2330      // now don't attempt to choose either over the other.
2331      if (PredSuccSU->NumSuccs == 0)
2332        goto outer_loop_continue;
2333      // Don't break physical register dependencies.
2334      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2335        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2336          goto outer_loop_continue;
2337      // Don't introduce graph cycles.
2338      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2339        goto outer_loop_continue;
2340    }
2341
2342    // Ok, the transformation is safe and the heuristics suggest it is
2343    // profitable. Update the graph.
2344    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2345                 << " next to PredSU #" << PredSU->NodeNum
2346                 << " to guide scheduling in the presence of multiple uses\n");
2347    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2348      SDep Edge = PredSU->Succs[i];
2349      assert(!Edge.isAssignedRegDep());
2350      SUnit *SuccSU = Edge.getSUnit();
2351      if (SuccSU != SU) {
2352        Edge.setSUnit(PredSU);
2353        scheduleDAG->RemovePred(SuccSU, Edge);
2354        scheduleDAG->AddPred(SU, Edge);
2355        Edge.setSUnit(SU);
2356        scheduleDAG->AddPred(SuccSU, Edge);
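        // RemovePred erased an entry from PredSU->Succs and shifted the rest
        // left, so step the index back to revisit this slot.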
2357        --i;
2358      }
2359    }
2360  outer_loop_continue:;
2361  }
2362}
2363
2364/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2365/// it as a def&use operand, add a pseudo control edge from it to the other
2366/// node (if it won't create a cycle) so the two-address one will be scheduled
2367/// first (lower in the schedule). If both nodes are two-address, favor the
2368/// one that has a CopyToReg use (more likely to be a loop induction update).
2369/// If both are two-address, but one is commutable while the other is not
2370/// commutable, favor the one that's not commutable.
2371void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2372  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2373    SUnit *SU = &(*SUnits)[i];
2374    if (!SU->isTwoAddress)
2375      continue;
2376
2377    SDNode *Node = SU->getNode();
2378    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2379      continue;
2380
2381    bool isLiveOut = hasOnlyLiveOutUses(SU);
2382    unsigned Opc = Node->getMachineOpcode();
2383    const TargetInstrDesc &TID = TII->get(Opc);
2384    unsigned NumRes = TID.getNumDefs();
2385    unsigned NumOps = TID.getNumOperands() - NumRes;
2386    for (unsigned j = 0; j != NumOps; ++j) {
2387      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2388        continue;
2389      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2390      if (DU->getNodeId() == -1)
2391        continue;
2392      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2393      if (!DUSU) continue;
2394      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2395           E = DUSU->Succs.end(); I != E; ++I) {
2396        if (I->isCtrl()) continue;
2397        SUnit *SuccSU = I->getSUnit();
2398        if (SuccSU == SU)
2399          continue;
2400        // Be conservative. Ignore if nodes aren't at roughly the same
2401        // depth and height.
2402        if (SuccSU->getHeight() < SU->getHeight() &&
2403            (SU->getHeight() - SuccSU->getHeight()) > 1)
2404          continue;
2405        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2406        // constrains whatever is using the copy, instead of the copy
2407        // itself. In the case that the copy is coalesced, this
2408        // preserves the intent of the pseudo two-address heuristics.
2409        while (SuccSU->Succs.size() == 1 &&
2410               SuccSU->getNode()->isMachineOpcode() &&
2411               SuccSU->getNode()->getMachineOpcode() ==
2412                 TargetOpcode::COPY_TO_REGCLASS)
2413          SuccSU = SuccSU->Succs.front().getSUnit();
2414        // Don't constrain non-instruction nodes.
2415        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2416          continue;
2417        // Don't constrain nodes with physical register defs if the
2418        // predecessor can clobber them.
2419        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2420          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2421            continue;
2422        }
2423        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2424        // these may be coalesced away. We want them close to their uses.
2425        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2426        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2427            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2428            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2429          continue;
2430        if ((!canClobber(SuccSU, DUSU) ||
2431             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2432             (!SU->isCommutable && SuccSU->isCommutable)) &&
2433            !scheduleDAG->IsReachable(SuccSU, SU)) {
2434          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2435                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2436          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2437                                        /*Reg=*/0, /*isNormalMemory=*/false,
2438                                        /*isMustAlias=*/false,
2439                                        /*isArtificial=*/true));
2440        }
2441      }
2442    }
2443  }
2444}
2445
2446/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
2447/// predecessors of the successors of the SUnit SU. Stop when the provided
2448/// limit is exceeded.
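/// Note: because of the early exit the returned sum never exceeds Limit + 1,
/// so callers can only distinguish counts up to Limit; td_ls_rr_sort below
/// uses Limit == 1 to test for exactly one unscheduled predecessor.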
2449static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2450                                                    unsigned Limit) {
2451  unsigned Sum = 0;
2452  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2453       I != E; ++I) {
2454    const SUnit *SuccSU = I->getSUnit();
2455    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2456         EE = SuccSU->Preds.end(); II != EE; ++II) {
2457      SUnit *PredSU = II->getSUnit();
2458      if (!PredSU->isScheduled)
2459        if (++Sum > Limit)
2460          return Sum;
2461    }
2462  }
2463  return Sum;
2464}
2465
2466
2467// Top down
2468bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2469  unsigned LPriority = SPQ->getNodePriority(left);
2470  unsigned RPriority = SPQ->getNodePriority(right);
2471  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2472  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2473  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2474  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2475  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2476  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2477
2478  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2479    return false;
2480  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2481    return true;
2482
2483  if (LIsFloater)
2484    LBonus -= 2;
2485  if (RIsFloater)
2486    RBonus -= 2;
2487  if (left->NumSuccs == 1)
2488    LBonus += 2;
2489  if (right->NumSuccs == 1)
2490    RBonus += 2;
2491
2492  if (LPriority+LBonus != RPriority+RBonus)
2493    return LPriority+LBonus < RPriority+RBonus;
2494
2495  if (left->getDepth() != right->getDepth())
2496    return left->getDepth() < right->getDepth();
2497
2498  if (left->NumSuccsLeft != right->NumSuccsLeft)
2499    return left->NumSuccsLeft > right->NumSuccsLeft;
2500
2501  assert(left->NodeQueueId && right->NodeQueueId &&
2502         "NodeQueueId cannot be zero");
2503  return (left->NodeQueueId > right->NodeQueueId);
2504}
2505
2506//===----------------------------------------------------------------------===//
2507//                         Public Constructor Functions
2508//===----------------------------------------------------------------------===//
2509
2510llvm::ScheduleDAGSDNodes *
2511llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2512                                 CodeGenOpt::Level OptLevel) {
2513  const TargetMachine &TM = IS->TM;
2514  const TargetInstrInfo *TII = TM.getInstrInfo();
2515  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2516
2517  BURegReductionPriorityQueue *PQ =
2518    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2519  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2520  PQ->setScheduleDAG(SD);
2521  return SD;
2522}
2523
2524llvm::ScheduleDAGSDNodes *
2525llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2526                                 CodeGenOpt::Level OptLevel) {
2527  const TargetMachine &TM = IS->TM;
2528  const TargetInstrInfo *TII = TM.getInstrInfo();
2529  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2530
2531  TDRegReductionPriorityQueue *PQ =
2532    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2533  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2534  PQ->setScheduleDAG(SD);
2535  return SD;
2536}
2537
2538llvm::ScheduleDAGSDNodes *
2539llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2540                                   CodeGenOpt::Level OptLevel) {
2541  const TargetMachine &TM = IS->TM;
2542  const TargetInstrInfo *TII = TM.getInstrInfo();
2543  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2544
2545  SrcRegReductionPriorityQueue *PQ =
2546    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2547  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2548  PQ->setScheduleDAG(SD);
2549  return SD;
2550}
2551
2552llvm::ScheduleDAGSDNodes *
2553llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2554                                   CodeGenOpt::Level OptLevel) {
2555  const TargetMachine &TM = IS->TM;
2556  const TargetInstrInfo *TII = TM.getInstrInfo();
2557  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2558  const TargetLowering *TLI = &IS->getTargetLowering();
2559
2560  HybridBURRPriorityQueue *PQ =
2561    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2562
2563  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2564  PQ->setScheduleDAG(SD);
2565  return SD;
2566}
2567
2568llvm::ScheduleDAGSDNodes *
2569llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2570                                CodeGenOpt::Level OptLevel) {
2571  const TargetMachine &TM = IS->TM;
2572  const TargetInstrInfo *TII = TM.getInstrInfo();
2573  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2574  const TargetLowering *TLI = &IS->getTargetLowering();
2575
2576  ILPBURRPriorityQueue *PQ =
2577    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2578  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2579  PQ->setScheduleDAG(SD);
2580  return SD;
2581}
2582