ScheduleDAGRRList.cpp revision f697c8a19adf962a933b055383952e72789a0e20
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
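
// Editor's illustrative sketch (not part of the scheduler itself): the list-
// scheduling loop described above, in simplified bottom-up form. The names
// Ready, isLegalToSchedule, and emit are placeholders for the machinery
// implemented below (AvailableQueue, DelayForLiveRegsBottomUp,
// ScheduleNodeBottomUp, and friends).
//
//   while (!Ready.empty()) {
//     SUnit *SU = Ready.pop();          // highest-priority available node
//     if (!isLegalToSchedule(SU)) {     // e.g. live phys-reg interference
//       defer(SU);                      // revisit once the conflict clears
//       continue;
//     }
//     emit(SU);                         // append to the schedule
//     for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
//          I != E; ++I)                 // bottom-up: release predecessors
//       if (--I->getSUnit()->NumSuccsLeft == 0)
//         Ready.push(I->getSUnit());
//   }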
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
24#include "llvm/Target/TargetRegisterInfo.h"
25#include "llvm/Target/TargetData.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetInstrInfo.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
39STATISTIC(NumUnfolds,    "Number of nodes unfolded");
40STATISTIC(NumDups,       "Number of duplicated nodes");
41STATISTIC(NumPRCopies,   "Number of physical register copies");
42
43static RegisterScheduler
44  burrListDAGScheduler("list-burr",
45                       "Bottom-up register reduction list scheduling",
46                       createBURRListDAGScheduler);
47static RegisterScheduler
48  tdrListrDAGScheduler("list-tdrr",
49                       "Top-down register reduction list scheduling",
50                       createTDRRListDAGScheduler);
51static RegisterScheduler
52  sourceListDAGScheduler("source",
53                         "Similar to list-burr but schedules in source "
54                         "order when possible",
55                         createSourceListDAGScheduler);
56
57static RegisterScheduler
58  hybridListDAGScheduler("list-hybrid",
59                         "Bottom-up register pressure aware list scheduling "
60                         "which tries to balance latency and register pressure",
61                         createHybridListDAGScheduler);
62
63static RegisterScheduler
64  ILPListDAGScheduler("list-ilp",
65                      "Bottom-up register pressure aware list scheduling "
66                      "which tries to balance ILP and register pressure",
67                      createILPListDAGScheduler);
68
69static cl::opt<bool> EnableSchedCycles(
70  "enable-sched-cycles",
71  cl::desc("Enable cycle-level precision during preRA scheduling"),
72  cl::init(false), cl::Hidden);
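
// For reference, the schedulers registered above are selected via llc's
// -pre-RA-sched option. A hedged example invocation (the input file name is
// hypothetical; the flag spellings come from the registrations and the
// cl::opt above):
//
//   llc -pre-RA-sched=list-hybrid -enable-sched-cycles foo.ll -o foo.s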
73
74namespace {
75//===----------------------------------------------------------------------===//
76/// ScheduleDAGRRList - The actual register reduction list scheduler
77/// implementation.  This supports both top-down and bottom-up scheduling.
78///
79class ScheduleDAGRRList : public ScheduleDAGSDNodes {
80private:
81  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
82  /// it is top-down.
83  bool isBottomUp;
84
85  /// NeedLatency - True if the scheduler will make use of latency information.
86  ///
87  bool NeedLatency;
88
89  /// AvailableQueue - The priority queue to use for the available SUnits.
90  SchedulingPriorityQueue *AvailableQueue;
91
92  /// PendingQueue - This contains all of the instructions whose operands have
93  /// been issued, but their results are not ready yet (due to the latency of
94  /// the operation).  Once the operands become available, the instruction is
95  /// added to the AvailableQueue.
96  std::vector<SUnit*> PendingQueue;
97
98  /// HazardRec - The hazard recognizer to use.
99  ScheduleHazardRecognizer *HazardRec;
100
101  /// CurCycle - The current scheduler state corresponds to this cycle.
102  unsigned CurCycle;
103
104  /// MinAvailableCycle - Cycle of the soonest available instruction.
105  unsigned MinAvailableCycle;
106
107  /// LiveRegDefs - A set of physical registers and their definitions that are
108  /// "live". These nodes must be scheduled before any other nodes that modify
109  /// the registers can be scheduled.
110  unsigned NumLiveRegs;
111  std::vector<SUnit*> LiveRegDefs;
112  std::vector<SUnit*> LiveRegGens;
113
114  /// Topo - A topological ordering for SUnits which permits fast IsReachable
115  /// and similar queries.
116  ScheduleDAGTopologicalSort Topo;
117
118public:
119  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
120                    SchedulingPriorityQueue *availqueue,
121                    CodeGenOpt::Level OptLevel)
122    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
123      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
124      Topo(SUnits) {
125
126    const TargetMachine &tm = mf.getTarget();
127    if (EnableSchedCycles && OptLevel != CodeGenOpt::None)
128      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
129    else
130      HazardRec = new ScheduleHazardRecognizer();
131  }
132
133  ~ScheduleDAGRRList() {
134    delete HazardRec;
135    delete AvailableQueue;
136  }
137
138  void Schedule();
139
140  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
141
142  /// IsReachable - Checks if SU is reachable from TargetSU.
143  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
144    return Topo.IsReachable(SU, TargetSU);
145  }
146
147  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
148  /// create a cycle.
149  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
150    return Topo.WillCreateCycle(SU, TargetSU);
151  }
152
153  /// AddPred - adds a predecessor edge to SUnit SU.
154  /// Updates the topological ordering if required.
156  void AddPred(SUnit *SU, const SDep &D) {
157    Topo.AddPred(SU, D.getSUnit());
158    SU->addPred(D);
159  }
160
161  /// RemovePred - removes a predecessor edge from SUnit SU.
162  /// Updates the topological ordering if required.
164  void RemovePred(SUnit *SU, const SDep &D) {
165    Topo.RemovePred(SU, D.getSUnit());
166    SU->removePred(D);
167  }
168
169private:
170  bool isReady(SUnit *SU) {
171    return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() ||
172      AvailableQueue->isReady(SU);
173  }
174
175  void ReleasePred(SUnit *SU, const SDep *PredEdge);
176  void ReleasePredecessors(SUnit *SU);
177  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
178  void ReleaseSuccessors(SUnit *SU);
179  void ReleasePending();
180  void AdvanceToCycle(unsigned NextCycle);
181  void AdvancePastStalls(SUnit *SU);
182  void EmitNode(SUnit *SU);
183  void ScheduleNodeBottomUp(SUnit*);
184  void CapturePred(SDep *PredEdge);
185  void UnscheduleNodeBottomUp(SUnit*);
186  void RestoreHazardCheckerBottomUp();
187  void BacktrackBottomUp(SUnit*, SUnit*);
188  SUnit *CopyAndMoveSuccessors(SUnit*);
189  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
190                                const TargetRegisterClass*,
191                                const TargetRegisterClass*,
192                                SmallVector<SUnit*, 2>&);
193  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
194
195  SUnit *PickNodeToScheduleBottomUp();
196  void ListScheduleBottomUp();
197
198  void ScheduleNodeTopDown(SUnit*);
199  void ListScheduleTopDown();
200
201
202  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
203  /// Updates the topological ordering if required.
204  SUnit *CreateNewSUnit(SDNode *N) {
205    unsigned NumSUnits = SUnits.size();
206    SUnit *NewNode = NewSUnit(N);
207    // Update the topological ordering.
208    if (NewNode->NodeNum >= NumSUnits)
209      Topo.InitDAGTopologicalSorting();
210    return NewNode;
211  }
212
213  /// CreateClone - Creates a new SUnit from an existing one.
214  /// Updates the topological ordering if required.
215  SUnit *CreateClone(SUnit *N) {
216    unsigned NumSUnits = SUnits.size();
217    SUnit *NewNode = Clone(N);
218    // Update the topological ordering.
219    if (NewNode->NodeNum >= NumSUnits)
220      Topo.InitDAGTopologicalSorting();
221    return NewNode;
222  }
223
224  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
225  /// need actual latency information but the hybrid scheduler does.
226  bool ForceUnitLatencies() const {
227    return !NeedLatency;
228  }
229};
230}  // end anonymous namespace
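
// Editor's note: a minimal sketch of how the Topo wrappers above are meant to
// be used when rewiring the DAG (the Scheduler pointer is hypothetical; the
// same pattern appears later in PickNodeToScheduleBottomUp). Checking
// WillCreateCycle before AddPred keeps the graph acyclic and the topological
// ordering valid:
//
//   if (!Scheduler->WillCreateCycle(TrySU, BtSU))
//     Scheduler->AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
//                                    /*Reg=*/0, /*isNormalMemory=*/false,
//                                    /*isMustAlias=*/false,
//                                    /*isArtificial=*/true));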
231
232
233/// Schedule - Schedule the DAG using list scheduling.
234void ScheduleDAGRRList::Schedule() {
235  DEBUG(dbgs()
236        << "********** List Scheduling BB#" << BB->getNumber()
237        << " '" << BB->getName() << "' **********\n");
238
239  CurCycle = 0;
240  MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0;
241  NumLiveRegs = 0;
242  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
243  LiveRegGens.resize(TRI->getNumRegs(), NULL);
244
245  // Build the scheduling graph.
246  BuildSchedGraph(NULL);
247
248  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
249          SUnits[su].dumpAll(this));
250  Topo.InitDAGTopologicalSorting();
251
252  AvailableQueue->initNodes(SUnits);
253
254  HazardRec->Reset();
255
256  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
257  if (isBottomUp)
258    ListScheduleBottomUp();
259  else
260    ListScheduleTopDown();
261
262  AvailableQueue->releaseState();
263}
264
265//===----------------------------------------------------------------------===//
266//  Bottom-Up Scheduling
267//===----------------------------------------------------------------------===//
268
269/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
270/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
271void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
272  SUnit *PredSU = PredEdge->getSUnit();
273
274#ifndef NDEBUG
275  if (PredSU->NumSuccsLeft == 0) {
276    dbgs() << "*** Scheduling failed! ***\n";
277    PredSU->dump(this);
278    dbgs() << " has been released too many times!\n";
279    llvm_unreachable(0);
280  }
281#endif
282  --PredSU->NumSuccsLeft;
283
284  if (!ForceUnitLatencies()) {
285    // Updating predecessor's height. This is now the cycle when the
286    // predecessor can be scheduled without causing a pipeline stall.
287    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
288  }
289
290  // If all the node's successors are scheduled, this node is ready
291  // to be scheduled. Ignore the special EntrySU node.
292  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
293    PredSU->isAvailable = true;
294
295    unsigned Height = PredSU->getHeight();
296    if (Height < MinAvailableCycle)
297      MinAvailableCycle = Height;
298
299    if (isReady(PredSU)) {
300      AvailableQueue->push(PredSU);
301    }
302    // CapturePred and others may have left the node in the pending queue;
303    // avoid adding it twice.
304    else if (!PredSU->isPending) {
305      PredSU->isPending = true;
306      PendingQueue.push_back(PredSU);
307    }
308  }
309}
310
311/// Call ReleasePred for each predecessor, then update register live def/gen.
312/// Always update LiveRegDefs for a register dependence even if the current SU
313/// also defines the register. This effectively create one large live range
314/// across a sequence of two-address node. This is important because the
315/// entire chain must be scheduled together. Example:
316///
317/// flags = (3) add
318/// flags = (2) addc flags
319/// flags = (1) addc flags
320///
321/// results in
322///
323/// LiveRegDefs[flags] = 3
324/// LiveRegGens[flags] = 1
325///
326/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
327/// interference on flags.
328void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
329  // Bottom up: release predecessors
330  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
331       I != E; ++I) {
332    ReleasePred(SU, &*I);
333    if (I->isAssignedRegDep()) {
334      // This is a physical register dependency and it's impossible or
335      // expensive to copy the register. Make sure nothing that can
336      // clobber the register is scheduled between the predecessor and
337      // this node.
338      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
339      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
340             "interference on register dependence");
341      LiveRegDefs[I->getReg()] = I->getSUnit();
342      if (!LiveRegGens[I->getReg()]) {
343        ++NumLiveRegs;
344        LiveRegGens[I->getReg()] = SU;
345      }
346    }
347  }
348}
349
350/// Check to see if any of the pending instructions are ready to issue.  If
351/// so, add them to the available queue.
352void ScheduleDAGRRList::ReleasePending() {
353  if (!EnableSchedCycles) {
354    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
355    return;
356  }
357
358  // If the available queue is empty, it is safe to reset MinAvailableCycle.
359  if (AvailableQueue->empty())
360    MinAvailableCycle = UINT_MAX;
361
362  // Check to see if any of the pending instructions are ready to issue.  If
363  // so, add them to the available queue.
364  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
365    unsigned ReadyCycle =
366      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
367    if (ReadyCycle < MinAvailableCycle)
368      MinAvailableCycle = ReadyCycle;
369
370    if (PendingQueue[i]->isAvailable) {
371      if (!isReady(PendingQueue[i]))
372          continue;
373      AvailableQueue->push(PendingQueue[i]);
374    }
375    PendingQueue[i]->isPending = false;
376    PendingQueue[i] = PendingQueue.back();
377    PendingQueue.pop_back();
378    --i; --e;
379  }
380}
381
382/// Move the scheduler state forward to the specified cycle.
383void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
384  if (NextCycle <= CurCycle)
385    return;
386
387  AvailableQueue->setCurCycle(NextCycle);
388  if (HazardRec->getMaxLookAhead() == 0) {
389    // Bypass lots of virtual calls in case of long latency.
390    CurCycle = NextCycle;
391  }
392  else {
393    for (; CurCycle != NextCycle; ++CurCycle) {
394      if (isBottomUp)
395        HazardRec->RecedeCycle();
396      else
397        HazardRec->AdvanceCycle();
398    }
399  }
400  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
401  // available Q to release pending nodes at least once before popping.
402  ReleasePending();
403}
404
405/// Move the scheduler state forward until the specified node's dependents are
406/// ready and can be scheduled with no resource conflicts.
407void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
408  if (!EnableSchedCycles)
409    return;
410
411  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
412
413  // Bump CurCycle to account for latency. We assume the latency of other
414  // available instructions may be hidden by the stall (not a full pipe stall).
415  // This updates the hazard recognizer's cycle before reserving resources for
416  // this instruction.
417  AdvanceToCycle(ReadyCycle);
418
419  // Calls are scheduled in their preceding cycle, so don't conflict with
420  // hazards from instructions after the call. EmitNode will reset the
421  // scoreboard state before emitting the call.
422  if (isBottomUp && SU->isCall)
423    return;
424
425  // FIXME: For resource conflicts in very long non-pipelined stages, we
426  // should probably skip ahead here to avoid useless scoreboard checks.
427  int Stalls = 0;
428  while (true) {
429    ScheduleHazardRecognizer::HazardType HT =
430      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
431
432    if (HT == ScheduleHazardRecognizer::NoHazard)
433      break;
434
435    ++Stalls;
436  }
437  AdvanceToCycle(CurCycle + Stalls);
438}
439
440/// Record this SUnit in the HazardRecognizer.
441/// Does not update CurCycle.
442void ScheduleDAGRRList::EmitNode(SUnit *SU) {
443  if (!EnableSchedCycles || HazardRec->getMaxLookAhead() == 0)
444    return;
445
446  // Check for phys reg copy.
447  if (!SU->getNode())
448    return;
449
450  switch (SU->getNode()->getOpcode()) {
451  default:
452    assert(SU->getNode()->isMachineOpcode() &&
453           "This target-independent node should not be scheduled.");
454    break;
455  case ISD::MERGE_VALUES:
456  case ISD::TokenFactor:
457  case ISD::CopyToReg:
458  case ISD::CopyFromReg:
459  case ISD::EH_LABEL:
460    // Noops don't affect the scoreboard state. Copies are likely to be
461    // removed.
462    return;
463  case ISD::INLINEASM:
464    // For inline asm, clear the pipeline state.
465    HazardRec->Reset();
466    return;
467  }
468  if (isBottomUp && SU->isCall) {
469    // Calls are scheduled with their preceding instructions. For bottom-up
470    // scheduling, clear the pipeline state before emitting.
471    HazardRec->Reset();
472  }
473
474  HazardRec->EmitInstruction(SU);
475
476  if (!isBottomUp && SU->isCall) {
477    HazardRec->Reset();
478  }
479}
480
481/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
482/// count of its predecessors. If a predecessor pending count is zero, add it to
483/// the Available queue.
484void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
485  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
486  DEBUG(SU->dump(this));
487
488#ifndef NDEBUG
489  if (CurCycle < SU->getHeight())
490    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
491#endif
492
493  // FIXME: Do not modify node height. It may interfere with
494  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
495  // node, its ready cycle can aid heuristics, and after scheduling it can
496  // indicate the scheduled cycle.
497  SU->setHeightToAtLeast(CurCycle);
498
499  // Reserve resources for the scheduled instruction.
500  EmitNode(SU);
501
502  Sequence.push_back(SU);
503
504  AvailableQueue->ScheduledNode(SU);
505
506  // Update liveness of predecessors before successors to avoid treating a
507  // two-address node as a live range def.
508  ReleasePredecessors(SU);
509
510  // Release all the implicit physical register defs that are live.
511  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
512       I != E; ++I) {
513    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
514    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
515      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
516      --NumLiveRegs;
517      LiveRegDefs[I->getReg()] = NULL;
518      LiveRegGens[I->getReg()] = NULL;
519    }
520  }
521
522  SU->isScheduled = true;
523
524  // Conditions under which the scheduler should eagerly advance the cycle:
525  // (1) No available instructions
526  // (2) All pipelines full, so available instructions must have hazards.
527  //
528  // If SchedCycles is disabled, count each inst as one cycle.
529  if (!EnableSchedCycles ||
530      AvailableQueue->empty() || HazardRec->atIssueLimit())
531    AdvanceToCycle(CurCycle + 1);
532}
533
534/// CapturePred - This does the opposite of ReleasePred. Since SU is being
535/// unscheduled, increase the succ left count of its predecessors. Remove
536/// them from AvailableQueue if necessary.
537void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
538  SUnit *PredSU = PredEdge->getSUnit();
539  if (PredSU->isAvailable) {
540    PredSU->isAvailable = false;
541    if (!PredSU->isPending)
542      AvailableQueue->remove(PredSU);
543  }
544
545  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
546  ++PredSU->NumSuccsLeft;
547}
548
549/// UnscheduleNodeBottomUp - Remove the node from the schedule and update its
550/// state and its predecessors' states to reflect the change.
551void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
552  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
553  DEBUG(SU->dump(this));
554
555  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
556       I != E; ++I) {
557    CapturePred(&*I);
558    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
559      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
560      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
561             "Physical register dependency violated?");
562      --NumLiveRegs;
563      LiveRegDefs[I->getReg()] = NULL;
564      LiveRegGens[I->getReg()] = NULL;
565    }
566  }
567
568  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
569       I != E; ++I) {
570    if (I->isAssignedRegDep()) {
571      if (!LiveRegDefs[I->getReg()]) {
572        ++NumLiveRegs;
573      }
574      // This becomes the nearest def. Note that an earlier def may still be
575      // pending if this is a two-address node.
576      LiveRegDefs[I->getReg()] = SU;
577      if (LiveRegGens[I->getReg()] == NULL ||
578          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
579        LiveRegGens[I->getReg()] = I->getSUnit();
580    }
581  }
582  if (SU->getHeight() < MinAvailableCycle)
583    MinAvailableCycle = SU->getHeight();
584
585  SU->setHeightDirty();
586  SU->isScheduled = false;
587  SU->isAvailable = true;
588  if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) {
589    // Don't make available until backtracking is complete.
590    SU->isPending = true;
591    PendingQueue.push_back(SU);
592  }
593  else {
594    AvailableQueue->push(SU);
595  }
596  AvailableQueue->UnscheduledNode(SU);
597}
598
599/// After backtracking, the hazard checker needs to be restored to a state
600/// corresponding to the current cycle.
601void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
602  HazardRec->Reset();
603
604  unsigned LookAhead = std::min((unsigned)Sequence.size(),
605                                HazardRec->getMaxLookAhead());
606  if (LookAhead == 0)
607    return;
608
609  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
610  unsigned HazardCycle = (*I)->getHeight();
611  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
612    SUnit *SU = *I;
613    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
614      HazardRec->RecedeCycle();
615    }
616    EmitNode(SU);
617  }
618}
619
620/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes up to and
621/// including BtSU, in order to make it possible to schedule SU.
622void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
623  SUnit *OldSU = Sequence.back();
624  while (true) {
625    Sequence.pop_back();
626    if (SU->isSucc(OldSU))
627      // Don't try to remove SU from AvailableQueue.
628      SU->isAvailable = false;
629    // FIXME: use ready cycle instead of height
630    CurCycle = OldSU->getHeight();
631    UnscheduleNodeBottomUp(OldSU);
632    AvailableQueue->setCurCycle(CurCycle);
633    if (OldSU == BtSU)
634      break;
635    OldSU = Sequence.back();
636  }
637
638  assert(!SU->isSucc(OldSU) && "Something is wrong!");
639
640  RestoreHazardCheckerBottomUp();
641
642  ReleasePending();
643
644  ++NumBacktracks;
645}
646
647static bool isOperandOf(const SUnit *SU, SDNode *N) {
648  for (const SDNode *SUNode = SU->getNode(); SUNode;
649       SUNode = SUNode->getGluedNode()) {
650    if (SUNode->isOperandOf(N))
651      return true;
652  }
653  return false;
654}
655
656/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
657/// successors to the newly created node.
658SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
659  SDNode *N = SU->getNode();
660  if (!N)
661    return NULL;
662
663  if (SU->getNode()->getGluedNode())
664    return NULL;
665
666  SUnit *NewSU;
667  bool TryUnfold = false;
668  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
669    EVT VT = N->getValueType(i);
670    if (VT == MVT::Glue)
671      return NULL;
672    else if (VT == MVT::Other)
673      TryUnfold = true;
674  }
675  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
676    const SDValue &Op = N->getOperand(i);
677    EVT VT = Op.getNode()->getValueType(Op.getResNo());
678    if (VT == MVT::Glue)
679      return NULL;
680  }
681
682  if (TryUnfold) {
683    SmallVector<SDNode*, 2> NewNodes;
684    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
685      return NULL;
686
687    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
688    assert(NewNodes.size() == 2 && "Expected a load folding node!");
689
690    N = NewNodes[1];
691    SDNode *LoadNode = NewNodes[0];
692    unsigned NumVals = N->getNumValues();
693    unsigned OldNumVals = SU->getNode()->getNumValues();
694    for (unsigned i = 0; i != NumVals; ++i)
695      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
696    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
697                                   SDValue(LoadNode, 1));
698
699    // LoadNode may already exist. This can happen when there is another
700    // load from the same location that produces the same type of value,
701    // but with different alignment or volatility.
702    bool isNewLoad = true;
703    SUnit *LoadSU;
704    if (LoadNode->getNodeId() != -1) {
705      LoadSU = &SUnits[LoadNode->getNodeId()];
706      isNewLoad = false;
707    } else {
708      LoadSU = CreateNewSUnit(LoadNode);
709      LoadNode->setNodeId(LoadSU->NodeNum);
710      ComputeLatency(LoadSU);
711    }
712
713    SUnit *NewSU = CreateNewSUnit(N);
714    assert(N->getNodeId() == -1 && "Node already inserted!");
715    N->setNodeId(NewSU->NodeNum);
716
717    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
718    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
719      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
720        NewSU->isTwoAddress = true;
721        break;
722      }
723    }
724    if (TID.isCommutable())
725      NewSU->isCommutable = true;
726    ComputeLatency(NewSU);
727
728    // Record all the edges to and from the old SU, by category.
729    SmallVector<SDep, 4> ChainPreds;
730    SmallVector<SDep, 4> ChainSuccs;
731    SmallVector<SDep, 4> LoadPreds;
732    SmallVector<SDep, 4> NodePreds;
733    SmallVector<SDep, 4> NodeSuccs;
734    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
735         I != E; ++I) {
736      if (I->isCtrl())
737        ChainPreds.push_back(*I);
738      else if (isOperandOf(I->getSUnit(), LoadNode))
739        LoadPreds.push_back(*I);
740      else
741        NodePreds.push_back(*I);
742    }
743    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
744         I != E; ++I) {
745      if (I->isCtrl())
746        ChainSuccs.push_back(*I);
747      else
748        NodeSuccs.push_back(*I);
749    }
750
751    // Now assign edges to the newly-created nodes.
752    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
753      const SDep &Pred = ChainPreds[i];
754      RemovePred(SU, Pred);
755      if (isNewLoad)
756        AddPred(LoadSU, Pred);
757    }
758    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
759      const SDep &Pred = LoadPreds[i];
760      RemovePred(SU, Pred);
761      if (isNewLoad)
762        AddPred(LoadSU, Pred);
763    }
764    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
765      const SDep &Pred = NodePreds[i];
766      RemovePred(SU, Pred);
767      AddPred(NewSU, Pred);
768    }
769    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
770      SDep D = NodeSuccs[i];
771      SUnit *SuccDep = D.getSUnit();
772      D.setSUnit(SU);
773      RemovePred(SuccDep, D);
774      D.setSUnit(NewSU);
775      AddPred(SuccDep, D);
776    }
777    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
778      SDep D = ChainSuccs[i];
779      SUnit *SuccDep = D.getSUnit();
780      D.setSUnit(SU);
781      RemovePred(SuccDep, D);
782      if (isNewLoad) {
783        D.setSUnit(LoadSU);
784        AddPred(SuccDep, D);
785      }
786    }
787
788    // Add a data dependency to reflect that NewSU reads the value defined
789    // by LoadSU.
790    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
791
792    if (isNewLoad)
793      AvailableQueue->addNode(LoadSU);
794    AvailableQueue->addNode(NewSU);
795
796    ++NumUnfolds;
797
798    if (NewSU->NumSuccsLeft == 0) {
799      NewSU->isAvailable = true;
800      return NewSU;
801    }
802    SU = NewSU;
803  }
804
805  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
806  NewSU = CreateClone(SU);
807
808  // New SUnit has the exact same predecessors.
809  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
810       I != E; ++I)
811    if (!I->isArtificial())
812      AddPred(NewSU, *I);
813
814  // Only copy scheduled successors. Cut them from the old node's successor
815  // list and move them over.
816  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
817  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
818       I != E; ++I) {
819    if (I->isArtificial())
820      continue;
821    SUnit *SuccSU = I->getSUnit();
822    if (SuccSU->isScheduled) {
823      SDep D = *I;
824      D.setSUnit(NewSU);
825      AddPred(SuccSU, D);
826      D.setSUnit(SU);
827      DelDeps.push_back(std::make_pair(SuccSU, D));
828    }
829  }
830  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
831    RemovePred(DelDeps[i].first, DelDeps[i].second);
832
833  AvailableQueue->updateNode(SU);
834  AvailableQueue->addNode(NewSU);
835
836  ++NumDups;
837  return NewSU;
838}
839
840/// InsertCopiesAndMoveSuccs - Insert register copies and move all
841/// scheduled successors of the given SUnit to the last copy.
842void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
843                                               const TargetRegisterClass *DestRC,
844                                               const TargetRegisterClass *SrcRC,
845                                               SmallVector<SUnit*, 2> &Copies) {
846  SUnit *CopyFromSU = CreateNewSUnit(NULL);
847  CopyFromSU->CopySrcRC = SrcRC;
848  CopyFromSU->CopyDstRC = DestRC;
849
850  SUnit *CopyToSU = CreateNewSUnit(NULL);
851  CopyToSU->CopySrcRC = DestRC;
852  CopyToSU->CopyDstRC = SrcRC;
853
854  // Only copy scheduled successors. Cut them from the old node's successor
855  // list and move them over.
856  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
857  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
858       I != E; ++I) {
859    if (I->isArtificial())
860      continue;
861    SUnit *SuccSU = I->getSUnit();
862    if (SuccSU->isScheduled) {
863      SDep D = *I;
864      D.setSUnit(CopyToSU);
865      AddPred(SuccSU, D);
866      DelDeps.push_back(std::make_pair(SuccSU, *I));
867    }
868  }
869  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
870    RemovePred(DelDeps[i].first, DelDeps[i].second);
871
872  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
873  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
874
875  AvailableQueue->updateNode(SU);
876  AvailableQueue->addNode(CopyFromSU);
877  AvailableQueue->addNode(CopyToSU);
878  Copies.push_back(CopyFromSU);
879  Copies.push_back(CopyToSU);
880
881  ++NumPRCopies;
882}
883
884/// getPhysicalRegisterVT - Returns the ValueType of the physical register
885/// definition of the specified node.
886/// FIXME: Move to SelectionDAG?
887static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
888                                 const TargetInstrInfo *TII) {
889  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
890  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
891  unsigned NumRes = TID.getNumDefs();
892  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
893    if (Reg == *ImpDef)
894      break;
895    ++NumRes;
896  }
897  return N->getValueType(NumRes);
898}
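
// Editor's illustrative example (the operand layout is hypothetical): for a
// node whose machine instruction has two explicit defs and implicit defs
// { EFLAGS, EAX }, a query for EAX skips past EFLAGS, so NumRes goes 2 -> 3
// and the function returns N->getValueType(3), i.e. the value type that
// corresponds to the node's EAX result.
//
//   EVT VT = getPhysicalRegisterVT(N, X86::EAX, TII); // hypothetical target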
899
900/// CheckForLiveRegDef - Check if the specified register def of the specified
901/// SUnit clobbers any "live" registers. If so, record them in LRegs.
902static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
903                               std::vector<SUnit*> &LiveRegDefs,
904                               SmallSet<unsigned, 4> &RegAdded,
905                               SmallVector<unsigned, 4> &LRegs,
906                               const TargetRegisterInfo *TRI) {
907  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
908
909    // Check if Reg or an overlapping alias is live.
910    if (!LiveRegDefs[*AliasI]) continue;
911
912    // Allow multiple uses of the same def.
913    if (LiveRegDefs[*AliasI] == SU) continue;
914
915    // Add the interfering register to the set of interfering live regs.
916    if (RegAdded.insert(*AliasI))
917      LRegs.push_back(*AliasI);
918  }
919}
920
921/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
922/// scheduling of the given node to satisfy live physical register dependencies.
923/// If the specific node is the last one that's available to schedule, do
924/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
925bool ScheduleDAGRRList::
926DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
927  if (NumLiveRegs == 0)
928    return false;
929
930  SmallSet<unsigned, 4> RegAdded;
931  // If this node would clobber any "live" register, then it's not ready.
932  //
933  // If SU is the currently live definition of the same register that it uses,
934  // then we are free to schedule it.
935  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
936       I != E; ++I) {
937    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
938      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
939                         RegAdded, LRegs, TRI);
940  }
941
942  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
943    if (Node->getOpcode() == ISD::INLINEASM) {
944      // Inline asm can clobber physical defs.
945      unsigned NumOps = Node->getNumOperands();
946      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
947        --NumOps;  // Ignore the glue operand.
948
949      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
950        unsigned Flags =
951          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
952        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
953
954        ++i; // Skip the ID value.
955        if (InlineAsm::isRegDefKind(Flags) ||
956            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
957          // Check for def of register or earlyclobber register.
958          for (; NumVals; --NumVals, ++i) {
959            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
960            if (TargetRegisterInfo::isPhysicalRegister(Reg))
961              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
962          }
963        } else
964          i += NumVals;
965      }
966      continue;
967    }
968
969    if (!Node->isMachineOpcode())
970      continue;
971    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
972    if (!TID.ImplicitDefs)
973      continue;
974    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
975      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
976  }
977
978  return !LRegs.empty();
979}
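
// Editor's sketch of the INLINEASM operand walk above (registers and counts
// are hypothetical). Starting at InlineAsm::Op_FirstOperand, the operands
// come in groups: one flag word, then that group's values, e.g.
//
//   [Flags: RegDef, NumVals=1] [Register: EAX]
//   [Flags: RegUse, NumVals=2] [Register: %vreg0] [Register: %vreg1]
//
// The loop reads the flag word, advances past it (++i), and then either
// inspects the next NumVals RegisterSDNode operands (for defs and
// earlyclobbers, checking each physical register against the live set) or
// simply skips them (i += NumVals).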
980
981/// Return a node that can be scheduled in this cycle. Requirements:
982/// (1) Ready: latency has been satisfied
983/// (2) No Hazards: resources are available
984/// (3) No Interferences: may unschedule to break register interferences.
985SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
986  SmallVector<SUnit*, 4> Interferences;
987  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
988
989  SUnit *CurSU = AvailableQueue->pop();
990  while (CurSU) {
991    SmallVector<unsigned, 4> LRegs;
992    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
993      break;
994    LRegsMap.insert(std::make_pair(CurSU, LRegs));
995
996    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
997    Interferences.push_back(CurSU);
998    CurSU = AvailableQueue->pop();
999  }
1000  if (CurSU) {
1001    // Add the nodes that aren't ready back onto the available list.
1002    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1003      Interferences[i]->isPending = false;
1004      assert(Interferences[i]->isAvailable && "must still be available");
1005      AvailableQueue->push(Interferences[i]);
1006    }
1007    return CurSU;
1008  }
1009
1010  // All candidates are delayed due to live physical reg dependencies.
1011  // Try backtracking, code duplication, or inserting cross class copies
1012  // to resolve it.
1013  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1014    SUnit *TrySU = Interferences[i];
1015    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1016
1017    // Try unscheduling up to the point where it's safe to schedule
1018    // this node.
1019    SUnit *BtSU = NULL;
1020    unsigned LiveCycle = UINT_MAX;
1021    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
1022      unsigned Reg = LRegs[j];
1023      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1024        BtSU = LiveRegGens[Reg];
1025        LiveCycle = BtSU->getHeight();
1026      }
1027    }
1028    if (!WillCreateCycle(TrySU, BtSU))  {
1029      BacktrackBottomUp(TrySU, BtSU);
1030
1031      // Force the current node to be scheduled before the node that
1032      // requires the physical reg dep.
1033      if (BtSU->isAvailable) {
1034        BtSU->isAvailable = false;
1035        if (!BtSU->isPending)
1036          AvailableQueue->remove(BtSU);
1037      }
1038      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
1039                          /*Reg=*/0, /*isNormalMemory=*/false,
1040                          /*isMustAlias=*/false, /*isArtificial=*/true));
1041
1042      // If one or more successors have been unscheduled, then the current
1043      // node is no longer available. Schedule a successor that's now
1044      // available instead.
1045      if (!TrySU->isAvailable) {
1046        CurSU = AvailableQueue->pop();
1047      }
1048      else {
1049        CurSU = TrySU;
1050        TrySU->isPending = false;
1051        Interferences.erase(Interferences.begin()+i);
1052      }
1053      break;
1054    }
1055  }
1056
1057  if (!CurSU) {
1058    // Can't backtrack. If it's too expensive to copy the value, then try to
1059    // duplicate the nodes that produce these "too expensive to copy"
1060    // values to break the dependency. In case even that doesn't work,
1061    // insert cross class copies.
1062    // If it's not too expensive, i.e. cost != -1, issue copies.
1063    SUnit *TrySU = Interferences[0];
1064    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
1065    assert(LRegs.size() == 1 && "Can't handle this yet!");
1066    unsigned Reg = LRegs[0];
1067    SUnit *LRDef = LiveRegDefs[Reg];
1068    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1069    const TargetRegisterClass *RC =
1070      TRI->getMinimalPhysRegClass(Reg, VT);
1071    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1072
1073    // If the cross-copy register class is null, then it must be possible to
1074    // copy the value directly. Do not try to duplicate the def.
1075    SUnit *NewDef = 0;
1076    if (DestRC)
1077      NewDef = CopyAndMoveSuccessors(LRDef);
1078    else
1079      DestRC = RC;
1080    if (!NewDef) {
1081      // Issue copies, these can be expensive cross register class copies.
1082      SmallVector<SUnit*, 2> Copies;
1083      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1084      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1085            << " to SU #" << Copies.front()->NodeNum << "\n");
1086      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
1087                          /*Reg=*/0, /*isNormalMemory=*/false,
1088                          /*isMustAlias=*/false,
1089                          /*isArtificial=*/true));
1090      NewDef = Copies.back();
1091    }
1092
1093    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1094          << " to SU #" << TrySU->NodeNum << "\n");
1095    LiveRegDefs[Reg] = NewDef;
1096    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
1097                         /*Reg=*/0, /*isNormalMemory=*/false,
1098                         /*isMustAlias=*/false,
1099                         /*isArtificial=*/true));
1100    TrySU->isAvailable = false;
1101    CurSU = NewDef;
1102  }
1103
1104  assert(CurSU && "Unable to resolve live physical register dependencies!");
1105
1106  // Add the nodes that aren't ready back onto the available list.
1107  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
1108    Interferences[i]->isPending = false;
1109    // May no longer be available due to backtracking.
1110    if (Interferences[i]->isAvailable) {
1111      AvailableQueue->push(Interferences[i]);
1112    }
1113  }
1114  return CurSU;
1115}
1116
1117/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1118/// schedulers.
1119void ScheduleDAGRRList::ListScheduleBottomUp() {
1120  // Release any predecessors of the special Exit node.
1121  ReleasePredecessors(&ExitSU);
1122
1123  // Add root to Available queue.
1124  if (!SUnits.empty()) {
1125    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1126    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1127    RootSU->isAvailable = true;
1128    AvailableQueue->push(RootSU);
1129  }
1130
1131  // While Available queue is not empty, grab the node with the highest
1132  // priority. If it is not ready, put it back.  Schedule the node.
1133  Sequence.reserve(SUnits.size());
1134  while (!AvailableQueue->empty()) {
1135    DEBUG(dbgs() << "\n*** Examining Available\n";
1136          AvailableQueue->dump(this));
1137
1138    // Pick the best node to schedule taking all constraints into
1139    // consideration.
1140    SUnit *SU = PickNodeToScheduleBottomUp();
1141
1142    AdvancePastStalls(SU);
1143
1144    ScheduleNodeBottomUp(SU);
1145
1146    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1147      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1148      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1149      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1150    }
1151  }
1152
1153  // Reverse the order if it is bottom up.
1154  std::reverse(Sequence.begin(), Sequence.end());
1155
1156#ifndef NDEBUG
1157  VerifySchedule(isBottomUp);
1158#endif
1159}
1160
1161//===----------------------------------------------------------------------===//
1162//  Top-Down Scheduling
1163//===----------------------------------------------------------------------===//
1164
1165/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
1166/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
1167void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
1168  SUnit *SuccSU = SuccEdge->getSUnit();
1169
1170#ifndef NDEBUG
1171  if (SuccSU->NumPredsLeft == 0) {
1172    dbgs() << "*** Scheduling failed! ***\n";
1173    SuccSU->dump(this);
1174    dbgs() << " has been released too many times!\n";
1175    llvm_unreachable(0);
1176  }
1177#endif
1178  --SuccSU->NumPredsLeft;
1179
1180  // If all the node's predecessors are scheduled, this node is ready
1181  // to be scheduled. Ignore the special ExitSU node.
1182  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
1183    SuccSU->isAvailable = true;
1184    AvailableQueue->push(SuccSU);
1185  }
1186}
1187
1188void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
1189  // Top down: release successors
1190  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1191       I != E; ++I) {
1192    assert(!I->isAssignedRegDep() &&
1193           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
1194
1195    ReleaseSucc(SU, &*I);
1196  }
1197}
1198
1199/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
1200/// count of its successors. If a successor pending count is zero, add it to
1201/// the Available queue.
1202void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
1203  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
1204  DEBUG(SU->dump(this));
1205
1206  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
1207  SU->setDepthToAtLeast(CurCycle);
1208  Sequence.push_back(SU);
1209
1210  ReleaseSuccessors(SU);
1211  SU->isScheduled = true;
1212  AvailableQueue->ScheduledNode(SU);
1213}
1214
1215/// ListScheduleTopDown - The main loop of list scheduling for top-down
1216/// schedulers.
1217void ScheduleDAGRRList::ListScheduleTopDown() {
1218  AvailableQueue->setCurCycle(CurCycle);
1219
1220  // Release any successors of the special Entry node.
1221  ReleaseSuccessors(&EntrySU);
1222
1223  // Add all leaves to the Available queue.
1224  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1225    // It is available if it has no predecessors.
1226    if (SUnits[i].Preds.empty()) {
1227      AvailableQueue->push(&SUnits[i]);
1228      SUnits[i].isAvailable = true;
1229    }
1230  }
1231
1232  // While Available queue is not empty, grab the node with the highest
1233  // priority. If it is not ready, put it back.  Schedule the node.
1234  Sequence.reserve(SUnits.size());
1235  while (!AvailableQueue->empty()) {
1236    SUnit *CurSU = AvailableQueue->pop();
1237
1238    if (CurSU)
1239      ScheduleNodeTopDown(CurSU);
1240    ++CurCycle;
1241    AvailableQueue->setCurCycle(CurCycle);
1242  }
1243
1244#ifndef NDEBUG
1245  VerifySchedule(isBottomUp);
1246#endif
1247}
1248
1249
1250//===----------------------------------------------------------------------===//
1251//                RegReductionPriorityQueue Definition
1252//===----------------------------------------------------------------------===//
1253//
1254// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1255// to reduce register pressure.
1256//
1257namespace {
1258class RegReductionPQBase;
1259
1260struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1261  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1262};
1263
1264// bu_ls_rr_sort - Priority function for bottom up register pressure
1265// reduction scheduler.
1266struct bu_ls_rr_sort : public queue_sort {
1267  enum {
1268    IsBottomUp = true,
1269    HasReadyFilter = false
1270  };
1271
1272  RegReductionPQBase *SPQ;
1273  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1274  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1275
1276  bool operator()(SUnit* left, SUnit* right) const;
1277};
1278
1279// td_ls_rr_sort - Priority function for top down register pressure reduction
1280// scheduler.
1281struct td_ls_rr_sort : public queue_sort {
1282  enum {
1283    IsBottomUp = false,
1284    HasReadyFilter = false
1285  };
1286
1287  RegReductionPQBase *SPQ;
1288  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1289  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
1290
1291  bool operator()(const SUnit* left, const SUnit* right) const;
1292};
1293
1294// src_ls_rr_sort - Priority function for source order scheduler.
1295struct src_ls_rr_sort : public queue_sort {
1296  enum {
1297    IsBottomUp = true,
1298    HasReadyFilter = false
1299  };
1300
1301  RegReductionPQBase *SPQ;
1302  src_ls_rr_sort(RegReductionPQBase *spq)
1303    : SPQ(spq) {}
1304  src_ls_rr_sort(const src_ls_rr_sort &RHS)
1305    : SPQ(RHS.SPQ) {}
1306
1307  bool operator()(SUnit* left, SUnit* right) const;
1308};
1309
1310// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1311struct hybrid_ls_rr_sort : public queue_sort {
1312  enum {
1313    IsBottomUp = true,
1314    HasReadyFilter = true
1315  };
1316
1317  RegReductionPQBase *SPQ;
1318  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1319    : SPQ(spq) {}
1320  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
1321    : SPQ(RHS.SPQ) {}
1322
1323  bool isReady(SUnit *SU, unsigned CurCycle) const;
1324
1325  bool operator()(SUnit* left, SUnit* right) const;
1326};
1327
1328// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1329// scheduler.
1330struct ilp_ls_rr_sort : public queue_sort {
1331  enum {
1332    IsBottomUp = true,
1333    HasReadyFilter = true
1334  };
1335
1336  RegReductionPQBase *SPQ;
1337  ilp_ls_rr_sort(RegReductionPQBase *spq)
1338    : SPQ(spq) {}
1339  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
1340    : SPQ(RHS.SPQ) {}
1341
1342  bool isReady(SUnit *SU, unsigned CurCycle) const;
1343
1344  bool operator()(SUnit* left, SUnit* right) const;
1345};
1346
1347class RegReductionPQBase : public SchedulingPriorityQueue {
1348protected:
1349  std::vector<SUnit*> Queue;
1350  unsigned CurQueueId;
1351  bool TracksRegPressure;
1352
1353  // SUnits - The SUnits for the current graph.
1354  std::vector<SUnit> *SUnits;
1355
1356  MachineFunction &MF;
1357  const TargetInstrInfo *TII;
1358  const TargetRegisterInfo *TRI;
1359  const TargetLowering *TLI;
1360  ScheduleDAGRRList *scheduleDAG;
1361
1362  // SethiUllmanNumbers - The SethiUllman number for each node.
1363  std::vector<unsigned> SethiUllmanNumbers;
1364
1365  /// RegPressure - Tracking current reg pressure per register class.
1366  ///
1367  std::vector<unsigned> RegPressure;
1368
1369  /// RegLimit - Tracking the number of allocatable registers per register
1370  /// class.
1371  std::vector<unsigned> RegLimit;
1372
1373public:
1374  RegReductionPQBase(MachineFunction &mf,
1375                     bool hasReadyFilter,
1376                     bool tracksrp,
1377                     const TargetInstrInfo *tii,
1378                     const TargetRegisterInfo *tri,
1379                     const TargetLowering *tli)
1380    : SchedulingPriorityQueue(hasReadyFilter),
1381      CurQueueId(0), TracksRegPressure(tracksrp),
1382      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1383    if (TracksRegPressure) {
1384      unsigned NumRC = TRI->getNumRegClasses();
1385      RegLimit.resize(NumRC);
1386      RegPressure.resize(NumRC);
1387      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1388      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1389      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1390             E = TRI->regclass_end(); I != E; ++I)
1391        RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
1392    }
1393  }
1394
1395  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1396    scheduleDAG = scheduleDag;
1397  }
1398
1399  ScheduleHazardRecognizer* getHazardRec() {
1400    return scheduleDAG->getHazardRec();
1401  }
1402
1403  void initNodes(std::vector<SUnit> &sunits);
1404
1405  void addNode(const SUnit *SU);
1406
1407  void updateNode(const SUnit *SU);
1408
1409  void releaseState() {
1410    SUnits = 0;
1411    SethiUllmanNumbers.clear();
1412    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1413  }
1414
1415  unsigned getNodePriority(const SUnit *SU) const;
1416
1417  unsigned getNodeOrdering(const SUnit *SU) const {
1418    return scheduleDAG->DAG->GetOrdering(SU->getNode());
1419  }
1420
1421  bool empty() const { return Queue.empty(); }
1422
1423  void push(SUnit *U) {
1424    assert(!U->NodeQueueId && "Node in the queue already");
1425    U->NodeQueueId = ++CurQueueId;
1426    Queue.push_back(U);
1427  }
1428
1429  void remove(SUnit *SU) {
1430    assert(!Queue.empty() && "Queue is empty!");
1431    assert(SU->NodeQueueId != 0 && "Not in queue!");
1432    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1433                                                 SU);
1434    if (I != prior(Queue.end()))
1435      std::swap(*I, Queue.back());
1436    Queue.pop_back();
1437    SU->NodeQueueId = 0;
1438  }
1439
1440  void dumpRegPressure() const;
1441
1442  bool HighRegPressure(const SUnit *SU) const;
1443
1444  bool MayReduceRegPressure(SUnit *SU);
1445
1446  void ScheduledNode(SUnit *SU);
1447
1448  void UnscheduledNode(SUnit *SU);
1449
1450protected:
1451  bool canClobber(const SUnit *SU, const SUnit *Op);
1452  void AddPseudoTwoAddrDeps();
1453  void PrescheduleNodesWithMultipleUses();
1454  void CalculateSethiUllmanNumbers();
1455};
1456
1457template<class SF>
1458class RegReductionPriorityQueue : public RegReductionPQBase {
1459  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1460    std::vector<SUnit *>::iterator Best = Q.begin();
1461    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1462           E = Q.end(); I != E; ++I)
1463      if (Picker(*Best, *I))
1464        Best = I;
1465    SUnit *V = *Best;
1466    if (Best != prior(Q.end()))
1467      std::swap(*Best, Q.back());
1468    Q.pop_back();
1469    return V;
1470  }
1471
1472  SF Picker;
1473
1474public:
1475  RegReductionPriorityQueue(MachineFunction &mf,
1476                            bool tracksrp,
1477                            const TargetInstrInfo *tii,
1478                            const TargetRegisterInfo *tri,
1479                            const TargetLowering *tli)
1480    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
1481      Picker(this) {}
1482
1483  bool isBottomUp() const { return SF::IsBottomUp; }
1484
1485  bool isReady(SUnit *U) const {
1486    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1487  }
1488
1489  SUnit *pop() {
1490    if (Queue.empty()) return NULL;
1491
1492    SUnit *V = popFromQueue(Queue, Picker);
1493    V->NodeQueueId = 0;
1494    return V;
1495  }
1496
1497  void dump(ScheduleDAG *DAG) const {
1498    // Emulate pop() without clobbering NodeQueueIds.
1499    std::vector<SUnit*> DumpQueue = Queue;
1500    SF DumpPicker = Picker;
1501    while (!DumpQueue.empty()) {
1502      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
1503      if (isBottomUp())
1504        dbgs() << "Height " << SU->getHeight() << ": ";
1505      else
1506        dbgs() << "Depth " << SU->getDepth() << ": ";
1507      SU->dump(DAG);
1508    }
1509  }
1510};
1511
1512typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1513BURegReductionPriorityQueue;
1514
1515typedef RegReductionPriorityQueue<td_ls_rr_sort>
1516TDRegReductionPriorityQueue;
1517
1518typedef RegReductionPriorityQueue<src_ls_rr_sort>
1519SrcRegReductionPriorityQueue;
1520
1521typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1522HybridBURRPriorityQueue;
1523
1524typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1525ILPBURRPriorityQueue;
1526} // end anonymous namespace
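
// Editor's note: a new heuristic would plug into the same pattern as the sort
// functors above. A minimal sketch (my_ls_rr_sort and MyBURRPriorityQueue are
// hypothetical names):
//
//   struct my_ls_rr_sort : public queue_sort {
//     enum { IsBottomUp = true, HasReadyFilter = false };
//     RegReductionPQBase *SPQ;
//     my_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
//     bool operator()(SUnit *left, SUnit *right) const; // true if left is
//                                                       // lower priority
//   };
//   typedef RegReductionPriorityQueue<my_ls_rr_sort> MyBURRPriorityQueue;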
1527
1528//===----------------------------------------------------------------------===//
1529//           Static Node Priority for Register Pressure Reduction
1530//===----------------------------------------------------------------------===//
1531
1532/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1533/// Smaller number is the higher priority.
1534static unsigned
1535CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1536  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1537  if (SethiUllmanNumber != 0)
1538    return SethiUllmanNumber;
1539
1540  unsigned Extra = 0;
1541  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1542       I != E; ++I) {
1543    if (I->isCtrl()) continue;  // ignore chain preds
1544    SUnit *PredSU = I->getSUnit();
1545    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1546    if (PredSethiUllman > SethiUllmanNumber) {
1547      SethiUllmanNumber = PredSethiUllman;
1548      Extra = 0;
1549    } else if (PredSethiUllman == SethiUllmanNumber)
1550      ++Extra;
1551  }
1552
1553  SethiUllmanNumber += Extra;
1554
1555  if (SethiUllmanNumber == 0)
1556    SethiUllmanNumber = 1;
1557
1558  return SethiUllmanNumber;
1559}
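
// Editor's worked example for the computation above (the DAG is hypothetical):
// consider t3 = add t1, t2 where t1 and t2 are leaves.
//
//   t1, t2: no non-chain preds, so the number is forced from 0 up to 1.
//   t3:     first pred  (1 > 0)  -> SethiUllmanNumber = 1, Extra = 0
//           second pred (1 == 1) -> Extra = 1
//           final number = 1 + 1 = 2
//
// A node whose preds are numbered 2 and 1 keeps the maximum (2) with no
// Extra, so balanced subtrees raise the number while straight chains keep it
// flat, which is the classic Sethi-Ullman estimate of registers needed.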
1560
1561/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1562/// scheduling units.
1563void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1564  SethiUllmanNumbers.assign(SUnits->size(), 0);
1565
1566  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1567    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1568}
1569
1570void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
1571  SUnits = &sunits;
1572  // Add pseudo dependency edges for two-address nodes.
1573  AddPseudoTwoAddrDeps();
1574  // Reroute edges to nodes with multiple uses.
1575  PrescheduleNodesWithMultipleUses();
1576  // Calculate node priorities.
1577  CalculateSethiUllmanNumbers();
1578}
1579
1580void RegReductionPQBase::addNode(const SUnit *SU) {
1581  unsigned SUSize = SethiUllmanNumbers.size();
1582  if (SUnits->size() > SUSize)
1583    SethiUllmanNumbers.resize(SUSize*2, 0);
1584  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1585}
1586
1587void RegReductionPQBase::updateNode(const SUnit *SU) {
1588  SethiUllmanNumbers[SU->NodeNum] = 0;
1589  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1590}
1591
1592unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1593  assert(SU->NodeNum < SethiUllmanNumbers.size());
1594  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1595  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1596    // CopyToReg should be close to its uses to facilitate coalescing and
1597    // avoid spilling.
1598    return 0;
1599  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1600      Opc == TargetOpcode::SUBREG_TO_REG ||
1601      Opc == TargetOpcode::INSERT_SUBREG)
1602    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1603    // close to their uses to facilitate coalescing.
1604    return 0;
1605  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1606    // If SU does not have a register use, i.e. it doesn't produce a value
1607    // that would be consumed (e.g. store), then it terminates a chain of
1608    // computation.  Give it a large SethiUllman number so it will be
1609    // scheduled right before its predecessors, so that it doesn't lengthen
1610    // their live ranges.
1611    return 0xffff;
1612  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1613    // If SU does not have a register def, schedule it close to its uses
1614    // because it does not lengthen any live ranges.
1615    return 0;
1616  return SethiUllmanNumbers[SU->NodeNum];
1617}
1618
1619//===----------------------------------------------------------------------===//
1620//                     Register Pressure Tracking
1621//===----------------------------------------------------------------------===//
1622
1623void RegReductionPQBase::dumpRegPressure() const {
1624  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1625         E = TRI->regclass_end(); I != E; ++I) {
1626    const TargetRegisterClass *RC = *I;
1627    unsigned Id = RC->getID();
1628    unsigned RP = RegPressure[Id];
1629    if (!RP) continue;
1630    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1631          << '\n');
1632  }
1633}
1634
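/// HighRegPressure - Estimate whether scheduling SU would push any register
/// class over its limit. As a rough illustration of the check below: with a
/// class limit of 8 and a current pressure of 7, a data predecessor defining
/// one value of cost 1 gives 7 + 1 >= 8, so SU is reported as high pressure.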
1635bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1636  if (!TLI)
1637    return false;
1638
1639  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1640       I != E; ++I) {
1641    if (I->isCtrl())
1642      continue;
1643    SUnit *PredSU = I->getSUnit();
1644    const SDNode *PN = PredSU->getNode();
1645    if (!PN->isMachineOpcode()) {
1646      if (PN->getOpcode() == ISD::CopyFromReg) {
1647        EVT VT = PN->getValueType(0);
1648        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1649        unsigned Cost = TLI->getRepRegClassCostFor(VT);
1650        if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1651          return true;
1652      }
1653      continue;
1654    }
1655    unsigned POpc = PN->getMachineOpcode();
1656    if (POpc == TargetOpcode::IMPLICIT_DEF)
1657      continue;
1658    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1659      EVT VT = PN->getOperand(0).getValueType();
1660      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1661      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1662      // Check if this increases register pressure of the specific register
1663      // class to the point where it would cause spills.
1664      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1665        return true;
1666      continue;
1667    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1668               POpc == TargetOpcode::SUBREG_TO_REG) {
1669      EVT VT = PN->getValueType(0);
1670      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1671      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1672      // Check if this increases register pressure of the specific register
1673      // class to the point where it would cause spills.
1674      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1675        return true;
1676      continue;
1677    }
1678    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1679    for (unsigned i = 0; i != NumDefs; ++i) {
1680      EVT VT = PN->getValueType(i);
1681      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1682      if (RegPressure[RCId] >= RegLimit[RCId])
1683        return true; // Reg pressure already high.
1684      unsigned Cost = TLI->getRepRegClassCostFor(VT);
1685      if (!PN->hasAnyUseOfValue(i))
1686        continue;
1687      // Check if this increases register pressure of the specific register
1688      // class to the point where it would cause spills.
1689      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1690        return true;
1691    }
1692  }
1693
1694  return false;
1695}
1696
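/// MayReduceRegPressure - Return true if SU defines a used value in a register
/// class that is already at or over its limit; scheduling SU (bottom-up) ends
/// that value's live range, so it may reduce pressure in that class.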
1697bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
1698  const SDNode *N = SU->getNode();
1699
1700  if (!N->isMachineOpcode() || !SU->NumSuccs)
1701    return false;
1702
1703  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1704  for (unsigned i = 0; i != NumDefs; ++i) {
1705    EVT VT = N->getValueType(i);
1706    if (!N->hasAnyUseOfValue(i))
1707      continue;
1708    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1709    if (RegPressure[RCId] >= RegLimit[RCId])
1710      return true;
1711  }
1712  return false;
1713}
1714
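// Bottom-up bookkeeping: scheduling SU makes the values it consumes live, so
// pressure in the defining predecessors' register classes goes up, while the
// values SU itself defines already have all of their users scheduled, so their
// pressure goes down. This is only an estimate; see the "imprecise" comment
// further down.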
1715void RegReductionPQBase::ScheduledNode(SUnit *SU) {
1716  if (!TracksRegPressure)
1717    return;
1718
1719  const SDNode *N = SU->getNode();
1720  if (!N->isMachineOpcode()) {
1721    if (N->getOpcode() != ISD::CopyToReg)
1722      return;
1723  } else {
1724    unsigned Opc = N->getMachineOpcode();
1725    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1726        Opc == TargetOpcode::INSERT_SUBREG ||
1727        Opc == TargetOpcode::SUBREG_TO_REG ||
1728        Opc == TargetOpcode::REG_SEQUENCE ||
1729        Opc == TargetOpcode::IMPLICIT_DEF)
1730      return;
1731  }
1732
1733  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1734       I != E; ++I) {
1735    if (I->isCtrl())
1736      continue;
1737    SUnit *PredSU = I->getSUnit();
1738    if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1739      continue;
1740    const SDNode *PN = PredSU->getNode();
1741    if (!PN->isMachineOpcode()) {
1742      if (PN->getOpcode() == ISD::CopyFromReg) {
1743        EVT VT = PN->getValueType(0);
1744        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1745        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1746      }
1747      continue;
1748    }
1749    unsigned POpc = PN->getMachineOpcode();
1750    if (POpc == TargetOpcode::IMPLICIT_DEF)
1751      continue;
1752    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1753      EVT VT = PN->getOperand(0).getValueType();
1754      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1755      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1756      continue;
1757    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1758               POpc == TargetOpcode::SUBREG_TO_REG) {
1759      EVT VT = PN->getValueType(0);
1760      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1761      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1762      continue;
1763    }
1764    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1765    for (unsigned i = 0; i != NumDefs; ++i) {
1766      EVT VT = PN->getValueType(i);
1767      if (!PN->hasAnyUseOfValue(i))
1768        continue;
1769      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1770      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1771    }
1772  }
1773
1774  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1775  // may transfer data dependencies to CopyToReg.
1776  if (SU->NumSuccs && N->isMachineOpcode()) {
1777    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1778    for (unsigned i = 0; i != NumDefs; ++i) {
1779      EVT VT = N->getValueType(i);
1780      if (!N->hasAnyUseOfValue(i))
1781        continue;
1782      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1783      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1784        // Register pressure tracking is imprecise. This can happen.
1785        RegPressure[RCId] = 0;
1786      else
1787        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1788    }
1789  }
1790
1791  dumpRegPressure();
1792}
1793
1794void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
1795  if (!TracksRegPressure)
1796    return;
1797
1798  const SDNode *N = SU->getNode();
1799  if (!N->isMachineOpcode()) {
1800    if (N->getOpcode() != ISD::CopyToReg)
1801      return;
1802  } else {
1803    unsigned Opc = N->getMachineOpcode();
1804    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1805        Opc == TargetOpcode::INSERT_SUBREG ||
1806        Opc == TargetOpcode::SUBREG_TO_REG ||
1807        Opc == TargetOpcode::REG_SEQUENCE ||
1808        Opc == TargetOpcode::IMPLICIT_DEF)
1809      return;
1810  }
1811
1812  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1813       I != E; ++I) {
1814    if (I->isCtrl())
1815      continue;
1816    SUnit *PredSU = I->getSUnit();
1817    if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1818      continue;
1819    const SDNode *PN = PredSU->getNode();
1820    if (!PN->isMachineOpcode()) {
1821      if (PN->getOpcode() == ISD::CopyFromReg) {
1822        EVT VT = PN->getValueType(0);
1823        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1824        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1825      }
1826      continue;
1827    }
1828    unsigned POpc = PN->getMachineOpcode();
1829    if (POpc == TargetOpcode::IMPLICIT_DEF)
1830      continue;
1831    if (POpc == TargetOpcode::EXTRACT_SUBREG) {
1832      EVT VT = PN->getOperand(0).getValueType();
1833      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1834      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1835      continue;
1836    } else if (POpc == TargetOpcode::INSERT_SUBREG ||
1837               POpc == TargetOpcode::SUBREG_TO_REG) {
1838      EVT VT = PN->getValueType(0);
1839      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1840      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1841      continue;
1842    }
1843    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1844    for (unsigned i = 0; i != NumDefs; ++i) {
1845      EVT VT = PN->getValueType(i);
1846      if (!PN->hasAnyUseOfValue(i))
1847        continue;
1848      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1849      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1850        // Register pressure tracking is imprecise. This can happen.
1851        RegPressure[RCId] = 0;
1852      else
1853        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1854    }
1855  }
1856
1857  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
1858  // may transfer data dependencies to CopyToReg.
1859  if (SU->NumSuccs && N->isMachineOpcode()) {
1860    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1861    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1862      EVT VT = N->getValueType(i);
1863      if (VT == MVT::Glue || VT == MVT::Other)
1864        continue;
1865      if (!N->hasAnyUseOfValue(i))
1866        continue;
1867      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1868      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1869    }
1870  }
1871
1872  dumpRegPressure();
1873}
1874
1875//===----------------------------------------------------------------------===//
1876//           Dynamic Node Priority for Register Pressure Reduction
1877//===----------------------------------------------------------------------===//
1878
1879/// closestSucc - Returns the scheduled position of the successor closest to
1880/// the current cycle, i.e. the maximum height over SU's data successors.
1881static unsigned closestSucc(const SUnit *SU) {
1882  unsigned MaxHeight = 0;
1883  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1884       I != E; ++I) {
1885    if (I->isCtrl()) continue;  // ignore chain succs
1886    unsigned Height = I->getSUnit()->getHeight();
1887    // If there are bunch of CopyToRegs stacked up, they should be considered
1888    // to be at the same position.
1889    if (I->getSUnit()->getNode() &&
1890        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1891      Height = closestSucc(I->getSUnit())+1;
1892    if (Height > MaxHeight)
1893      MaxHeight = Height;
1894  }
1895  return MaxHeight;
1896}
1897
1898/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1899/// for scratch registers, i.e. the number of data dependencies.
1900static unsigned calcMaxScratches(const SUnit *SU) {
1901  unsigned Scratches = 0;
1902  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1903       I != E; ++I) {
1904    if (I->isCtrl()) continue;  // ignore chain preds
1905    Scratches++;
1906  }
1907  return Scratches;
1908}
1909
1910/// hasOnlyLiveOutUses - Return true if all of SU's data successors are
1911/// CopyToReg nodes copying into virtual registers. Such a def is probably a
1912/// live-out with no other use; it should be scheduled closer to the terminator.
1913static bool hasOnlyLiveOutUses(const SUnit *SU) {
1914  bool RetVal = false;
1915  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1916       I != E; ++I) {
1917    if (I->isCtrl()) continue;
1918    const SUnit *SuccSU = I->getSUnit();
1919    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
1920      unsigned Reg =
1921        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
1922      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
1923        RetVal = true;
1924        continue;
1925      }
1926    }
1927    return false;
1928  }
1929  return RetVal;
1930}
1931
1932/// UnitsSharePred - Return true if the two scheduling units share a common
1933/// data predecessor.
1934static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
1935  SmallSet<const SUnit*, 4> Preds;
1936  for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
1937       I != E; ++I) {
1938    if (I->isCtrl()) continue;  // ignore chain preds
1939    Preds.insert(I->getSUnit());
1940  }
1941  for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
1942       I != E; ++I) {
1943    if (I->isCtrl()) continue;  // ignore chain preds
1944    if (Preds.count(I->getSUnit()))
1945      return true;
1946  }
1947  return false;
1948}
1949
1950// Check for either a dependence (latency) or resource (hazard) stall.
1951//
1952// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
1953static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
1954  if ((int)SPQ->getCurCycle() < Height) return true;
1955  if (SPQ->getHazardRec()->getHazardType(SU, 0)
1956      != ScheduleHazardRecognizer::NoHazard)
1957    return true;
1958  return false;
1959}
1960
1961// Return -1 if left has higher priority, 1 if right has higher priority.
1962// Return 0 if latency-based priority is equivalent.
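// For example, if only the left node would stall (LStall && !RStall below),
// this returns 1 so the picker prefers the right node and the stalling node
// is delayed.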
1963static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
1964                            RegReductionPQBase *SPQ) {
1965  // If the two nodes share an operand and one of them has a single
1966  // use that is a live out copy, favor the one that is live out. Otherwise
1967  // it will be difficult to eliminate the copy if the instruction is a
1968  // loop induction variable update. e.g.
1969  // BB:
1970  // sub r1, r3, #1
1971  // str r0, [r2, r3]
1972  // mov r3, r1
1973  // cmp
1974  // bne BB
1975  bool SharePred = UnitsSharePred(left, right);
1976  // FIXME: Only adjust if BB is a loop back edge.
1977  // FIXME: What's the cost of a copy?
1978  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
1979  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
1980  int LHeight = (int)left->getHeight() - LBonus;
1981  int RHeight = (int)right->getHeight() - RBonus;
1982
1983  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
1984    BUHasStall(left, LHeight, SPQ);
1985  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
1986    BUHasStall(right, RHeight, SPQ);
1987
1988  // If scheduling one of the nodes will cause a pipeline stall, delay it.
1989  // If scheduling both of them will cause stalls, sort them according to
1990  // their height.
1991  if (LStall) {
1992    if (!RStall)
1993      return 1;
1994    if (LHeight != RHeight)
1995      return LHeight > RHeight ? 1 : -1;
1996  } else if (RStall)
1997    return -1;
1998
1999  // If either node is scheduling for latency, sort them by depth
2000  // and latency.
2001  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
2002                     right->SchedulingPref == Sched::Latency)) {
2003    int LDepth = (int)left->getDepth();
2004    int RDepth = (int)right->getDepth();
2005
2006    DEBUG(dbgs() << "  Comparing latency of SU #" << left->NodeNum
2007          << " depth " << LDepth << " vs SU #" << right->NodeNum
2008          << " depth " << RDepth << "\n");
2009
2010    if (EnableSchedCycles) {
2011      if (LDepth != RDepth)
2012        return LDepth < RDepth ? 1 : -1;
2013    } else {
2015      if (LHeight != RHeight)
2016        return LHeight > RHeight ? 1 : -1;
2017    }
2018    if (left->Latency != right->Latency)
2019      return left->Latency > right->Latency ? 1 : -1;
2020  }
2021  return 0;
2022}
2023
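/// BURRSort - Priority comparison shared by the bottom-up schedulers. It falls
/// through a cascade of tie-breakers: Sethi-Ullman priority, distance to the
/// closest successor, the scratch-register estimate, height/depth (or a
/// latency comparison when cycle-level scheduling is enabled), and finally
/// NodeQueueId, which guarantees a strict ordering.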
2024static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2025  unsigned LPriority = SPQ->getNodePriority(left);
2026  unsigned RPriority = SPQ->getNodePriority(right);
2027  if (LPriority != RPriority)
2028    return LPriority > RPriority;
2029
2030  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2031  // e.g.
2032  // t1 = op t2, c1
2033  // t3 = op t4, c2
2034  //
2035  // and the following instructions are both ready.
2036  // t2 = op c3
2037  // t4 = op c4
2038  //
2039  // Then schedule t2 = op first.
2040  // i.e.
2041  // t4 = op c4
2042  // t2 = op c3
2043  // t1 = op t2, c1
2044  // t3 = op t4, c2
2045  //
2046  // This creates more short live intervals.
2047  unsigned LDist = closestSucc(left);
2048  unsigned RDist = closestSucc(right);
2049  if (LDist != RDist)
2050    return LDist < RDist;
2051
2052  // How many registers become live when the node is scheduled.
2053  unsigned LScratch = calcMaxScratches(left);
2054  unsigned RScratch = calcMaxScratches(right);
2055  if (LScratch != RScratch)
2056    return LScratch > RScratch;
2057
2058  if (EnableSchedCycles) {
2059    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2060    if (result != 0)
2061      return result > 0;
2062  } else {
2064    if (left->getHeight() != right->getHeight())
2065      return left->getHeight() > right->getHeight();
2066
2067    if (left->getDepth() != right->getDepth())
2068      return left->getDepth() < right->getDepth();
2069  }
2070
2071  assert(left->NodeQueueId && right->NodeQueueId &&
2072         "NodeQueueId cannot be zero");
2073  return (left->NodeQueueId > right->NodeQueueId);
2074}
2075
2076// Bottom up
2077bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2078  return BURRSort(left, right, SPQ);
2079}
2080
2081// Source order, otherwise bottom up.
2082bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2083  unsigned LOrder = SPQ->getNodeOrdering(left);
2084  unsigned ROrder = SPQ->getNodeOrdering(right);
2085
2086  // Prefer an ordering in which the node with the lower non-zero order
2087  // number ends up earlier in the schedule; zero means no recorded order.
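  // For example, LOrder == 2 and ROrder == 5 makes this return true, so the
  // node that comes later in source order (right) is popped first; because
  // scheduling is bottom-up, that places the lower-numbered node earlier in
  // the final schedule.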
2088  if ((LOrder || ROrder) && LOrder != ROrder)
2089    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2090
2091  return BURRSort(left, right, SPQ);
2092}
2093
2094// If the time between now and when the instruction will be ready can cover
2095// the spill code, then avoid adding it to the ready queue. This gives long
2096// stalls highest priority and allows hoisting across calls. It should also
2097// speed up processing the available queue.
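// For example, with ReadyDelay == 3 a node of height 10 is kept out of the
// ready queue until CurCycle reaches 7, unless it may reduce register
// pressure, in which case it is made available immediately.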
2098bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2099  static const unsigned ReadyDelay = 3;
2100
2101  if (SPQ->MayReduceRegPressure(SU)) return true;
2102
2103  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2104
2105  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2106      != ScheduleHazardRecognizer::NoHazard)
2107    return false;
2108
2109  return true;
2110}
2111
2112// Return true if right should be scheduled with higher priority than left.
2113bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2114  if (left->isCall || right->isCall)
2115    // No way to compute latency of calls.
2116    return BURRSort(left, right, SPQ);
2117
2118  bool LHigh = SPQ->HighRegPressure(left);
2119  bool RHigh = SPQ->HighRegPressure(right);
2120  // Avoid causing spills. If register pressure is high, schedule for
2121  // register pressure reduction.
2122  if (LHigh && !RHigh)
2123    return true;
2124  else if (!LHigh && RHigh)
2125    return false;
2126  else if (!LHigh && !RHigh) {
2127    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2128    if (result != 0)
2129      return result > 0;
2130  }
2131  return BURRSort(left, right, SPQ);
2132}
2133
2134// Schedule as many instructions in each cycle as possible, so don't make an
2135// instruction available unless it is ready in the current cycle.
2136bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2137  if (SU->getHeight() > CurCycle) return false;
2138
2139  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2140      != ScheduleHazardRecognizer::NoHazard)
2141    return false;
2142
2143  return true;
2144}
2145
2146bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2147  if (left->isCall || right->isCall)
2148    // No way to compute latency of calls.
2149    return BURRSort(left, right, SPQ);
2150
2151  bool LHigh = SPQ->HighRegPressure(left);
2152  bool RHigh = SPQ->HighRegPressure(right);
2153  // Avoid causing spills. If register pressure is high, schedule for
2154  // register pressure reduction.
2155  if (LHigh && !RHigh)
2156    return true;
2157  else if (!LHigh && RHigh)
2158    return false;
2159  else if (!LHigh && !RHigh) {
2160    // Low register pressure situation, schedule to maximize instruction level
2161    // parallelism.
2162    if (left->NumPreds > right->NumPreds)
2163      return false;
2164    else if (left->NumPreds < right->NumPreds)
2165      return true;
2166  }
2167
2168  return BURRSort(left, right, SPQ);
2169}
2170
2171//===----------------------------------------------------------------------===//
2172//                    Preschedule for Register Pressure
2173//===----------------------------------------------------------------------===//
2174
2175bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2176  if (SU->isTwoAddress) {
2177    unsigned Opc = SU->getNode()->getMachineOpcode();
2178    const TargetInstrDesc &TID = TII->get(Opc);
2179    unsigned NumRes = TID.getNumDefs();
2180    unsigned NumOps = TID.getNumOperands() - NumRes;
2181    for (unsigned i = 0; i != NumOps; ++i) {
2182      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
2183        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2184        if (DU->getNodeId() != -1 &&
2185            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2186          return true;
2187      }
2188    }
2189  }
2190  return false;
2191}
2192
2193/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2194/// physical register defs.
2195static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2196                                  const TargetInstrInfo *TII,
2197                                  const TargetRegisterInfo *TRI) {
2198  SDNode *N = SuccSU->getNode();
2199  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2200  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2201  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2202  for (const SDNode *SUNode = SU->getNode(); SUNode;
2203       SUNode = SUNode->getGluedNode()) {
2204    if (!SUNode->isMachineOpcode())
2205      continue;
2206    const unsigned *SUImpDefs =
2207      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2208    if (!SUImpDefs)
2209      return false;
2210    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2211      EVT VT = N->getValueType(i);
2212      if (VT == MVT::Glue || VT == MVT::Other)
2213        continue;
2214      if (!N->hasAnyUseOfValue(i))
2215        continue;
2216      unsigned Reg = ImpDefs[i - NumDefs];
2217      for (;*SUImpDefs; ++SUImpDefs) {
2218        unsigned SUReg = *SUImpDefs;
2219        if (TRI->regsOverlap(Reg, SUReg))
2220          return true;
2221      }
2222    }
2223  }
2224  return false;
2225}
2226
2227/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2228/// are not handled well by the general register pressure reduction
2229/// heuristics. When presented with code like this:
2230///
2231///      N
2232///    / |
2233///   /  |
2234///  U  store
2235///  |
2236/// ...
2237///
2238/// the heuristics tend to push the store up, but since the
2239/// operand of the store has another use (U), this would increase
2240/// the length of that other use (the U->N edge).
2241///
2242/// This function transforms code like the above to route U's
2243/// dependence through the store when possible, like this:
2244///
2245///      N
2246///      ||
2247///      ||
2248///     store
2249///       |
2250///       U
2251///       |
2252///      ...
2253///
2254/// This results in the store being scheduled immediately
2255/// after N, which shortens the U->N live range, reducing
2256/// register pressure.
2257///
2258void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2259  // Visit all the nodes in topological order, working top-down.
2260  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2261    SUnit *SU = &(*SUnits)[i];
2262    // For now, only look at nodes with no data successors, such as stores.
2263    // These are especially important, due to the heuristics in
2264    // getNodePriority for nodes with no data successors.
2265    if (SU->NumSuccs != 0)
2266      continue;
2267    // For now, only look at nodes with exactly one data predecessor.
2268    if (SU->NumPreds != 1)
2269      continue;
2270    // Avoid prescheduling copies to virtual registers, which don't behave
2271    // like other nodes from the perspective of scheduling heuristics.
2272    if (SDNode *N = SU->getNode())
2273      if (N->getOpcode() == ISD::CopyToReg &&
2274          TargetRegisterInfo::isVirtualRegister
2275            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2276        continue;
2277
2278    // Locate the single data predecessor.
2279    SUnit *PredSU = 0;
2280    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2281         EE = SU->Preds.end(); II != EE; ++II)
2282      if (!II->isCtrl()) {
2283        PredSU = II->getSUnit();
2284        break;
2285      }
2286    assert(PredSU);
2287
2288    // Don't rewrite edges that carry physregs, because that requires additional
2289    // support infrastructure.
2290    if (PredSU->hasPhysRegDefs)
2291      continue;
2292    // Short-circuit the case where SU is PredSU's only data successor.
2293    if (PredSU->NumSuccs == 1)
2294      continue;
2295    // Avoid prescheduling to copies from virtual registers, which don't behave
2296    // like other nodes from the perspective of scheduling heuristics.
2297    if (SDNode *N = SU->getNode())
2298      if (N->getOpcode() == ISD::CopyFromReg &&
2299          TargetRegisterInfo::isVirtualRegister
2300            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2301        continue;
2302
2303    // Perform checks on the successors of PredSU.
2304    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2305         EE = PredSU->Succs.end(); II != EE; ++II) {
2306      SUnit *PredSuccSU = II->getSUnit();
2307      if (PredSuccSU == SU) continue;
2308      // If PredSU has another successor with no data successors, for
2309      // now don't attempt to choose either over the other.
2310      if (PredSuccSU->NumSuccs == 0)
2311        goto outer_loop_continue;
2312      // Don't break physical register dependencies.
2313      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2314        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2315          goto outer_loop_continue;
2316      // Don't introduce graph cycles.
2317      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2318        goto outer_loop_continue;
2319    }
2320
2321    // Ok, the transformation is safe and the heuristics suggest it is
2322    // profitable. Update the graph.
2323    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2324                 << " next to PredSU #" << PredSU->NodeNum
2325                 << " to guide scheduling in the presence of multiple uses\n");
2326    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2327      SDep Edge = PredSU->Succs[i];
2328      assert(!Edge.isAssignedRegDep());
2329      SUnit *SuccSU = Edge.getSUnit();
2330      if (SuccSU != SU) {
2331        Edge.setSUnit(PredSU);
2332        scheduleDAG->RemovePred(SuccSU, Edge);
2333        scheduleDAG->AddPred(SU, Edge);
2334        Edge.setSUnit(SU);
2335        scheduleDAG->AddPred(SuccSU, Edge);
2336        --i;
2337      }
2338    }
2339  outer_loop_continue:;
2340  }
2341}
2342
2343/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2344/// it as a def&use operand, add a pseudo control edge from it to the other
2345/// node (if it won't create a cycle) so the two-address one will be scheduled
2346/// first (lower in the schedule). If both nodes are two-address, favor the
2347/// one that has a CopyToReg use (more likely to be a loop induction update).
2348/// If both are two-address, but one is commutable while the other is not
2349/// commutable, favor the one that's not commutable.
2350void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2351  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2352    SUnit *SU = &(*SUnits)[i];
2353    if (!SU->isTwoAddress)
2354      continue;
2355
2356    SDNode *Node = SU->getNode();
2357    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2358      continue;
2359
2360    bool isLiveOut = hasOnlyLiveOutUses(SU);
2361    unsigned Opc = Node->getMachineOpcode();
2362    const TargetInstrDesc &TID = TII->get(Opc);
2363    unsigned NumRes = TID.getNumDefs();
2364    unsigned NumOps = TID.getNumOperands() - NumRes;
2365    for (unsigned j = 0; j != NumOps; ++j) {
2366      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
2367        continue;
2368      SDNode *DU = SU->getNode()->getOperand(j).getNode();
2369      if (DU->getNodeId() == -1)
2370        continue;
2371      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2372      if (!DUSU) continue;
2373      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2374           E = DUSU->Succs.end(); I != E; ++I) {
2375        if (I->isCtrl()) continue;
2376        SUnit *SuccSU = I->getSUnit();
2377        if (SuccSU == SU)
2378          continue;
2379        // Be conservative. Ignore if nodes aren't at roughly the same
2380        // depth and height.
2381        if (SuccSU->getHeight() < SU->getHeight() &&
2382            (SU->getHeight() - SuccSU->getHeight()) > 1)
2383          continue;
2384        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2385        // constrains whatever is using the copy, instead of the copy
2386        // itself. In the case that the copy is coalesced, this
2387        // preserves the intent of the pseudo two-address heuristics.
2388        while (SuccSU->Succs.size() == 1 &&
2389               SuccSU->getNode()->isMachineOpcode() &&
2390               SuccSU->getNode()->getMachineOpcode() ==
2391                 TargetOpcode::COPY_TO_REGCLASS)
2392          SuccSU = SuccSU->Succs.front().getSUnit();
2393        // Don't constrain non-instruction nodes.
2394        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2395          continue;
2396        // Don't constrain nodes with physical register defs if the
2397        // predecessor can clobber them.
2398        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2399          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2400            continue;
2401        }
2402        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2403        // these may be coalesced away. We want them close to their uses.
2404        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2405        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2406            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2407            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2408          continue;
2409        if ((!canClobber(SuccSU, DUSU) ||
2410             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2411             (!SU->isCommutable && SuccSU->isCommutable)) &&
2412            !scheduleDAG->IsReachable(SuccSU, SU)) {
2413          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2414                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2415          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
2416                                        /*Reg=*/0, /*isNormalMemory=*/false,
2417                                        /*isMustAlias=*/false,
2418                                        /*isArtificial=*/true));
2419        }
2420      }
2421    }
2422  }
2423}
2424
2425/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
2426/// predecessors of the successors of the SUnit SU. Stop when the provided
2427/// limit is exceeded.
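/// The scan stops as soon as the sum exceeds the limit, so the result is only
/// meaningful relative to Limit; the top-down heuristic below calls it with
/// Limit == 1 to test whether the successors have exactly one unscheduled
/// predecessor in total.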
2428static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
2429                                                    unsigned Limit) {
2430  unsigned Sum = 0;
2431  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2432       I != E; ++I) {
2433    const SUnit *SuccSU = I->getSUnit();
2434    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
2435         EE = SuccSU->Preds.end(); II != EE; ++II) {
2436      SUnit *PredSU = II->getSUnit();
2437      if (!PredSU->isScheduled)
2438        if (++Sum > Limit)
2439          return Sum;
2440    }
2441  }
2442  return Sum;
2443}
2444
2445
2446// Top down
2447bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
2448  unsigned LPriority = SPQ->getNodePriority(left);
2449  unsigned RPriority = SPQ->getNodePriority(right);
2450  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
2451  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
2452  bool LIsFloater = LIsTarget && left->NumPreds == 0;
2453  bool RIsFloater = RIsTarget && right->NumPreds == 0;
2454  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
2455  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
2456
2457  if (left->NumSuccs == 0 && right->NumSuccs != 0)
2458    return false;
2459  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
2460    return true;
2461
2462  if (LIsFloater)
2463    LBonus -= 2;
2464  if (RIsFloater)
2465    RBonus -= 2;
2466  if (left->NumSuccs == 1)
2467    LBonus += 2;
2468  if (right->NumSuccs == 1)
2469    RBonus += 2;
2470
2471  if (LPriority+LBonus != RPriority+RBonus)
2472    return LPriority+LBonus < RPriority+RBonus;
2473
2474  if (left->getDepth() != right->getDepth())
2475    return left->getDepth() < right->getDepth();
2476
2477  if (left->NumSuccsLeft != right->NumSuccsLeft)
2478    return left->NumSuccsLeft > right->NumSuccsLeft;
2479
2480  assert(left->NodeQueueId && right->NodeQueueId &&
2481         "NodeQueueId cannot be zero");
2482  return (left->NodeQueueId > right->NodeQueueId);
2483}
2484
2485//===----------------------------------------------------------------------===//
2486//                         Public Constructor Functions
2487//===----------------------------------------------------------------------===//
2488
2489llvm::ScheduleDAGSDNodes *
2490llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2491                                 CodeGenOpt::Level OptLevel) {
2492  const TargetMachine &TM = IS->TM;
2493  const TargetInstrInfo *TII = TM.getInstrInfo();
2494  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2495
2496  BURegReductionPriorityQueue *PQ =
2497    new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2498  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2499  PQ->setScheduleDAG(SD);
2500  return SD;
2501}
2502
2503llvm::ScheduleDAGSDNodes *
2504llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
2505                                 CodeGenOpt::Level OptLevel) {
2506  const TargetMachine &TM = IS->TM;
2507  const TargetInstrInfo *TII = TM.getInstrInfo();
2508  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2509
2510  TDRegReductionPriorityQueue *PQ =
2511    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2512  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2513  PQ->setScheduleDAG(SD);
2514  return SD;
2515}
2516
2517llvm::ScheduleDAGSDNodes *
2518llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2519                                   CodeGenOpt::Level OptLevel) {
2520  const TargetMachine &TM = IS->TM;
2521  const TargetInstrInfo *TII = TM.getInstrInfo();
2522  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2523
2524  SrcRegReductionPriorityQueue *PQ =
2525    new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2526  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2527  PQ->setScheduleDAG(SD);
2528  return SD;
2529}
2530
2531llvm::ScheduleDAGSDNodes *
2532llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
2533                                   CodeGenOpt::Level OptLevel) {
2534  const TargetMachine &TM = IS->TM;
2535  const TargetInstrInfo *TII = TM.getInstrInfo();
2536  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2537  const TargetLowering *TLI = &IS->getTargetLowering();
2538
2539  HybridBURRPriorityQueue *PQ =
2540    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2541
2542  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2543  PQ->setScheduleDAG(SD);
2544  return SD;
2545}
2546
2547llvm::ScheduleDAGSDNodes *
2548llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
2549                                CodeGenOpt::Level OptLevel) {
2550  const TargetMachine &TM = IS->TM;
2551  const TargetInstrInfo *TII = TM.getInstrInfo();
2552  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2553  const TargetLowering *TLI = &IS->getTargetLowering();
2554
2555  ILPBURRPriorityQueue *PQ =
2556    new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
2557  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
2558  PQ->setScheduleDAG(SD);
2559  return SD;
2560}
2561