ScheduleDAGRRList.cpp revision 83bd3e6df5274f42bbae9f5f611b373ac61c945c
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched",
                                      cl::init(false), cl::Hidden);

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up rr list scheduling which avoids stalls for "
                         "long latency instructions",
                         createHybridListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
  /// if it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that modify
  /// the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup, bool needlatency,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
      AvailableQueue(availqueue), Topo(SUnits) {
    }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " **********\n");

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif

  // FIXME: Handle noop hazard.
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}
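// Illustrative backtracking scenario (hypothetical, for exposition): suppose
// the def feeding SU's physical-register dependency went live at cycle 3, but
// nodes already placed at cycles 4 and 5 clobber that register.
// BacktrackBottomUp(SU, /*BtCycle=*/3, CurCycle) pops the cycle-5 and cycle-4
// entries off Sequence via UnscheduleNodeBottomUp, returning them to the
// available set so that SU can be scheduled ahead of them.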

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}
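// Illustrative unfolding example (x86-flavored, hypothetical): a node that
// folds a load, e.g. a reg-mem add, can often be split by
// TII->unfoldMemoryOperand into a separate load plus a reg-reg add. The load
// then gets its own SUnit, so the physical-register def no longer has to stay
// live across the memory access. When unfolding fails or doesn't apply, the
// code above falls back to cloning SU so the already-scheduled successors can
// consume the clone's value instead.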

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
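// Worked example (hypothetical instruction): if a machine node has one
// explicit def (value 0) and ImplicitDefs == { R1, R2 }, then asking for R2
// skips the explicit def and the earlier implicit def R1, so NumRes == 2 and
// the function returns N->getValueType(2). The helper assumes implicit-def
// value types follow the explicit results in the node's value list.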

/// CheckForLiveRegDef - Return true and update live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}
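// Note the alias walk above: on x86, for example, AX overlaps EAX, so a node
// defining AX must also be flagged while a value is being kept live in EAX.
// Both the register itself and each interfering alias are appended to LRegs
// so the caller can decide how to resolve the conflict.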

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}
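// The inline asm walk above decodes one flag word per operand group. As a
// rough sketch (see llvm/InlineAsm.h for the authoritative encoding), the
// flag word packs the operand kind in the low bits and the register count
// above them, so a two-register def group advances i by one (the flag word)
// plus two (the RegisterSDNode operands):
//
//   unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_RegDef, 2);
//   assert(InlineAsm::getNumOperandRegisters(Flags) == 2);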


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU))  {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getMinimalPhysRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross copy register class is null, then it must be possible
        // to copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies, these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                       << " to SU #" << Copies.front()->NodeNum << "\n");
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                     << " to SU #" << TrySU->NodeNum << "\n");
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

  // Reverse the order since the schedule was built bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
      : SPQ(spq) {}
    src_ls_rr_sort(const src_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
      : SPQ(spq) {}
    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
      : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
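// Worked example (illustrative): for the expression tree
//
//   a   b          (a, b, c are leaves)
//    \ /
//     +   c
//      \ /
//       *
//
// each leaf gets number 1 (no data preds, so the initial 0 is bumped to 1).
// The '+' sees two preds tied at 1, so Extra == 1 and its number is
// 1 + 1 == 2. The '*' sees preds 2 and 1; the max is 2 with no tie, so it
// stays 2. Larger numbers mark subtrees that need more registers to evaluate.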

namespace {
  template<class SF>
  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
    std::vector<SUnit*> Queue;
    SF Picker;
    unsigned CurQueueId;
    bool isBottomUp;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    MachineFunction &MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const TargetLowering *TLI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

    /// RegPressure - Tracking current reg pressure per register class.
    ///
    std::vector<int> RegPressure;

    /// RegLimit - Tracking the number of allocatable registers per register
    /// class.
    std::vector<int> RegLimit;

  public:
    RegReductionPriorityQueue(MachineFunction &mf,
                              bool isbottomup,
                              const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri,
                              const TargetLowering *tli)
      : Picker(this), CurQueueId(0), isBottomUp(isbottomup),
        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1;
    }

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
          Opc == TargetOpcode::SUBREG_TO_REG ||
          Opc == TargetOpcode::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation.  Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned getNodeOrdering(const SUnit *SU) const {
      return scheduleDAG->DAG->GetOrdering(SU->getNode());
    }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++CurQueueId;
      Queue.push_back(U);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      std::vector<SUnit *>::iterator Best = Queue.begin();
      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
           E = Queue.end(); I != E; ++I)
        if (Picker(*Best, *I))
          Best = I;
      SUnit *V = *Best;
      if (Best != prior(Queue.end()))
        std::swap(*Best, Queue.back());
      Queue.pop_back();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                   SU);
      if (I != prior(Queue.end()))
        std::swap(*I, Queue.back());
      Queue.pop_back();
      SU->NodeQueueId = 0;
    }

    // EstimateSpills - Given a scheduling unit, estimate the number of spills
    // it would cause by scheduling it at the current cycle.
    unsigned EstimateSpills(const SUnit *SU) const {
      if (!TLI)
        return 0;

      unsigned Spills = 0;
      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
           I != E; ++I) {
        if (I->isCtrl())
          continue;
        SUnit *PredSU = I->getSUnit();
        if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1)
          continue;
        const SDNode *N = PredSU->getNode();
        if (!N->isMachineOpcode())
          continue;
        unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
        for (unsigned i = 0; i != NumDefs; ++i) {
          EVT VT = N->getValueType(i);
          if (!N->hasAnyUseOfValue(i))
            continue;
          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
          unsigned Cost = TLI->getRepRegClassCostFor(VT);
          // Check if this increases register pressure of the specific register
          // class to the point where it would cause spills.
          int Excess = RegPressure[RCId] + Cost - RegLimit[RCId];
          if (Excess > 0)
            Spills += Excess;
        }
      }

      if (!SU->NumSuccs || !Spills)
        return Spills;
      const SDNode *N = SU->getNode();
      if (!N->isMachineOpcode())
        return Spills;
      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
      for (unsigned i = 0; i != NumDefs; ++i) {
        EVT VT = N->getValueType(i);
        if (!N->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        unsigned Cost = TLI->getRepRegClassCostFor(VT);
        if (RegPressure[RCId] > RegLimit[RCId]) {
          int Less = RegLimit[RCId] - (RegPressure[RCId] - Cost);
          if (Less > 0) {
            if (Spills <= (unsigned)Less)
              return 0;
            Spills -= Less;
          }
        }
      }

      return Spills;
    }
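    // Worked example (illustrative numbers): with RegLimit[RC] == 6 and
    // RegPressure[RC] == 7, a predecessor def of cost 1 that would become
    // live gives Excess == 7 + 1 - 6 == 2, charging two units. If SU itself
    // defines a cost-2 value in the same class, the second loop credits
    // Less == 6 - (7 - 2) == 1, leaving an estimate of 1. This is a heuristic
    // estimate, not an exact spill count.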
1229
1230    void OpenPredLives(SUnit *SU) {
1231      const SDNode *N = SU->getNode();
1232      if (!N->isMachineOpcode())
1233        return;
1234      unsigned Opc = N->getMachineOpcode();
1235      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1236          Opc == TargetOpcode::INSERT_SUBREG ||
1237          Opc == TargetOpcode::SUBREG_TO_REG ||
1238          Opc == TargetOpcode::COPY_TO_REGCLASS ||
1239          Opc == TargetOpcode::REG_SEQUENCE ||
1240          Opc == TargetOpcode::IMPLICIT_DEF)
1241        return;
1242
1243      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1244           I != E; ++I) {
1245        if (I->isCtrl())
1246          continue;
1247        SUnit *PredSU = I->getSUnit();
1248        if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1)
1249          continue;
1250        const SDNode *PN = PredSU->getNode();
1251        if (!PN->isMachineOpcode())
1252          continue;
1253        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1254        for (unsigned i = 0; i != NumDefs; ++i) {
1255          EVT VT = PN->getValueType(i);
1256          if (!PN->hasAnyUseOfValue(i))
1257            continue;
1258          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1259          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1260        }
1261      }
1262
1263      if (!SU->NumSuccs)
1264        return;
1265      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1266      for (unsigned i = 0; i != NumDefs; ++i) {
1267        EVT VT = N->getValueType(i);
1268        if (!N->hasAnyUseOfValue(i))
1269          continue;
1270        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1271        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1272        if (RegPressure[RCId] < 0)
1273          // Register pressure tracking is imprecise. This can happen.
1274          RegPressure[RCId] = 0;
1275      }
1276    }
1277
1278    void ClosePredLives(SUnit *SU) {
1279      const SDNode *N = SU->getNode();
1280      if (!N->isMachineOpcode())
1281        return;
1282      unsigned Opc = N->getMachineOpcode();
1283      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1284          Opc == TargetOpcode::INSERT_SUBREG ||
1285          Opc == TargetOpcode::SUBREG_TO_REG ||
1286          Opc == TargetOpcode::COPY_TO_REGCLASS ||
1287          Opc == TargetOpcode::REG_SEQUENCE ||
1288          Opc == TargetOpcode::IMPLICIT_DEF)
1289        return;
1290
1291      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1292           I != E; ++I) {
1293        if (I->isCtrl())
1294          continue;
1295        SUnit *PredSU = I->getSUnit();
1296        if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1)
1297          continue;
1298        const SDNode *PN = PredSU->getNode();
1299        if (!PN->isMachineOpcode())
1300          continue;
1301        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1302        for (unsigned i = 0; i != NumDefs; ++i) {
1303          EVT VT = PN->getValueType(i);
1304          if (!PN->hasAnyUseOfValue(i))
1305            continue;
1306          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1307          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1308          if (RegPressure[RCId] < 0)
1309            // Register pressure tracking is imprecise. This can happen.
1310            RegPressure[RCId] = 0;
        }
      }

      if (!SU->NumSuccs)
        return;
      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
        EVT VT = N->getValueType(i);
        if (VT == MVT::Flag || VT == MVT::Other)
          continue;
        if (!N->hasAnyUseOfValue(i))
          continue;
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
    }

    /// ScheduledNode - Update register pressure bookkeeping as SU is
    /// scheduled (bottom-up only).
    void ScheduledNode(SUnit *SU) {
      if (!TLI || !isBottomUp)
        return;
      OpenPredLives(SU);
      dumpRegPressure();
    }

    /// UnscheduledNode - Undo the pressure update for SU when it is
    /// unscheduled, e.g. during backtracking.
    void UnscheduledNode(SUnit *SU) {
      if (!TLI || !isBottomUp)
        return;
      ClosePredLives(SU);
      dumpRegPressure();
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

    void dumpRegPressure() const {
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I) {
        const TargetRegisterClass *RC = *I;
        unsigned Id = RC->getID();
        unsigned RP = RegPressure[Id];
        if (!RP) continue;
        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
              << '\n');
      }
    }
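
    // Illustrative -debug output from dumpRegPressure (hypothetical class
    // names and values, not from the original source):
    //   GR32: 6 / 7
    //   FR64: 2 / 16
    // i.e. current pressure / limit for each class with nonzero pressure.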

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<src_ls_rr_sort>
    SrcRegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
    HybridBURRPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If a bunch of CopyToRegs are stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

template <typename RRSort>
static bool BURRSort(const SUnit *left, const SUnit *right,
                     const RegReductionPriorityQueue<RRSort> *SPQ) {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // When Sethi-Ullman numbers are the same, try to schedule def + use closer
  // together, e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}
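
// To summarize, BURRSort breaks ties in this order: Sethi-Ullman priority,
// distance to the closest successor, scratch register estimate, height,
// depth, and finally NodeQueueId as the last-resort tie-breaker.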

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer the node with the lower non-zero order number. An order number of
  // zero means the node has no source ordering information.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
  bool LStall = left->SchedulingPref == Sched::Latency &&
    SPQ->getCurCycle() < left->getHeight();
  bool RStall = right->SchedulingPref == Sched::Latency &&
    SPQ->getCurCycle() < right->getHeight();
  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both of them will cause stalls, sort them according to
  // their height.
  // If neither will cause a pipeline stall, try to reduce register pressure.
  if (LStall) {
    if (!RStall)
      return true;
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();
  } else if (RStall)
    return false;

  // If either node is scheduling for latency, sort them by height and
  // latency first.
  if (left->SchedulingPref == Sched::Latency ||
      right->SchedulingPref == Sched::Latency) {
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency;
  }

  return BURRSort(left, right, SPQ);
}
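
// In short, the hybrid sort avoids pipeline stalls first, then sorts
// latency-sensitive nodes by height and latency, and otherwise falls back
// to the register pressure reducing BURRSort.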

/// canClobber - Return true if SU is a two-address instruction that has a
/// tied (def&use) operand whose value is produced by Op.
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}
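
// Illustrative note (not from the original source): for a two-address
// instruction such as x86's ADD32rr, the def operand is tied (TIED_TO) to
// the first source operand, so canClobber(SU, Op) effectively asks whether
// Op produces a value that SU would overwrite in place.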

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't
    // behave like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
                 << " next to PredSU #" << PredSU->NodeNum
                 << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        // RemovePred shrank PredSU->Succs, so revisit the current index.
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
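///
/// An illustrative sketch (not from the original source):
///   t2 = t1 + x    ; two-address: the def of t2 is tied to t1
///   t3 = t1 - y
/// The artificial edge ensures t3 is scheduled above t2 in the final
/// schedule, making t2's read of t1 the last use of t1 so that t1 and t2
/// can be assigned the same register.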
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ =
    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetLowering *TLI = &IS->getTargetLowering();

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI,
                                (RegPressureAware ? TLI : 0));
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}
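
// Note: these factory functions are hooked up to -pre-RA-sched via the
// RegisterScheduler entries earlier in this file; presumably the
// "list-burr", "list-tdrr", "source", and "list-hybrid" schedulers,
// respectively.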