ScheduleDAGRRList.cpp revision 4a863e2c75145432fd660ee65e61b578c5e90ac9
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/InlineAsm.h"
21#include "llvm/CodeGen/SchedulerRegistry.h"
22#include "llvm/CodeGen/SelectionDAGISel.h"
23#include "llvm/Target/TargetRegisterInfo.h"
24#include "llvm/Target/TargetData.h"
25#include "llvm/Target/TargetMachine.h"
26#include "llvm/Target/TargetInstrInfo.h"
27#include "llvm/Target/TargetLowering.h"
28#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Debug.h"
33#include "llvm/Support/ErrorHandling.h"
34#include "llvm/Support/raw_ostream.h"
35#include <climits>
36using namespace llvm;
37
38static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched",
39                                      cl::init(false), cl::Hidden);
40
41STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
42STATISTIC(NumUnfolds,    "Number of nodes unfolded");
43STATISTIC(NumDups,       "Number of duplicated nodes");
44STATISTIC(NumPRCopies,   "Number of physical register copies");
45
46static RegisterScheduler
47  burrListDAGScheduler("list-burr",
48                       "Bottom-up register reduction list scheduling",
49                       createBURRListDAGScheduler);
50static RegisterScheduler
51  tdrListrDAGScheduler("list-tdrr",
52                       "Top-down register reduction list scheduling",
53                       createTDRRListDAGScheduler);
54static RegisterScheduler
55  sourceListDAGScheduler("source",
56                         "Similar to list-burr but schedules in source "
57                         "order when possible",
58                         createSourceListDAGScheduler);
59
60static RegisterScheduler
61  hybridListDAGScheduler("list-hybrid",
62                         "Bottom-up rr list scheduling which avoids stalls for "
63                         "long latency instructions",
64                         createHybridListDAGScheduler);
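// Note (explanatory comment, not in the original source): these RegisterScheduler
// entries make each scheduler selectable via llc's -pre-RA-sched=<name> option
// (e.g. -pre-RA-sched=list-burr). The create*Scheduler factory functions they
// reference are declared in llvm/CodeGen/SchedulerRegistry.h, included above.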
65
66namespace {
67//===----------------------------------------------------------------------===//
68/// ScheduleDAGRRList - The actual register reduction list scheduler
69/// implementation.  This supports both top-down and bottom-up scheduling.
70///
71class ScheduleDAGRRList : public ScheduleDAGSDNodes {
72private:
73  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
74  /// it is top-down.
75  bool isBottomUp;
76
77  /// NeedLatency - True if the scheduler will make use of latency information.
78  ///
79  bool NeedLatency;
80
81  /// AvailableQueue - The priority queue to use for the available SUnits.
82  SchedulingPriorityQueue *AvailableQueue;
83
84  /// LiveRegDefs - A set of physical registers and their defining SUnits
85  /// that are "live". These nodes must be scheduled before any other nodes
86  /// that modify the registers can be scheduled.
87  unsigned NumLiveRegs;
88  std::vector<SUnit*> LiveRegDefs;
89  std::vector<unsigned> LiveRegCycles;
90
91  /// Topo - A topological ordering for SUnits which permits fast IsReachable
92  /// and similar queries.
93  ScheduleDAGTopologicalSort Topo;
94
95public:
96  ScheduleDAGRRList(MachineFunction &mf,
97                    bool isbottomup, bool needlatency,
98                    SchedulingPriorityQueue *availqueue)
99    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
100      AvailableQueue(availqueue), Topo(SUnits) {
101    }
102
103  ~ScheduleDAGRRList() {
104    delete AvailableQueue;
105  }
106
107  void Schedule();
108
109  /// IsReachable - Checks if SU is reachable from TargetSU.
110  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
111    return Topo.IsReachable(SU, TargetSU);
112  }
113
114  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
115  /// create a cycle.
116  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
117    return Topo.WillCreateCycle(SU, TargetSU);
118  }
119
120  /// AddPred - Adds a predecessor edge to SUnit SU.
121  /// The new dependence is recorded on both SU and the predecessor.
122  /// Updates the topological ordering if required.
123  void AddPred(SUnit *SU, const SDep &D) {
124    Topo.AddPred(SU, D.getSUnit());
125    SU->addPred(D);
126  }
127
128  /// RemovePred - Removes a predecessor edge from SUnit SU.
129  /// The corresponding successor edge is removed from the predecessor as well.
130  /// Updates the topological ordering if required.
131  void RemovePred(SUnit *SU, const SDep &D) {
132    Topo.RemovePred(SU, D.getSUnit());
133    SU->removePred(D);
134  }
135
136private:
137  void ReleasePred(SUnit *SU, const SDep *PredEdge);
138  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
139  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
140  void ReleaseSuccessors(SUnit *SU);
141  void CapturePred(SDep *PredEdge);
142  void ScheduleNodeBottomUp(SUnit*, unsigned);
143  void ScheduleNodeTopDown(SUnit*, unsigned);
144  void UnscheduleNodeBottomUp(SUnit*);
145  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
146  SUnit *CopyAndMoveSuccessors(SUnit*);
147  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
148                                const TargetRegisterClass*,
149                                const TargetRegisterClass*,
150                                SmallVector<SUnit*, 2>&);
151  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
152  void ListScheduleTopDown();
153  void ListScheduleBottomUp();
154
155
156  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
157  /// Updates the topological ordering if required.
158  SUnit *CreateNewSUnit(SDNode *N) {
159    unsigned NumSUnits = SUnits.size();
160    SUnit *NewNode = NewSUnit(N);
161    // Update the topological ordering.
162    if (NewNode->NodeNum >= NumSUnits)
163      Topo.InitDAGTopologicalSorting();
164    return NewNode;
165  }
166
167  /// CreateClone - Creates a new SUnit from an existing one.
168  /// Updates the topological ordering if required.
169  SUnit *CreateClone(SUnit *N) {
170    unsigned NumSUnits = SUnits.size();
171    SUnit *NewNode = Clone(N);
172    // Update the topological ordering.
173    if (NewNode->NodeNum >= NumSUnits)
174      Topo.InitDAGTopologicalSorting();
175    return NewNode;
176  }
177
178  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
179  /// need actual latency information but the hybrid scheduler does.
180  bool ForceUnitLatencies() const {
181    return !NeedLatency;
182  }
183};
184}  // end anonymous namespace
185
186
187/// Schedule - Schedule the DAG using list scheduling.
188void ScheduleDAGRRList::Schedule() {
189  DEBUG(dbgs()
190        << "********** List Scheduling BB#" << BB->getNumber()
191        << " **********\n");
192
193  NumLiveRegs = 0;
194  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
195  LiveRegCycles.resize(TRI->getNumRegs(), 0);
196
197  // Build the scheduling graph.
198  BuildSchedGraph(NULL);
199
200  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
201          SUnits[su].dumpAll(this));
202  Topo.InitDAGTopologicalSorting();
203
204  AvailableQueue->initNodes(SUnits);
205
206  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
207  if (isBottomUp)
208    ListScheduleBottomUp();
209  else
210    ListScheduleTopDown();
211
212  AvailableQueue->releaseState();
213}
214
215//===----------------------------------------------------------------------===//
216//  Bottom-Up Scheduling
217//===----------------------------------------------------------------------===//
218
219/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
220/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
221void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
222  SUnit *PredSU = PredEdge->getSUnit();
223
224#ifndef NDEBUG
225  if (PredSU->NumSuccsLeft == 0) {
226    dbgs() << "*** Scheduling failed! ***\n";
227    PredSU->dump(this);
228    dbgs() << " has been released too many times!\n";
229    llvm_unreachable(0);
230  }
231#endif
232  --PredSU->NumSuccsLeft;
233
234  if (!ForceUnitLatencies()) {
235    // Update the predecessor's height. This is now the cycle when the
236    // predecessor can be scheduled without causing a pipeline stall.
237    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
238  }
239
240  // If all the node's successors are scheduled, this node is ready
241  // to be scheduled. Ignore the special EntrySU node.
242  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
243    PredSU->isAvailable = true;
244    AvailableQueue->push(PredSU);
245  }
246}
247
248void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
249  // Bottom up: release predecessors
250  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
251       I != E; ++I) {
252    ReleasePred(SU, &*I);
253    if (I->isAssignedRegDep()) {
254      // This is a physical register dependency and it's impossible or
255      // expensive to copy the register. Make sure nothing that can
256      // clobber the register is scheduled between the predecessor and
257      // this node.
258      if (!LiveRegDefs[I->getReg()]) {
259        ++NumLiveRegs;
260        LiveRegDefs[I->getReg()] = I->getSUnit();
261        LiveRegCycles[I->getReg()] = CurCycle;
262      }
263    }
264  }
265}
266
267/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
268/// count of its predecessors. If a predecessor pending count is zero, add it to
269/// the Available queue.
270void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
271  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
272  DEBUG(SU->dump(this));
273
274#ifndef NDEBUG
275  if (CurCycle < SU->getHeight())
276    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
277#endif
278
279  // FIXME: Handle noop hazard.
280  SU->setHeightToAtLeast(CurCycle);
281  Sequence.push_back(SU);
282
283  AvailableQueue->ScheduledNode(SU);
284
285  ReleasePredecessors(SU, CurCycle);
286
287  // Release all the implicit physical register defs that are live.
288  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
289       I != E; ++I) {
290    if (I->isAssignedRegDep()) {
291      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
292        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
293        assert(LiveRegDefs[I->getReg()] == SU &&
294               "Physical register dependency violated?");
295        --NumLiveRegs;
296        LiveRegDefs[I->getReg()] = NULL;
297        LiveRegCycles[I->getReg()] = 0;
298      }
299    }
300  }
301
302  SU->isScheduled = true;
303}
304
305/// CapturePred - This does the opposite of ReleasePred. Since SU is being
306/// unscheduled, increase the succ left count of its predecessors. Remove
307/// them from AvailableQueue if necessary.
308void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
309  SUnit *PredSU = PredEdge->getSUnit();
310  if (PredSU->isAvailable) {
311    PredSU->isAvailable = false;
312    if (!PredSU->isPending)
313      AvailableQueue->remove(PredSU);
314  }
315
316  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
317  ++PredSU->NumSuccsLeft;
318}
319
320/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
321/// its state and that of its predecessors to reflect the change.
322void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
323  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
324  DEBUG(SU->dump(this));
325
326  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
327       I != E; ++I) {
328    CapturePred(&*I);
329    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
330      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
331      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
332             "Physical register dependency violated?");
333      --NumLiveRegs;
334      LiveRegDefs[I->getReg()] = NULL;
335      LiveRegCycles[I->getReg()] = 0;
336    }
337  }
338
339  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
340       I != E; ++I) {
341    if (I->isAssignedRegDep()) {
342      if (!LiveRegDefs[I->getReg()]) {
343        LiveRegDefs[I->getReg()] = SU;
344        ++NumLiveRegs;
345      }
346      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
347        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
348    }
349  }
350
351  SU->setHeightDirty();
352  SU->isScheduled = false;
353  SU->isAvailable = true;
354  AvailableQueue->push(SU);
355  AvailableQueue->UnscheduledNode(SU);
356}
357
358/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
359/// BtCycle in order to schedule a specific node.
360void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
361                                          unsigned &CurCycle) {
362  SUnit *OldSU = NULL;
363  while (CurCycle > BtCycle) {
364    OldSU = Sequence.back();
365    Sequence.pop_back();
366    if (SU->isSucc(OldSU))
367      // Don't try to remove SU from AvailableQueue.
368      SU->isAvailable = false;
369    UnscheduleNodeBottomUp(OldSU);
370    --CurCycle;
371    AvailableQueue->setCurCycle(CurCycle);
372  }
373
374  assert(!SU->isSucc(OldSU) && "Something is wrong!");
375
376  ++NumBacktracks;
377}
378
379static bool isOperandOf(const SUnit *SU, SDNode *N) {
380  for (const SDNode *SUNode = SU->getNode(); SUNode;
381       SUNode = SUNode->getFlaggedNode()) {
382    if (SUNode->isOperandOf(N))
383      return true;
384  }
385  return false;
386}
387
388/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
389/// successors to the newly created node.
390SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
391  if (SU->getNode()->getFlaggedNode())
392    return NULL;
393
394  SDNode *N = SU->getNode();
395  if (!N)
396    return NULL;
397
398  SUnit *NewSU;
399  bool TryUnfold = false;
400  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
401    EVT VT = N->getValueType(i);
402    if (VT == MVT::Flag)
403      return NULL;
404    else if (VT == MVT::Other)
405      TryUnfold = true;
406  }
407  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
408    const SDValue &Op = N->getOperand(i);
409    EVT VT = Op.getNode()->getValueType(Op.getResNo());
410    if (VT == MVT::Flag)
411      return NULL;
412  }
413
414  if (TryUnfold) {
415    SmallVector<SDNode*, 2> NewNodes;
416    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
417      return NULL;
418
419    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
420    assert(NewNodes.size() == 2 && "Expected a load folding node!");
421
422    N = NewNodes[1];
423    SDNode *LoadNode = NewNodes[0];
424    unsigned NumVals = N->getNumValues();
425    unsigned OldNumVals = SU->getNode()->getNumValues();
426    for (unsigned i = 0; i != NumVals; ++i)
427      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
428    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
429                                   SDValue(LoadNode, 1));
430
431    // LoadNode may already exist. This can happen when there is another
432    // load from the same location that produces the same type of value
433    // but has different alignment or volatility.
434    bool isNewLoad = true;
435    SUnit *LoadSU;
436    if (LoadNode->getNodeId() != -1) {
437      LoadSU = &SUnits[LoadNode->getNodeId()];
438      isNewLoad = false;
439    } else {
440      LoadSU = CreateNewSUnit(LoadNode);
441      LoadNode->setNodeId(LoadSU->NodeNum);
442      ComputeLatency(LoadSU);
443    }
444
445    SUnit *NewSU = CreateNewSUnit(N);
446    assert(N->getNodeId() == -1 && "Node already inserted!");
447    N->setNodeId(NewSU->NodeNum);
448
449    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
450    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
451      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
452        NewSU->isTwoAddress = true;
453        break;
454      }
455    }
456    if (TID.isCommutable())
457      NewSU->isCommutable = true;
458    ComputeLatency(NewSU);
459
460    // Record all the edges to and from the old SU, by category.
461    SmallVector<SDep, 4> ChainPreds;
462    SmallVector<SDep, 4> ChainSuccs;
463    SmallVector<SDep, 4> LoadPreds;
464    SmallVector<SDep, 4> NodePreds;
465    SmallVector<SDep, 4> NodeSuccs;
466    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
467         I != E; ++I) {
468      if (I->isCtrl())
469        ChainPreds.push_back(*I);
470      else if (isOperandOf(I->getSUnit(), LoadNode))
471        LoadPreds.push_back(*I);
472      else
473        NodePreds.push_back(*I);
474    }
475    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
476         I != E; ++I) {
477      if (I->isCtrl())
478        ChainSuccs.push_back(*I);
479      else
480        NodeSuccs.push_back(*I);
481    }
482
483    // Now assign edges to the newly-created nodes.
484    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
485      const SDep &Pred = ChainPreds[i];
486      RemovePred(SU, Pred);
487      if (isNewLoad)
488        AddPred(LoadSU, Pred);
489    }
490    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
491      const SDep &Pred = LoadPreds[i];
492      RemovePred(SU, Pred);
493      if (isNewLoad)
494        AddPred(LoadSU, Pred);
495    }
496    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
497      const SDep &Pred = NodePreds[i];
498      RemovePred(SU, Pred);
499      AddPred(NewSU, Pred);
500    }
501    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
502      SDep D = NodeSuccs[i];
503      SUnit *SuccDep = D.getSUnit();
504      D.setSUnit(SU);
505      RemovePred(SuccDep, D);
506      D.setSUnit(NewSU);
507      AddPred(SuccDep, D);
508    }
509    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
510      SDep D = ChainSuccs[i];
511      SUnit *SuccDep = D.getSUnit();
512      D.setSUnit(SU);
513      RemovePred(SuccDep, D);
514      if (isNewLoad) {
515        D.setSUnit(LoadSU);
516        AddPred(SuccDep, D);
517      }
518    }
519
520    // Add a data dependency to reflect that NewSU reads the value defined
521    // by LoadSU.
522    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
523
524    if (isNewLoad)
525      AvailableQueue->addNode(LoadSU);
526    AvailableQueue->addNode(NewSU);
527
528    ++NumUnfolds;
529
530    if (NewSU->NumSuccsLeft == 0) {
531      NewSU->isAvailable = true;
532      return NewSU;
533    }
534    SU = NewSU;
535  }
536
537  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
538  NewSU = CreateClone(SU);
539
540  // New SUnit has the exact same predecessors.
541  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
542       I != E; ++I)
543    if (!I->isArtificial())
544      AddPred(NewSU, *I);
545
546  // Only copy scheduled successors. Cut them from old node's successor
547  // list and move them over.
548  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
549  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
550       I != E; ++I) {
551    if (I->isArtificial())
552      continue;
553    SUnit *SuccSU = I->getSUnit();
554    if (SuccSU->isScheduled) {
555      SDep D = *I;
556      D.setSUnit(NewSU);
557      AddPred(SuccSU, D);
558      D.setSUnit(SU);
559      DelDeps.push_back(std::make_pair(SuccSU, D));
560    }
561  }
562  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
563    RemovePred(DelDeps[i].first, DelDeps[i].second);
564
565  AvailableQueue->updateNode(SU);
566  AvailableQueue->addNode(NewSU);
567
568  ++NumDups;
569  return NewSU;
570}
571
572/// InsertCopiesAndMoveSuccs - Insert register copies and move all
573/// scheduled successors of the given SUnit to the last copy.
574void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
575                                               const TargetRegisterClass *DestRC,
576                                               const TargetRegisterClass *SrcRC,
577                                               SmallVector<SUnit*, 2> &Copies) {
578  SUnit *CopyFromSU = CreateNewSUnit(NULL);
579  CopyFromSU->CopySrcRC = SrcRC;
580  CopyFromSU->CopyDstRC = DestRC;
581
582  SUnit *CopyToSU = CreateNewSUnit(NULL);
583  CopyToSU->CopySrcRC = DestRC;
584  CopyToSU->CopyDstRC = SrcRC;
585
586  // Only copy scheduled successors. Cut them from old node's successor
587  // list and move them over.
588  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
589  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
590       I != E; ++I) {
591    if (I->isArtificial())
592      continue;
593    SUnit *SuccSU = I->getSUnit();
594    if (SuccSU->isScheduled) {
595      SDep D = *I;
596      D.setSUnit(CopyToSU);
597      AddPred(SuccSU, D);
598      DelDeps.push_back(std::make_pair(SuccSU, *I));
599    }
600  }
601  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
602    RemovePred(DelDeps[i].first, DelDeps[i].second);
603
604  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
605  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
606
607  AvailableQueue->updateNode(SU);
608  AvailableQueue->addNode(CopyFromSU);
609  AvailableQueue->addNode(CopyToSU);
610  Copies.push_back(CopyFromSU);
611  Copies.push_back(CopyToSU);
612
613  ++NumPRCopies;
614}
615
616/// getPhysicalRegisterVT - Returns the ValueType of the physical register
617/// definition of the specified node.
618/// FIXME: Move to SelectionDAG?
619static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
620                                 const TargetInstrInfo *TII) {
621  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
622  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
623  unsigned NumRes = TID.getNumDefs();
624  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
625    if (Reg == *ImpDef)
626      break;
627    ++NumRes;
628  }
629  return N->getValueType(NumRes);
630}
631
632/// CheckForLiveRegDef - Return true and update live register vector if the
633/// specified register def of the specified SUnit clobbers any "live" registers.
634static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
635                               std::vector<SUnit*> &LiveRegDefs,
636                               SmallSet<unsigned, 4> &RegAdded,
637                               SmallVector<unsigned, 4> &LRegs,
638                               const TargetRegisterInfo *TRI) {
639  bool Added = false;
640  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
641    if (RegAdded.insert(Reg)) {
642      LRegs.push_back(Reg);
643      Added = true;
644    }
645  }
646  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
647    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
648      if (RegAdded.insert(*Alias)) {
649        LRegs.push_back(*Alias);
650        Added = true;
651      }
652    }
653  return Added;
654}
655
656/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
657/// scheduling of the given node to satisfy live physical register dependencies.
658/// If the specified node is the last one that's available to schedule, do
659/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
660bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
661                                                 SmallVector<unsigned, 4> &LRegs){
662  if (NumLiveRegs == 0)
663    return false;
664
665  SmallSet<unsigned, 4> RegAdded;
666  // If this node would clobber any "live" register, then it's not ready.
667  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
668       I != E; ++I) {
669    if (I->isAssignedRegDep())
670      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
671                         RegAdded, LRegs, TRI);
672  }
673
674  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
675    if (Node->getOpcode() == ISD::INLINEASM) {
676      // Inline asm can clobber physical defs.
677      unsigned NumOps = Node->getNumOperands();
678      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
679        --NumOps;  // Ignore the flag operand.
680
681      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
682        unsigned Flags =
683          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
684        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
685
686        ++i; // Skip the ID value.
687        if (InlineAsm::isRegDefKind(Flags) ||
688            InlineAsm::isRegDefEarlyClobberKind(Flags)) {
689          // Check for def of register or earlyclobber register.
690          for (; NumVals; --NumVals, ++i) {
691            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
692            if (TargetRegisterInfo::isPhysicalRegister(Reg))
693              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
694          }
695        } else
696          i += NumVals;
697      }
698      continue;
699    }
700
701    if (!Node->isMachineOpcode())
702      continue;
703    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
704    if (!TID.ImplicitDefs)
705      continue;
706    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
707      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
708  }
709  return !LRegs.empty();
710}
711
712
713/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
714/// schedulers.
715void ScheduleDAGRRList::ListScheduleBottomUp() {
716  unsigned CurCycle = 0;
717
718  // Release any predecessors of the special Exit node.
719  ReleasePredecessors(&ExitSU, CurCycle);
720
721  // Add root to Available queue.
722  if (!SUnits.empty()) {
723    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
724    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
725    RootSU->isAvailable = true;
726    AvailableQueue->push(RootSU);
727  }
728
729  // While Available queue is not empty, grab the node with the highest
730  // priority. If it is not ready put it back.  Schedule the node.
731  SmallVector<SUnit*, 4> NotReady;
732  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
733  Sequence.reserve(SUnits.size());
734  while (!AvailableQueue->empty()) {
735    bool Delayed = false;
736    LRegsMap.clear();
737    SUnit *CurSU = AvailableQueue->pop();
738    while (CurSU) {
739      SmallVector<unsigned, 4> LRegs;
740      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
741        break;
742      Delayed = true;
743      LRegsMap.insert(std::make_pair(CurSU, LRegs));
744
745      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
746      NotReady.push_back(CurSU);
747      CurSU = AvailableQueue->pop();
748    }
749
750    // All candidates are delayed due to live physical reg dependencies.
751    // Try backtracking, code duplication, or inserting cross class copies
752    // to resolve it.
753    if (Delayed && !CurSU) {
754      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
755        SUnit *TrySU = NotReady[i];
756        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
757
758        // Try unscheduling up to the point where it's safe to schedule
759        // this node.
760        unsigned LiveCycle = CurCycle;
761        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
762          unsigned Reg = LRegs[j];
763          unsigned LCycle = LiveRegCycles[Reg];
764          LiveCycle = std::min(LiveCycle, LCycle);
765        }
766        SUnit *OldSU = Sequence[LiveCycle];
767        if (!WillCreateCycle(TrySU, OldSU))  {
768          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
769          // Force the current node to be scheduled before the node that
770          // requires the physical reg dep.
771          if (OldSU->isAvailable) {
772            OldSU->isAvailable = false;
773            AvailableQueue->remove(OldSU);
774          }
775          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
776                              /*Reg=*/0, /*isNormalMemory=*/false,
777                              /*isMustAlias=*/false, /*isArtificial=*/true));
778          // If one or more successors have been unscheduled, then the current
779          // node is no longer available. Schedule a successor that's now
780          // available instead.
781          if (!TrySU->isAvailable)
782            CurSU = AvailableQueue->pop();
783          else {
784            CurSU = TrySU;
785            TrySU->isPending = false;
786            NotReady.erase(NotReady.begin()+i);
787          }
788          break;
789        }
790      }
791
792      if (!CurSU) {
793        // Can't backtrack. If it's too expensive to copy the value, then try
794        // duplicating the nodes that produce these "too expensive to copy"
795        // values to break the dependency. If even that doesn't work,
796        // insert cross-class copies.
797        // If it's not too expensive, i.e. cost != -1, issue copies.
798        SUnit *TrySU = NotReady[0];
799        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
800        assert(LRegs.size() == 1 && "Can't handle this yet!");
801        unsigned Reg = LRegs[0];
802        SUnit *LRDef = LiveRegDefs[Reg];
803        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
804        const TargetRegisterClass *RC =
805          TRI->getMinimalPhysRegClass(Reg, VT);
806        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
807
808        // If the cross-copy register class is null, then it must be possible to
809        // copy the value directly. Do not try to duplicate the def.
810        SUnit *NewDef = 0;
811        if (DestRC)
812          NewDef = CopyAndMoveSuccessors(LRDef);
813        else
814          DestRC = RC;
815        if (!NewDef) {
816          // Issue copies, these can be expensive cross register class copies.
817          SmallVector<SUnit*, 2> Copies;
818          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
819          DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
820                       << " to SU #" << Copies.front()->NodeNum << "\n");
821          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
822                              /*Reg=*/0, /*isNormalMemory=*/false,
823                              /*isMustAlias=*/false,
824                              /*isArtificial=*/true));
825          NewDef = Copies.back();
826        }
827
828        DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
829                     << " to SU #" << TrySU->NodeNum << "\n");
830        LiveRegDefs[Reg] = NewDef;
831        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
832                             /*Reg=*/0, /*isNormalMemory=*/false,
833                             /*isMustAlias=*/false,
834                             /*isArtificial=*/true));
835        TrySU->isAvailable = false;
836        CurSU = NewDef;
837      }
838
839      assert(CurSU && "Unable to resolve live physical register dependencies!");
840    }
841
842    // Add the nodes that aren't ready back onto the available list.
843    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
844      NotReady[i]->isPending = false;
845      // May no longer be available due to backtracking.
846      if (NotReady[i]->isAvailable)
847        AvailableQueue->push(NotReady[i]);
848    }
849    NotReady.clear();
850
851    if (CurSU)
852      ScheduleNodeBottomUp(CurSU, CurCycle);
853    ++CurCycle;
854    AvailableQueue->setCurCycle(CurCycle);
855  }
856
857  // Reverse the order since the schedule was built bottom-up.
858  std::reverse(Sequence.begin(), Sequence.end());
859
860#ifndef NDEBUG
861  VerifySchedule(isBottomUp);
862#endif
863}
864
865//===----------------------------------------------------------------------===//
866//  Top-Down Scheduling
867//===----------------------------------------------------------------------===//
868
869/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
870/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
871void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
872  SUnit *SuccSU = SuccEdge->getSUnit();
873
874#ifndef NDEBUG
875  if (SuccSU->NumPredsLeft == 0) {
876    dbgs() << "*** Scheduling failed! ***\n";
877    SuccSU->dump(this);
878    dbgs() << " has been released too many times!\n";
879    llvm_unreachable(0);
880  }
881#endif
882  --SuccSU->NumPredsLeft;
883
884  // If all the node's predecessors are scheduled, this node is ready
885  // to be scheduled. Ignore the special ExitSU node.
886  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
887    SuccSU->isAvailable = true;
888    AvailableQueue->push(SuccSU);
889  }
890}
891
892void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
893  // Top down: release successors
894  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
895       I != E; ++I) {
896    assert(!I->isAssignedRegDep() &&
897           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
898
899    ReleaseSucc(SU, &*I);
900  }
901}
902
903/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
904/// count of its successors. If a successor pending count is zero, add it to
905/// the Available queue.
906void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
907  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
908  DEBUG(SU->dump(this));
909
910  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
911  SU->setDepthToAtLeast(CurCycle);
912  Sequence.push_back(SU);
913
914  ReleaseSuccessors(SU);
915  SU->isScheduled = true;
916  AvailableQueue->ScheduledNode(SU);
917}
918
919/// ListScheduleTopDown - The main loop of list scheduling for top-down
920/// schedulers.
921void ScheduleDAGRRList::ListScheduleTopDown() {
922  unsigned CurCycle = 0;
923  AvailableQueue->setCurCycle(CurCycle);
924
925  // Release any successors of the special Entry node.
926  ReleaseSuccessors(&EntrySU);
927
928  // Add all leaves to the Available queue.
929  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
930    // It is available if it has no predecessors.
931    if (SUnits[i].Preds.empty()) {
932      AvailableQueue->push(&SUnits[i]);
933      SUnits[i].isAvailable = true;
934    }
935  }
936
937  // While Available queue is not empty, grab the node with the highest
938  // priority. If it is not ready put it back.  Schedule the node.
939  Sequence.reserve(SUnits.size());
940  while (!AvailableQueue->empty()) {
941    SUnit *CurSU = AvailableQueue->pop();
942
943    if (CurSU)
944      ScheduleNodeTopDown(CurSU, CurCycle);
945    ++CurCycle;
946    AvailableQueue->setCurCycle(CurCycle);
947  }
948
949#ifndef NDEBUG
950  VerifySchedule(isBottomUp);
951#endif
952}
953
954
955//===----------------------------------------------------------------------===//
956//                RegReductionPriorityQueue Implementation
957//===----------------------------------------------------------------------===//
958//
959// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
960// to reduce register pressure.
961//
962namespace {
963  template<class SF>
964  class RegReductionPriorityQueue;
965
966  /// Sorting functions for the Available queue.
967  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
968    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
969    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
970    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
971
972    bool operator()(const SUnit* left, const SUnit* right) const;
973  };
974
975  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
976    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
977    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
978    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
979
980    bool operator()(const SUnit* left, const SUnit* right) const;
981  };
982
983  struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
984    RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
985    src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
986      : SPQ(spq) {}
987    src_ls_rr_sort(const src_ls_rr_sort &RHS)
988      : SPQ(RHS.SPQ) {}
989
990    bool operator()(const SUnit* left, const SUnit* right) const;
991  };
992
993  struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
994    RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
995    hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
996      : SPQ(spq) {}
997    hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
998      : SPQ(RHS.SPQ) {}
999
1000    bool operator()(const SUnit* left, const SUnit* right) const;
1001  };
1002}  // end anonymous namespace
1003
1004/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1005/// Smaller number is the higher priority.
1006static unsigned
1007CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1008  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1009  if (SethiUllmanNumber != 0)
1010    return SethiUllmanNumber;
1011
1012  unsigned Extra = 0;
1013  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1014       I != E; ++I) {
1015    if (I->isCtrl()) continue;  // ignore chain preds
1016    SUnit *PredSU = I->getSUnit();
1017    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1018    if (PredSethiUllman > SethiUllmanNumber) {
1019      SethiUllmanNumber = PredSethiUllman;
1020      Extra = 0;
1021    } else if (PredSethiUllman == SethiUllmanNumber)
1022      ++Extra;
1023  }
1024
1025  SethiUllmanNumber += Extra;
1026
1027  if (SethiUllmanNumber == 0)
1028    SethiUllmanNumber = 1;
1029
1030  return SethiUllmanNumber;
1031}
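// Worked example (explanatory comment, not in the original source): a leaf with
// no data predecessors gets number 1, and a node with a single data predecessor
// simply inherits that predecessor's number, so a straight-line chain is all 1s.
// A node whose two data predecessors both compute to 1 takes the maximum (1)
// plus one for the tie, giving 2.  Because a smaller number means higher
// priority, the cheaper subtree is popped earlier in the bottom-up pass and thus
// placed later in the final (reversed) program order, so the register-hungrier
// subtree is evaluated first -- the classic Sethi-Ullman evaluation order.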
1032
1033namespace {
1034  template<class SF>
1035  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
1036    std::vector<SUnit*> Queue;
1037    SF Picker;
1038    unsigned CurQueueId;
1039    bool isBottomUp;
1040
1041  protected:
1042    // SUnits - The SUnits for the current graph.
1043    std::vector<SUnit> *SUnits;
1044
1045    MachineFunction &MF;
1046    const TargetInstrInfo *TII;
1047    const TargetRegisterInfo *TRI;
1048    const TargetLowering *TLI;
1049    ScheduleDAGRRList *scheduleDAG;
1050
1051    // SethiUllmanNumbers - The SethiUllman number for each node.
1052    std::vector<unsigned> SethiUllmanNumbers;
1053
1054    /// RegPressure - Tracking current reg pressure per register class.
1055    ///
1056    std::vector<unsigned> RegPressure;
1057
1058    /// RegLimit - Tracking the number of allocatable registers per register
1059    /// class.
1060    std::vector<unsigned> RegLimit;
1061
1062  public:
1063    RegReductionPriorityQueue(MachineFunction &mf,
1064                              bool isbottomup,
1065                              const TargetInstrInfo *tii,
1066                              const TargetRegisterInfo *tri,
1067                              const TargetLowering *tli)
1068      : Picker(this), CurQueueId(0), isBottomUp(isbottomup),
1069        MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
1070      unsigned NumRC = TRI->getNumRegClasses();
1071      RegLimit.resize(NumRC);
1072      RegPressure.resize(NumRC);
1073      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1074      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1075      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1076             E = TRI->regclass_end(); I != E; ++I)
1077        RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1;
1078    }
1079
1080    void initNodes(std::vector<SUnit> &sunits) {
1081      SUnits = &sunits;
1082      // Add pseudo dependency edges for two-address nodes.
1083      AddPseudoTwoAddrDeps();
1084      // Reroute edges to nodes with multiple uses.
1085      PrescheduleNodesWithMultipleUses();
1086      // Calculate node priorities.
1087      CalculateSethiUllmanNumbers();
1088    }
1089
1090    void addNode(const SUnit *SU) {
1091      unsigned SUSize = SethiUllmanNumbers.size();
1092      if (SUnits->size() > SUSize)
1093        SethiUllmanNumbers.resize(SUSize*2, 0);
1094      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1095    }
1096
1097    void updateNode(const SUnit *SU) {
1098      SethiUllmanNumbers[SU->NodeNum] = 0;
1099      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1100    }
1101
1102    void releaseState() {
1103      SUnits = 0;
1104      SethiUllmanNumbers.clear();
1105      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1106    }
1107
1108    unsigned getNodePriority(const SUnit *SU) const {
1109      assert(SU->NodeNum < SethiUllmanNumbers.size());
1110      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1111      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1112        // CopyToReg should be close to its uses to facilitate coalescing and
1113        // avoid spilling.
1114        return 0;
1115      if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1116          Opc == TargetOpcode::SUBREG_TO_REG ||
1117          Opc == TargetOpcode::INSERT_SUBREG)
1118        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1119        // close to their uses to facilitate coalescing.
1120        return 0;
1121      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1122        // If SU does not have a register use, i.e. it doesn't produce a value
1123        // that would be consumed (e.g. store), then it terminates a chain of
1124        // computation.  Give it a large SethiUllman number so it will be
1125        // scheduled right before its predecessors so that it doesn't lengthen
1126        // their live ranges.
1127        return 0xffff;
1128      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1129        // If SU does not have a register def, schedule it close to its uses
1130        // because it does not lengthen any live ranges.
1131        return 0;
1132      return SethiUllmanNumbers[SU->NodeNum];
1133    }
1134
1135    unsigned getNodeOrdering(const SUnit *SU) const {
1136      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1137    }
1138
1139    bool empty() const { return Queue.empty(); }
1140
1141    void push(SUnit *U) {
1142      assert(!U->NodeQueueId && "Node in the queue already");
1143      U->NodeQueueId = ++CurQueueId;
1144      Queue.push_back(U);
1145    }
1146
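    // Note (explanatory comment, not in the original source): pop() does a
    // linear scan of the underlying vector and returns the element the Picker
    // comparator ranks best, so each pop costs O(n) rather than the O(log n)
    // of a binary heap; remove() below likewise searches linearly.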
1147    SUnit *pop() {
1148      if (empty()) return NULL;
1149      std::vector<SUnit *>::iterator Best = Queue.begin();
1150      for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
1151           E = Queue.end(); I != E; ++I)
1152        if (Picker(*Best, *I))
1153          Best = I;
1154      SUnit *V = *Best;
1155      if (Best != prior(Queue.end()))
1156        std::swap(*Best, Queue.back());
1157      Queue.pop_back();
1158      V->NodeQueueId = 0;
1159      return V;
1160    }
1161
1162    void remove(SUnit *SU) {
1163      assert(!Queue.empty() && "Queue is empty!");
1164      assert(SU->NodeQueueId != 0 && "Not in queue!");
1165      std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1166                                                   SU);
1167      if (I != prior(Queue.end()))
1168        std::swap(*I, Queue.back());
1169      Queue.pop_back();
1170      SU->NodeQueueId = 0;
1171    }
1172
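    /// HighRegPressure - Return true if scheduling SU is expected to raise the
    /// tracked pressure of some register class above its RegLimit, because the
    /// values defined by SU's data predecessors would become live.
    /// (Explanatory comment added; not in the original source.)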
1173    bool HighRegPressure(const SUnit *SU) const {
1174      if (!TLI)
1175        return false;
1176
1177      for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1178           I != E; ++I) {
1179        if (I->isCtrl())
1180          continue;
1181        SUnit *PredSU = I->getSUnit();
1182        const SDNode *PN = PredSU->getNode();
1183        if (!PN->isMachineOpcode()) {
1184          if (PN->getOpcode() == ISD::CopyToReg) {
1185            EVT VT = PN->getOperand(1).getValueType();
1186            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1187            unsigned Cost = TLI->getRepRegClassCostFor(VT);
1188            if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
1189              return true;
1190          }
1191          continue;
1192        }
1193        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1194        for (unsigned i = 0; i != NumDefs; ++i) {
1195          EVT VT = PN->getValueType(i);
1196          if (!PN->hasAnyUseOfValue(i))
1197            continue;
1198          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1199          unsigned Cost = TLI->getRepRegClassCostFor(VT);
1200          // Check if this increases register pressure of the specific register
1201          // class to the point where it would cause spills.
1202          if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
1203            return true;
1204        }
1205      }
1206
1207      return false;
1208    }
1209
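    /// OpenPredLives - Pressure bookkeeping for a node scheduled bottom-up
    /// (explanatory comment added; not in the original source): data
    /// predecessors whose first scheduled use this is have their defs'
    /// register-class costs added to RegPressure, while the values SU itself
    /// defines are now fully consumed and have their costs subtracted.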
1210    void OpenPredLives(SUnit *SU) {
1211      const SDNode *N = SU->getNode();
1212      if (!N->isMachineOpcode())
1213        return;
1214      unsigned Opc = N->getMachineOpcode();
1215      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
1216          Opc == TargetOpcode::REG_SEQUENCE ||
1217          Opc == TargetOpcode::IMPLICIT_DEF)
1218        return;
1219
1220      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1221           I != E; ++I) {
1222        if (I->isCtrl())
1223          continue;
1224        SUnit *PredSU = I->getSUnit();
1225        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1226          continue;
1227        const SDNode *PN = PredSU->getNode();
1228        if (!PN->isMachineOpcode()) {
1229          if (PN->getOpcode() == ISD::CopyToReg) {
1230            EVT VT = PN->getOperand(1).getValueType();
1231            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1232            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1233          }
1234          continue;
1235        }
1236        unsigned POpc = PN->getMachineOpcode();
1237        if (POpc == TargetOpcode::IMPLICIT_DEF)
1238          continue;
1239        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1240        for (unsigned i = 0; i != NumDefs; ++i) {
1241          EVT VT = PN->getValueType(i);
1242          if (!PN->hasAnyUseOfValue(i))
1243            continue;
1244          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1245          RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1246        }
1247      }
1248
1249      if (!SU->NumSuccs)
1250        return;
1251      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1252      for (unsigned i = 0; i != NumDefs; ++i) {
1253        EVT VT = N->getValueType(i);
1254        if (!N->hasAnyUseOfValue(i))
1255          continue;
1256        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1257        if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1258          // Register pressure tracking is imprecise. This can happen.
1259          RegPressure[RCId] = 0;
1260        else
1261          RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1262      }
1263    }
1264
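    /// ClosePredLives - Rough inverse of OpenPredLives, invoked when a node is
    /// unscheduled during backtracking: it re-opens the live ranges of SU's own
    /// defs and removes the pressure contributed by predecessor values that are
    /// no longer live.  (Explanatory comment added; not in the original source.)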
1265    void ClosePredLives(SUnit *SU) {
1266      const SDNode *N = SU->getNode();
1267      if (!N->isMachineOpcode())
1268        return;
1269      unsigned Opc = N->getMachineOpcode();
1270      if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
1271          Opc == TargetOpcode::REG_SEQUENCE ||
1272          Opc == TargetOpcode::IMPLICIT_DEF)
1273        return;
1274
1275      for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1276           I != E; ++I) {
1277        if (I->isCtrl())
1278          continue;
1279        SUnit *PredSU = I->getSUnit();
1280        if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
1281          continue;
1282        const SDNode *PN = PredSU->getNode();
1283        if (!PN->isMachineOpcode()) {
1284          if (PN->getOpcode() == ISD::CopyToReg) {
1285            EVT VT = PN->getOperand(1).getValueType();
1286            unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1287            RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1288          }
1289          continue;
1290        }
1291        unsigned POpc = PN->getMachineOpcode();
1292        if (POpc == TargetOpcode::IMPLICIT_DEF)
1293          continue;
1294        unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
1295        for (unsigned i = 0; i != NumDefs; ++i) {
1296          EVT VT = PN->getValueType(i);
1297          if (!PN->hasAnyUseOfValue(i))
1298            continue;
1299          unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1300          if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
1301            // Register pressure tracking is imprecise. This can happen.
1302            RegPressure[RCId] = 0;
1303          else
1304            RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
1305        }
1306      }
1307
1308      if (!SU->NumSuccs)
1309        return;
1310      unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1311      for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1312        EVT VT = N->getValueType(i);
1313        if (VT == MVT::Flag || VT == MVT::Other)
1314          continue;
1315        if (!N->hasAnyUseOfValue(i))
1316          continue;
1317        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1318        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
1319      }
1320    }
1321
1322    void ScheduledNode(SUnit *SU) {
1323      if (!TLI || !isBottomUp)
1324        return;
1325      OpenPredLives(SU);
1326      dumpRegPressure();
1327    }
1328
1329    void UnscheduledNode(SUnit *SU) {
1330      if (!TLI || !isBottomUp)
1331        return;
1332      ClosePredLives(SU);
1333      dumpRegPressure();
1334    }
1335
1336    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1337      scheduleDAG = scheduleDag;
1338    }
1339
1340    void dumpRegPressure() const {
1341      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1342             E = TRI->regclass_end(); I != E; ++I) {
1343        const TargetRegisterClass *RC = *I;
1344        unsigned Id = RC->getID();
1345        unsigned RP = RegPressure[Id];
1346        if (!RP) continue;
1347        DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1348              << '\n');
1349      }
1350    }
1351
1352  protected:
1353    bool canClobber(const SUnit *SU, const SUnit *Op);
1354    void AddPseudoTwoAddrDeps();
1355    void PrescheduleNodesWithMultipleUses();
1356    void CalculateSethiUllmanNumbers();
1357  };
1358
1359  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1360    BURegReductionPriorityQueue;
1361
1362  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1363    TDRegReductionPriorityQueue;
1364
1365  typedef RegReductionPriorityQueue<src_ls_rr_sort>
1366    SrcRegReductionPriorityQueue;
1367
1368  typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1369    HybridBURRPriorityQueue;
1370}
1371
1372/// closestSucc - Returns the scheduled cycle of the successor which is
1373/// closest to the current cycle.
1374static unsigned closestSucc(const SUnit *SU) {
1375  unsigned MaxHeight = 0;
1376  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1377       I != E; ++I) {
1378    if (I->isCtrl()) continue;  // ignore chain succs
1379    unsigned Height = I->getSUnit()->getHeight();
1380    // If there are a bunch of CopyToRegs stacked up, they should be considered
1381    // to be at the same position.
1382    if (I->getSUnit()->getNode() &&
1383        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1384      Height = closestSucc(I->getSUnit())+1;
1385    if (Height > MaxHeight)
1386      MaxHeight = Height;
1387  }
1388  return MaxHeight;
1389}
1390
1391/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1392/// for scratch registers, i.e. number of data dependencies.
1393static unsigned calcMaxScratches(const SUnit *SU) {
1394  unsigned Scratches = 0;
1395  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1396       I != E; ++I) {
1397    if (I->isCtrl()) continue;  // ignore chain preds
1398    Scratches++;
1399  }
1400  return Scratches;
1401}
1402
1403template <typename RRSort>
1404static bool BURRSort(const SUnit *left, const SUnit *right,
1405                     const RegReductionPriorityQueue<RRSort> *SPQ) {
1406  unsigned LPriority = SPQ->getNodePriority(left);
1407  unsigned RPriority = SPQ->getNodePriority(right);
1408  if (LPriority != RPriority)
1409    return LPriority > RPriority;
1410
1411  // Try to schedule def + use closer together when Sethi-Ullman numbers are the same.
1412  // e.g.
1413  // t1 = op t2, c1
1414  // t3 = op t4, c2
1415  //
1416  // and the following instructions are both ready.
1417  // t2 = op c3
1418  // t4 = op c4
1419  //
1420  // Then schedule t2 = op first.
1421  // i.e.
1422  // t4 = op c4
1423  // t2 = op c3
1424  // t1 = op t2, c1
1425  // t3 = op t4, c2
1426  //
1427  // This creates more short live intervals.
1428  unsigned LDist = closestSucc(left);
1429  unsigned RDist = closestSucc(right);
1430  if (LDist != RDist)
1431    return LDist < RDist;
1432
1433  // How many registers become live when the node is scheduled.
1434  unsigned LScratch = calcMaxScratches(left);
1435  unsigned RScratch = calcMaxScratches(right);
1436  if (LScratch != RScratch)
1437    return LScratch > RScratch;
1438
1439  if (left->getHeight() != right->getHeight())
1440    return left->getHeight() > right->getHeight();
1441
1442  if (left->getDepth() != right->getDepth())
1443    return left->getDepth() < right->getDepth();
1444
1445  assert(left->NodeQueueId && right->NodeQueueId &&
1446         "NodeQueueId cannot be zero");
1447  return (left->NodeQueueId > right->NodeQueueId);
1448}
1449
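// Note (explanatory comment, not in the original source): these comparators are
// consumed by RegReductionPriorityQueue::pop() above; returning true for
// (left, right) means right is the better candidate to schedule next.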
1450// Bottom up
1451bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1452  return BURRSort(left, right, SPQ);
1453}
1454
1455// Source order, otherwise bottom up.
1456bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1457  unsigned LOrder = SPQ->getNodeOrdering(left);
1458  unsigned ROrder = SPQ->getNodeOrdering(right);
1459
1460  // Prefer the node with the lower non-zero source order number; an order
1461  // number of zero means the node has no source-order preference.
1462  if ((LOrder || ROrder) && LOrder != ROrder)
1463    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
1464
1465  return BURRSort(left, right, SPQ);
1466}
1467
1468bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
1469  bool LHigh = SPQ->HighRegPressure(left);
1470  bool RHigh = SPQ->HighRegPressure(right);
1471  if (LHigh && !RHigh)
1472    return true;
1473  else if (!LHigh && RHigh)
1474    return false;
1475  else if (!LHigh && !RHigh) {
1476    // Low register pressure situation, schedule for latency if possible.
1477    bool LStall = left->SchedulingPref == Sched::Latency &&
1478      SPQ->getCurCycle() < left->getHeight();
1479    bool RStall = right->SchedulingPref == Sched::Latency &&
1480      SPQ->getCurCycle() < right->getHeight();
1481    // If scheduling one of the nodes will cause a pipeline stall, delay it.
1482    // If scheduling both nodes will cause pipeline stalls, sort
1483    // them according to their height.
1484    // If neither will cause a pipeline stall, try to reduce register pressure.
1485    if (LStall) {
1486      if (!RStall)
1487        return true;
1488      if (left->getHeight() != right->getHeight())
1489        return left->getHeight() > right->getHeight();
1490    } else if (RStall)
1491      return false;
1492
1493    // If either node prefers latency scheduling, sort them by height first,
1494    // then by latency.
1495    if (left->SchedulingPref == Sched::Latency ||
1496        right->SchedulingPref == Sched::Latency) {
1497      if (left->getHeight() != right->getHeight())
1498        return left->getHeight() > right->getHeight();
1499      if (left->Latency != right->Latency)
1500        return left->Latency > right->Latency;
1501    }
1502  }
1503
1504  return BURRSort(left, right, SPQ);
1505}
1506
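/// canClobber - Return true if SU is a two-address instruction and one of its
/// tied (def&use) operands is produced by the node that Op represents, i.e.
/// SU may clobber a value defined by Op.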
1507template<class SF>
1508bool
1509RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1510  if (SU->isTwoAddress) {
1511    unsigned Opc = SU->getNode()->getMachineOpcode();
1512    const TargetInstrDesc &TID = TII->get(Opc);
1513    unsigned NumRes = TID.getNumDefs();
1514    unsigned NumOps = TID.getNumOperands() - NumRes;
1515    for (unsigned i = 0; i != NumOps; ++i) {
1516      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1517        SDNode *DU = SU->getNode()->getOperand(i).getNode();
1518        if (DU->getNodeId() != -1 &&
1519            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1520          return true;
1521      }
1522    }
1523  }
1524  return false;
1525}
1526
1527/// hasCopyToRegUse - Return true if SU has a value successor that is a
1528/// CopyToReg node.
1529static bool hasCopyToRegUse(const SUnit *SU) {
1530  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1531       I != E; ++I) {
1532    if (I->isCtrl()) continue;
1533    const SUnit *SuccSU = I->getSUnit();
1534    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1535      return true;
1536  }
1537  return false;
1538}
1539
1540/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1541/// physical register defs.
1542static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1543                                  const TargetInstrInfo *TII,
1544                                  const TargetRegisterInfo *TRI) {
1545  SDNode *N = SuccSU->getNode();
1546  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1547  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1548  assert(ImpDefs && "Caller should check hasPhysRegDefs");
1549  for (const SDNode *SUNode = SU->getNode(); SUNode;
1550       SUNode = SUNode->getFlaggedNode()) {
1551    if (!SUNode->isMachineOpcode())
1552      continue;
1553    const unsigned *SUImpDefs =
1554      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1555    if (!SUImpDefs)
1556      return false;
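    // The first NumDefs result values of N are its explicit defs; the
    // remaining values (other than chain and flag) correspond to the
    // implicit physical register defs listed in ImpDefs.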
1557    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1558      EVT VT = N->getValueType(i);
1559      if (VT == MVT::Flag || VT == MVT::Other)
1560        continue;
1561      if (!N->hasAnyUseOfValue(i))
1562        continue;
1563      unsigned Reg = ImpDefs[i - NumDefs];
1564      for (;*SUImpDefs; ++SUImpDefs) {
1565        unsigned SUReg = *SUImpDefs;
1566        if (TRI->regsOverlap(Reg, SUReg))
1567          return true;
1568      }
1569    }
1570  }
1571  return false;
1572}
1573
1574/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1575/// are not handled well by the general register pressure reduction
1576/// heuristics. When presented with code like this:
1577///
1578///      N
1579///    / |
1580///   /  |
1581///  U  store
1582///  |
1583/// ...
1584///
1585/// the heuristics tend to push the store up, but since the
1586/// operand of the store has another use (U), this would lengthen
1587/// the live range of N's value along its other use (the U->N edge).
1588///
1589/// This function transforms code like the above to route U's
1590/// dependence through the store when possible, like this:
1591///
1592///      N
1593///      ||
1594///      ||
1595///     store
1596///       |
1597///       U
1598///       |
1599///      ...
1600///
1601/// This results in the store being scheduled immediately
1602/// after N, which shortens the U->N live range, reducing
1603/// register pressure.
1604///
1605template<class SF>
1606void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1607  // Visit all the nodes in topological order, working top-down.
1608  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1609    SUnit *SU = &(*SUnits)[i];
1610    // For now, only look at nodes with no data successors, such as stores.
1611    // These are especially important, due to the heuristics in
1612    // getNodePriority for nodes with no data successors.
1613    if (SU->NumSuccs != 0)
1614      continue;
1615    // For now, only look at nodes with exactly one data predecessor.
1616    if (SU->NumPreds != 1)
1617      continue;
1618    // Avoid prescheduling copies to virtual registers, which don't behave
1619    // like other nodes from the perspective of scheduling heuristics.
1620    if (SDNode *N = SU->getNode())
1621      if (N->getOpcode() == ISD::CopyToReg &&
1622          TargetRegisterInfo::isVirtualRegister
1623            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1624        continue;
1625
1626    // Locate the single data predecessor.
1627    SUnit *PredSU = 0;
1628    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1629         EE = SU->Preds.end(); II != EE; ++II)
1630      if (!II->isCtrl()) {
1631        PredSU = II->getSUnit();
1632        break;
1633      }
1634    assert(PredSU);
1635
1636    // Don't rewrite edges that carry physregs, because that requires additional
1637    // support infrastructure.
1638    if (PredSU->hasPhysRegDefs)
1639      continue;
1640    // Short-circuit the case where SU is PredSU's only data successor.
1641    if (PredSU->NumSuccs == 1)
1642      continue;
1643    // Avoid prescheduling copies from virtual registers, which don't behave
1644    // like other nodes from the perspective of scheduling heuristics.
1645    if (SDNode *N = SU->getNode())
1646      if (N->getOpcode() == ISD::CopyFromReg &&
1647          TargetRegisterInfo::isVirtualRegister
1648            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1649        continue;
1650
1651    // Perform checks on the successors of PredSU.
1652    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1653         EE = PredSU->Succs.end(); II != EE; ++II) {
1654      SUnit *PredSuccSU = II->getSUnit();
1655      if (PredSuccSU == SU) continue;
1656      // If PredSU has another successor with no data successors, for
1657      // now don't attempt to choose either over the other.
1658      if (PredSuccSU->NumSuccs == 0)
1659        goto outer_loop_continue;
1660      // Don't break physical register dependencies.
1661      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1662        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1663          goto outer_loop_continue;
1664      // Don't introduce graph cycles.
1665      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1666        goto outer_loop_continue;
1667    }
1668
1669    // Ok, the transformation is safe and the heuristics suggest it is
1670    // profitable. Update the graph.
1671    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
1672                 << " next to PredSU #" << PredSU->NodeNum
1673                 << " to guide scheduling in the presence of multiple uses\n");
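    // Reroute every other successor edge of PredSU through SU: the original
    // PredSU->SuccSU edge is removed and replaced with PredSU->SU and
    // SU->SuccSU edges, so SuccSU's direct dependence on PredSU now goes
    // through SU.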
1674    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1675      SDep Edge = PredSU->Succs[i];
1676      assert(!Edge.isAssignedRegDep());
1677      SUnit *SuccSU = Edge.getSUnit();
1678      if (SuccSU != SU) {
1679        Edge.setSUnit(PredSU);
1680        scheduleDAG->RemovePred(SuccSU, Edge);
1681        scheduleDAG->AddPred(SU, Edge);
1682        Edge.setSUnit(SU);
1683        scheduleDAG->AddPred(SuccSU, Edge);
1684        --i;
1685      }
1686    }
1687  outer_loop_continue:;
1688  }
1689}
1690
1691/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1692/// it as a def&use operand, add a pseudo control edge from it to the other
1693/// node (if it won't create a cycle) so the two-address one will be scheduled
1694/// first (lower in the schedule). If both nodes are two-address, favor the
1695/// one that has a CopyToReg use (more likely to be a loop induction update).
1696/// If both are two-address, but one is commutable while the other is not
1697/// commutable, favor the one that's not commutable.
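///
/// For example (illustrative pseudo code): given
///   t2 = ADD t1, 1     ; two-address, so the def reuses t1's register
///   ... = CMP t1, 0    ; another use of t1
/// the artificial edge forces the CMP to be placed above the ADD in the final
/// schedule, so t1 is read before the two-address ADD clobbers its register.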
1698template<class SF>
1699void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1700  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1701    SUnit *SU = &(*SUnits)[i];
1702    if (!SU->isTwoAddress)
1703      continue;
1704
1705    SDNode *Node = SU->getNode();
1706    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1707      continue;
1708
1709    unsigned Opc = Node->getMachineOpcode();
1710    const TargetInstrDesc &TID = TII->get(Opc);
1711    unsigned NumRes = TID.getNumDefs();
1712    unsigned NumOps = TID.getNumOperands() - NumRes;
1713    for (unsigned j = 0; j != NumOps; ++j) {
1714      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1715        continue;
1716      SDNode *DU = SU->getNode()->getOperand(j).getNode();
1717      if (DU->getNodeId() == -1)
1718        continue;
1719      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1720      if (!DUSU) continue;
1721      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1722           E = DUSU->Succs.end(); I != E; ++I) {
1723        if (I->isCtrl()) continue;
1724        SUnit *SuccSU = I->getSUnit();
1725        if (SuccSU == SU)
1726          continue;
1727        // Be conservative. Ignore this successor if it and SU aren't at
1728        // roughly the same height.
1729        if (SuccSU->getHeight() < SU->getHeight() &&
1730            (SU->getHeight() - SuccSU->getHeight()) > 1)
1731          continue;
1732        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1733        // constrains whatever is using the copy, instead of the copy
1734        // itself. In the case that the copy is coalesced, this
1735        // preserves the intent of the pseudo two-address heuristics.
1736        while (SuccSU->Succs.size() == 1 &&
1737               SuccSU->getNode()->isMachineOpcode() &&
1738               SuccSU->getNode()->getMachineOpcode() ==
1739                 TargetOpcode::COPY_TO_REGCLASS)
1740          SuccSU = SuccSU->Succs.front().getSUnit();
1741        // Don't constrain non-instruction nodes.
1742        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1743          continue;
1744        // Don't constrain nodes with physical register defs if the
1745        // predecessor can clobber them.
1746        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1747          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1748            continue;
1749        }
1750        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1751        // these may be coalesced away. We want them close to their uses.
1752        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1753        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
1754            SuccOpc == TargetOpcode::INSERT_SUBREG ||
1755            SuccOpc == TargetOpcode::SUBREG_TO_REG)
1756          continue;
1757        if ((!canClobber(SuccSU, DUSU) ||
1758             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1759             (!SU->isCommutable && SuccSU->isCommutable)) &&
1760            !scheduleDAG->IsReachable(SuccSU, SU)) {
1761          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
1762                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1763          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1764                                        /*Reg=*/0, /*isNormalMemory=*/false,
1765                                        /*isMustAlias=*/false,
1766                                        /*isArtificial=*/true));
1767        }
1768      }
1769    }
1770  }
1771}
1772
1773/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1774/// scheduling units.
1775template<class SF>
1776void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1777  SethiUllmanNumbers.assign(SUnits->size(), 0);
1778
1779  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1780    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1781}
1782
1783/// LimitedSumOfUnscheduledPredsOfSuccs - Count the unscheduled predecessors
1784/// of the successors of the SUnit SU, stopping as soon as the count exceeds
1785/// the provided limit.
1786static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1787                                                    unsigned Limit) {
1788  unsigned Sum = 0;
1789  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1790       I != E; ++I) {
1791    const SUnit *SuccSU = I->getSUnit();
1792    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1793         EE = SuccSU->Preds.end(); II != EE; ++II) {
1794      SUnit *PredSU = II->getSUnit();
1795      if (!PredSU->isScheduled)
1796        if (++Sum > Limit)
1797          return Sum;
1798    }
1799  }
1800  return Sum;
1801}
1802
1803
1804// Top down
1805bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1806  unsigned LPriority = SPQ->getNodePriority(left);
1807  unsigned RPriority = SPQ->getNodePriority(right);
1808  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1809  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
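  // A "floater" is a machine node with no predecessors; it is not anchored by
  // any operands and can be placed relatively freely in the schedule.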
1810  bool LIsFloater = LIsTarget && left->NumPreds == 0;
1811  bool RIsFloater = RIsTarget && right->NumPreds == 0;
1812  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1813  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1814
1815  if (left->NumSuccs == 0 && right->NumSuccs != 0)
1816    return false;
1817  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1818    return true;
1819
1820  if (LIsFloater)
1821    LBonus -= 2;
1822  if (RIsFloater)
1823    RBonus -= 2;
1824  if (left->NumSuccs == 1)
1825    LBonus += 2;
1826  if (right->NumSuccs == 1)
1827    RBonus += 2;
1828
1829  if (LPriority+LBonus != RPriority+RBonus)
1830    return LPriority+LBonus < RPriority+RBonus;
1831
1832  if (left->getDepth() != right->getDepth())
1833    return left->getDepth() < right->getDepth();
1834
1835  if (left->NumSuccsLeft != right->NumSuccsLeft)
1836    return left->NumSuccsLeft > right->NumSuccsLeft;
1837
1838  assert(left->NodeQueueId && right->NodeQueueId &&
1839         "NodeQueueId cannot be zero");
1840  return (left->NodeQueueId > right->NodeQueueId);
1841}
1842
1843//===----------------------------------------------------------------------===//
1844//                         Public Constructor Functions
1845//===----------------------------------------------------------------------===//
1846
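// Each factory below creates a register reduction priority queue and the
// ScheduleDAGRRList that uses it, then gives the queue a back-pointer to the
// scheduler so its heuristics can query and mutate the DAG.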
1847llvm::ScheduleDAGSDNodes *
1848llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1849  const TargetMachine &TM = IS->TM;
1850  const TargetInstrInfo *TII = TM.getInstrInfo();
1851  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1852
1853  BURegReductionPriorityQueue *PQ =
1854    new BURegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0);
1855  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1856  PQ->setScheduleDAG(SD);
1857  return SD;
1858}
1859
1860llvm::ScheduleDAGSDNodes *
1861llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1862  const TargetMachine &TM = IS->TM;
1863  const TargetInstrInfo *TII = TM.getInstrInfo();
1864  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1865
1866  TDRegReductionPriorityQueue *PQ =
1867    new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
1868  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
1869  PQ->setScheduleDAG(SD);
1870  return SD;
1871}
1872
1873llvm::ScheduleDAGSDNodes *
1874llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1875  const TargetMachine &TM = IS->TM;
1876  const TargetInstrInfo *TII = TM.getInstrInfo();
1877  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1878
1879  SrcRegReductionPriorityQueue *PQ =
1880    new SrcRegReductionPriorityQueue(*IS->MF, true, TII, TRI, 0);
1881  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
1882  PQ->setScheduleDAG(SD);
1883  return SD;
1884}
1885
1886llvm::ScheduleDAGSDNodes *
1887llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1888  const TargetMachine &TM = IS->TM;
1889  const TargetInstrInfo *TII = TM.getInstrInfo();
1890  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1891  const TargetLowering *TLI = &IS->getTargetLowering();
1892
1893  HybridBURRPriorityQueue *PQ =
1894    new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI,
1895                                (RegPressureAware ? TLI : 0));
1896  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
1897  PQ->setScheduleDAG(SD);
1898  return SD;
1899}
1900