ScheduleDAGRRList.cpp revision 47ac0f0c7c39289f5970688154e385be22b7f293
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
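
// A simplified sketch of the bottom-up driver loop implemented below in
// ListScheduleBottomUp (illustrative pseudocode, not the exact code):
//
//   push the DAG root onto AvailableQueue;
//   while AvailableQueue is not empty:
//     pop the highest-priority SUnit that has no live-register conflict;
//     append it to Sequence and release its predecessors, pushing each
//     predecessor whose NumSuccsLeft count drops to zero;
//   reverse Sequence to obtain the final top-down instruction order.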

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {
    }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor's pending count reaches zero,
/// add it to the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}
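
// For illustration only (the SU numbers here are hypothetical): if the
// bottom-up sequence so far is [SU7 @ cycle 0, SU4 @ cycle 1, SU9 @ cycle 2]
// and CurCycle is 3, then BacktrackBottomUp(SU, /*BtCycle=*/1, CurCycle)
// pops and unschedules SU9 and SU4, leaving CurCycle at 1 so that SU can be
// scheduled in their place.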

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (N->getFlaggedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    MVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPred = *I;
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad) {
        AddPred(LoadSU, Pred);
      }
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
    }

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
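
// A sketch of the structure this creates (illustrative; the diagram is not
// from the original source).  For a live physical register Reg defined by SU:
//
//   SU --(Data,Reg)--> CopyFromSU --(Data)--> CopyToSU --> <moved succs>
//
// CopyFromSU copies Reg into a virtual register of the cross-copy class
// DestRC, and CopyToSU copies it back into SrcRC for the successors that
// were already scheduled.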

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      unsigned Reg = I->getReg();
      if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = TRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Otherwise schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross-class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors has been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // to duplicate the nodes that produce these "too expensive to copy"
        // values to break the dependency. If even that doesn't work,
        // insert cross-class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross-copy register class is null, then it must be possible
        // to copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies; these can be expensive cross-register-class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU #" << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU #" << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor's pending count reaches zero,
/// add it to the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Otherwise schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->getNode();
  return N && N->getOpcode() == ISD::CopyFromReg &&
    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
/// A smaller number means a higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl())
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
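
// A worked example (illustrative, not from the original source): for the
// expression tree (a + b) * (c + d), the four leaf nodes each get number 1.
// Each '+' sees two predecessors with equal numbers (1 == 1), so Extra
// becomes 1 and it gets max(1, 1) + 1 = 2.  The '*' likewise sees 2 and 2
// and gets 3.  This is the classic Sethi-Ullman register count: three
// registers suffice to evaluate this tree without spilling.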

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    unsigned currentQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The Sethi-Ullman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri) :
    Queue(SF(this)), currentQueueId(0),
    TII(tii), TRI(tri), scheduleDAG(NULL) {}

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
        // CopyFromReg should be close to its def because it restricts
        // allocation choices. But if it is a livein then perhaps we want it
        // closer to its uses so it can be coalesced.
        return 0xffff;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
          Opc == TargetInstrInfo::INSERT_SUBREG)
        // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
        // facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0)
        // If SU does not have a use, i.e. it doesn't produce a value that would
        // be consumed (e.g. store), then it terminates a chain of computation.
        // Give it a large Sethi-Ullman number so it will be scheduled right
        // before its predecessors, so that it doesn't lengthen their live
        // ranges.
        return 0xffff;
      if (SU->NumPreds == 0)
        // If SU does not have a def, schedule it close to its uses because it
        // does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++currentQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers. Live-in operands and live-out results don't count
/// since they are "fixed".
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (!I->getSUnit()->getNode() ||
        I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg)
      Scratches++;
  }
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    if (!I->getSUnit()->getNode() ||
        I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg)
      Scratches += 10;
  }
  return Scratches;
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same. e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // Intuitively, it's good to push down instructions whose results are
  // liveout so their long live ranges won't conflict with other values
  // which are needed inside the BB. Further prioritize liveout instructions
  // by the number of operands which are calculated within the BB.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}


/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  const unsigned *SUImpDefs =
    TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
  if (!SUImpDefs)
    return false;
  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getValueType(i);
    if (VT == MVT::Flag || VT == MVT::Other)
      continue;
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned Reg = ImpDefs[i - NumDefs];
    for (;*SUImpDefs; ++SUImpDefs) {
      unsigned SUReg = *SUImpDefs;
      if (TRI->regsOverlap(Reg, SUReg))
        return true;
    }
  }
  return false;
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if the nodes aren't at roughly the same
        // height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG / INSERT_SUBREG; these may be
        // coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
            SuccOpc == TargetInstrInfo::INSERT_SUBREG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
               << " to SU #" << SuccSU->NodeNum << "\n";
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}
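
// An illustrative example (the virtual register names are hypothetical):
// suppose "t1 = ADD t0, t2" is two-address, so t1 must be allocated to the
// same register as t0, and some other node also reads t0.  If that other
// reader were placed after the ADD, t0 would already be clobbered and the
// register allocator would have to insert a copy to preserve it.  The
// artificial edge added above makes the other reader a predecessor of the
// two-address node, so it ends up earlier in the final order and no copy
// is needed.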

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, bool) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, bool) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}