ScheduleDAGRRList.cpp revision 3f23744df4809eba94284e601e81489212c974d4
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

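// For orientation, the bottom-up loop implemented below amounts to the
// following sketch (illustrative pseudocode only, not part of the original
// file):
//
//   AvailableQueue->push(RootSU);
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();        // highest priority first
//     if (/* SU would clobber a live physreg */)
//       /* defer SU: backtrack, clone, or insert cross-class copies */;
//     else
//       ScheduleNodeBottomUp(SU, CurCycle);     // releases predecessors
//     ++CurCycle;
//   }
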
#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumCCCopies,   "Number of cross class copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb,
                    const TargetMachine &tm, bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {}

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, SDep *PredEdge);
  void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
                                  const TargetRegisterClass*,
                                  const TargetRegisterClass*,
                                  SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();
  void CommuteNodesToReducePressure();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
}  // end anonymous namespace

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build scheduling units.
  BuildSchedUnits();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();

  CommuteNodesToReducePressure();
}

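// (Exposition, not in the original: a two-address instruction reuses its
// first source operand's register as the destination. Given a hypothetical
//   t1 = ADD t1, t2      // t1's old value still has uses below this point
// commuting it to "t1 = ADD t2, t1" means t2, rather than t1's old value,
// is the operand that gets clobbered, which can save a copy when t2 has no
// later uses of its own.)
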
/// CommuteNodesToReducePressure - If a node is two-address and commutable, and
/// it is not the last use of its first operand, add it to the CommuteSet if
/// possible. It will be commuted when it is translated to a MI.
void ScheduleDAGRRList::CommuteNodesToReducePressure() {
  SmallPtrSet<SUnit*, 4> OperandSeen;
  for (unsigned i = Sequence.size(); i != 0; ) {
    --i;
    SUnit *SU = Sequence[i];
    if (!SU || !SU->getNode()) continue;
    if (SU->isCommutable) {
      unsigned Opc = SU->getNode()->getMachineOpcode();
      const TargetInstrDesc &TID = TII->get(Opc);
      unsigned NumRes = TID.getNumDefs();
      unsigned NumOps = TID.getNumOperands() - NumRes;
      for (unsigned j = 0; j != NumOps; ++j) {
        if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
          continue;

        SDNode *OpN = SU->getNode()->getOperand(j).getNode();
        SUnit *OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()];
        if (OpSU && OperandSeen.count(OpSU) == 1) {
          // Ok, so SU is not the last use of OpSU, but SU is two-address so
          // it will clobber OpSU. Try to commute SU if no other source operands
          // are live below.
          bool DoCommute = true;
          for (unsigned k = 0; k < NumOps; ++k) {
            if (k != j) {
              OpN = SU->getNode()->getOperand(k).getNode();
              OpSU = isPassiveNode(OpN) ? NULL : &SUnits[OpN->getNodeId()];
              if (OpSU && OperandSeen.count(OpSU) == 1) {
                DoCommute = false;
                break;
              }
            }
          }
          if (DoCommute)
            CommuteSet.insert(SU->getNode());
        }

        // Only look at the first use&def node for now.
        break;
      }
    }

    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (!I->isCtrl())
        OperandSeen.insert(I->getSUnit()->OrigNode);
    }
  }
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (PredSU->NumSuccsLeft == 0) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  // Bottom up: release predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update it
/// and its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node. Unschedules nodes from the
/// tail of the sequence until CurCycle reaches BtCycle.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  if (SU->isSucc(OldSU)) {
    assert(false && "Something is wrong!");
    abort();
  }

  ++NumBacktracks;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    MVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value but
    // with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    SDep ChainPred;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPred = *I;
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    if (ChainPred.getSUnit()) {
      RemovePred(SU, ChainPred);
      if (isNewLoad)
        AddPred(LoadSU, ChainPred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad) {
        AddPred(LoadSU, Pred);
      }
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }
    if (isNewLoad) {
      AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
    }

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

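// (Exposition, not in the original: duplicating the defining node gives the
// already-scheduled successors their own copy of the def, so the physical
// register can be re-defined for the remaining, unscheduled uses without
// resorting to a cross-class copy.)
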
/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
/// and move all scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
    RemovePred(DelDeps[i].first, DelDeps[i].second);
  }

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumCCCopies;
}

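// (Exposition, not in the original: the pair built above copies the value out
// of Reg into the cross-copy register class, then back into Reg's own class
// for the scheduled successors, so the value survives while other defs of
// Reg are scheduled in between.)
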
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
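
// (Illustration on a hypothetical machine: if a node's TargetInstrDesc has
// one explicit def and ImplicitDefs = { R0, R1 }, then for Reg == R1 the loop
// above leaves NumRes == 2, i.e. the node's third result value carries R1's
// type.)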

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      unsigned Reg = I->getReg();
      if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
        if (RegAdded.insert(Reg))
          LRegs.push_back(Reg);
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg);
           *Alias; ++Alias)
        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
      if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
        if (RegAdded.insert(*Reg))
          LRegs.push_back(*Reg);
      }
      for (const unsigned *Alias = TRI->getAliasSet(*Reg);
           *Alias; ++Alias)
        if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
          if (RegAdded.insert(*Alias))
            LRegs.push_back(*Alias);
        }
    }
  }
  return !LRegs.empty();
}
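
// (Exposition, not in the original: bottom-up, a register R is "live" once a
// use of R has been scheduled but the SUnit recorded in LiveRegDefs[R] has
// not. Emitting SU now would place it between that def and its use in program
// order, so if SU writes R, or an alias of R, it must be deferred.)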

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;
  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU)) {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. Try duplicating the node that produces these
        // "expensive to copy" values to break the dependency. In case even
        // that doesn't work, insert cross class copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
        if (!NewDef) {
          // Issue expensive cross register class copies.
          MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
          const TargetRegisterClass *RC =
            TRI->getPhysicalRegisterRegClass(Reg, VT);
          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
          if (!DestRC) {
            assert(false && "Don't know how to copy this physical register!");
            abort();
          }
          SmallVector<SUnit*, 2> Copies;
          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU # " << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU # " << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      if (!CurSU) {
        assert(false && "Unable to resolve live physical register dependencies!");
        abort();
      }
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order since this is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif

  if (SuccSU->NumPredsLeft == 0) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  // Top down: release successors.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I)
    ReleaseSucc(SU, &*I);

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

static inline bool isCopyFromLiveIn(const SUnit *SU) {
  SDNode *N = SU->getNode();
  return N && N->getOpcode() == ISD::CopyFromReg &&
    N->getOperand(N->getNumOperands()-1).getValueType() != MVT::Flag;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber && !I->isCtrl())
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}
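
// (Exposition, not in the original: ignoring chain edges, the recurrence
// computed above is the classic Sethi-Ullman one. For a node n with operand
// subtrees p1..pk,
//   SU(n) = max_i SU(p_i) + (#operands tying for that maximum - 1),
// and leaves get SU = 1; e.g. a binary operation whose operands each need 2
// registers needs 3.)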

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    unsigned currentQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri) :
    Queue(SF(this)), currentQueueId(0),
    TII(tii), TRI(tri), scheduleDAG(NULL) {}

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::CopyFromReg && !isCopyFromLiveIn(SU))
        // CopyFromReg should be close to its def because it restricts
        // allocation choices. But if it is a livein then perhaps we want it
        // closer to its uses so it can be coalesced.
        return 0xffff;
      else if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      else if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
               Opc == TargetInstrInfo::INSERT_SUBREG)
        // EXTRACT_SUBREG / INSERT_SUBREG should be close to its use to
        // facilitate coalescing.
        return 0;
      else if (SU->NumSuccs == 0)
        // If SU does not have a use, i.e. it doesn't produce a value that would
        // be consumed (e.g. store), then it terminates a chain of computation.
        // Give it a large SethiUllman number so it will be scheduled right
        // before its predecessors, so that it doesn't lengthen their live
        // ranges.
        return 0xffff;
      else if (SU->NumPreds == 0)
        // If SU does not have a def, schedule it close to its uses because it
        // does not lengthen any live ranges.
        return 0;
      else
        return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++currentQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers. Live-in operands and live-out results don't count
/// since they are "fixed".
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    if (!I->getSUnit()->getNode() ||
        I->getSUnit()->getNode()->getOpcode() != ISD::CopyFromReg)
      Scratches++;
  }
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    if (!I->getSUnit()->getNode() ||
        I->getSUnit()->getNode()->getOpcode() != ISD::CopyToReg)
      Scratches += 10;
  }
  return Scratches;
}
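
// (Exposition, not in the original: with the weights above, a node with two
// in-block operands whose only use is a CopyToReg scores 1+1 = 2, while the
// same node with a plain in-block use scores 1+1+10 = 12, so values that are
// live-out compare as needing fewer "scratches".)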

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // Intuitively, it's good to push down instructions whose results are
  // liveout so their long live ranges won't conflict with other values
  // which are needed inside the BB. Further prioritize liveout instructions
  // by the number of operands which are calculated within the BB.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  const unsigned *SUImpDefs =
    TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
  if (!SUImpDefs)
    return false;
  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getValueType(i);
    if (VT == MVT::Flag || VT == MVT::Other)
      continue;
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned Reg = ImpDefs[i - NumDefs];
    for (;*SUImpDefs; ++SUImpDefs) {
      unsigned SUReg = *SUImpDefs;
      if (TRI->regsOverlap(Reg, SUReg))
        return true;
    }
  }
  return false;
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG / INSERT_SUBREG; these may be
        // coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
            SuccOpc == TargetInstrInfo::INSERT_SUBREG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
               << " to SU #" << SuccSU->NodeNum << "\n";
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/1,
                                        /*Reg=*/0, /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the number of unscheduled
/// predecessors of the successors of the SUnit SU. Stop counting once the
/// provided limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}

// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAG* llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    const TargetMachine *TM,
                                                    MachineBasicBlock *BB,
                                                    bool) {
  const TargetInstrInfo *TII = TM->getInstrInfo();
  const TargetRegisterInfo *TRI = TM->getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(DAG, BB, *TM, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAG* llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
                                                    SelectionDAG *DAG,
                                                    const TargetMachine *TM,
                                                    MachineBasicBlock *BB,
                                                    bool) {
  const TargetInstrInfo *TII = TM->getInstrInfo();
  const TargetRegisterInfo *TRI = TM->getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(DAG, BB, *TM, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}