ScheduleDAGRRList.cpp revision 0b33cd55d1cfaa98fe4571d48caae4946a5e3a54
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These defining nodes must be scheduled before any
  /// other node that modifies the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {
    }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
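    // NewSUnit appends to the SUnits vector, so a NodeNum at or beyond the
    // old size means the cached topological order does not yet cover the
    // new node and must be recomputed.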
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  // NumSuccsLeft is unsigned, so check for an over-release before the
  // decrement rather than testing for a negative count afterwards.
  if (PredSU->NumSuccsLeft == 0) {
    cerr << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update it
/// and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  // Check the node for null before dereferencing it for the flagged-node test.
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (N->getFlaggedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getValueType(i);
    if (VT == MVT::Flag)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    MVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;
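
  // The two copies form a round trip: CopyFromSU moves the live value out of
  // SrcRC into the cross-class DestRC, and CopyToSU moves it back. The
  // already-scheduled successors are rewired below to read CopyToSU's result
  // instead of extending the original physical register's live range.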

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
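  // The machine node's value list carries the explicit defs first, followed
  // by the implicit defs in TargetInstrDesc order, so count the implicit
  // defs that precede Reg to find the index of its value type.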
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update the live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
        --NumOps;  // Ignore the flag operand.

      for (unsigned i = 2; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = (Flags & 0xffff) >> 3;
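        // Each operand group is described by one flag word: the low three
        // bits encode the operand kind and the following bits the number of
        // operand values in the group. Kinds 2 and 6 (checked below) are
        // register defs, 6 being the earlyclobber variant.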

        ++i; // Skip the ID value.
        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU))  {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. In case even that doesn't work,
        // insert cross-class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross copy register class is null, then it must be possible
        // to copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies; these can be expensive cross-register-class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU #" << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU #" << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  // NumPredsLeft is unsigned, so check for an over-release before the
  // decrement rather than testing for a negative count afterwards.
  if (SuccSU->NumPredsLeft == 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    assert(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
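// Sethi-Ullman numbering classically estimates how many registers are needed
// to evaluate an expression tree: evaluating the more register-hungry subtree
// first lets its result sit in a single register while the cheaper subtree is
// computed, minimizing the peak requirement.
//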
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
/// The smaller the number, the higher the priority.
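///
/// For example, a leaf (a node with no data preds) gets number 1. A node
/// whose data preds have numbers {2, 2} gets 2 + 1 = 3 (one extra for the
/// tie), while a node with preds {3, 1} simply inherits 3: the subtree
/// needing three registers is evaluated first and its result held in a
/// single register while the cheaper subtree is computed.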
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    unsigned currentQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri) :
    Queue(SF(this)), currentQueueId(0),
    TII(tii), TRI(tri), scheduleDAG(NULL) {}

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
          Opc == TargetInstrInfo::SUBREG_TO_REG ||
          Opc == TargetInstrInfo::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation.  Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't
        // lengthen their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++currentQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Bottom up
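// The comparators follow std::priority_queue conventions: returning true
// means 'left' sorts below 'right' and is popped later. Since a smaller
// Sethi-Ullman number means higher priority, 'left' loses when its number
// is larger.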
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}


/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      MVT VT = N->getValueType(i);
      if (VT == MVT::Flag || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DOUT << "Prescheduling SU # " << SU->NodeNum
         << " next to PredSU # " << PredSU->NodeNum
         << " to guide scheduling in the presence of multiple uses\n";
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address but one is commutable while the other is not,
/// favor the one that's not commutable.
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
            SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
            SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
               << " to SU #" << SuccSU->NodeNum << "\n";
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}
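
// In the top-down comparator below, a node for which
// LimitedSumOfUnscheduledPredsOfSuccs(SU, 1) == 1 has a single successor
// whose only unscheduled predecessor is SU itself, so scheduling SU
// immediately makes that successor ready; such nodes get a small bonus.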


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, bool) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, bool) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}