ScheduleDAGRRList.cpp revision ac9de036dc0e4065ee2c03419f11515a34ce505e
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other node that modifies
  /// these registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<unsigned> LiveRegCycles;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf,
                    bool isbottomup,
                    SchedulingPriorityQueue *availqueue)
    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
      AvailableQueue(availqueue), Topo(SUnits) {
    }

  ~ScheduleDAGRRList() {
    delete AvailableQueue;
  }

  void Schedule();

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void CapturePred(SDep *PredEdge);
  void ScheduleNodeBottomUp(SUnit*, unsigned);
  void ScheduleNodeTopDown(SUnit*, unsigned);
  void UnscheduleNodeBottomUp(SUnit*);
  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
  void ListScheduleTopDown();
  void ListScheduleBottomUp();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Return true, since register-pressure-reducing
  /// scheduling doesn't need actual latency information.
  bool ForceUnitLatencies() const { return true; }
};
}  // end anonymous namespace


/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DOUT << "********** List Scheduling **********\n";

  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegCycles.resize(TRI->getNumRegs(), 0);

  // Build the scheduling graph.
  BuildSchedGraph();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  --PredSU->NumSuccsLeft;

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    cerr << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;
    AvailableQueue->push(PredSU);
  }
}

void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      if (!LiveRegDefs[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegDefs[I->getReg()] = I->getSUnit();
        LiveRegCycles[I->getReg()] = CurCycle;
      }
    }
  }
}

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
  SU->setHeightToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleasePredecessors(SU, CurCycle);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[I->getReg()] == SU &&
               "Physical register dependency violated?");
        --NumLiveRegs;
        LiveRegDefs[I->getReg()] = NULL;
        LiveRegCycles[I->getReg()] = 0;
      }
    }
  }

  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
  DEBUG(SU->dump(this));

  AvailableQueue->UnscheduledNode(SU);

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegCycles[I->getReg()] = 0;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()]) {
        LiveRegDefs[I->getReg()] = SU;
        ++NumLiveRegs;
      }
      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
    }
  }

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  AvailableQueue->push(SU);
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BtCycle in order to schedule a specific node.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
                                          unsigned &CurCycle) {
  SUnit *OldSU = NULL;
  while (CurCycle > BtCycle) {
    OldSU = Sequence.back();
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    UnscheduleNodeBottomUp(OldSU);
    --CurCycle;
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  ++NumBacktracks;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  if (SU->getNode()->getFlaggedNode())
    return NULL;

  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == EVT::Flag)
      return NULL;
    else if (VT == EVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == EVT::Flag)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DOUT << "Unfolding SU # " << SU->NodeNum << "\n";
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);
      ComputeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (TID.isCommutable())
      NewSU->isCommutable = true;
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (I->getSUnit()->getNode() &&
               I->getSUnit()->getNode()->isOperandOf(LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DOUT << "Duplicating SU # " << SU->NodeNum << "\n";
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                               SmallVector<SUnit*, 2> &Copies) {
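  // A sketch of the dependence structure this function builds: SU's value
  // (live in physical register Reg) is copied out of its register class by
  // CopyFromSU and copied back by CopyToSU, and SU's scheduled successors
  // are rerouted to depend on CopyToSU instead:
  //
  //   SU --Data(Reg)--> CopyFromSU --Data--> CopyToSU --> (scheduled succs)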
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
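/// e.g. an illustrative sketch: the loop below assumes the implicit defs
/// follow the explicit defs in the node's list of values, so for a node
/// with one explicit def and implicit defs [EFLAGS, EAX], asking for EAX
/// yields the value type at index 2.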
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = TID.getNumDefs();
  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Return true and update the live register vector if the
/// specified register def of the specified SUnit clobbers any "live" registers.
static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  bool Added = false;
  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
    if (RegAdded.insert(Reg)) {
      LRegs.push_back(Reg);
      Added = true;
    }
  }
  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
      if (RegAdded.insert(*Alias)) {
        LRegs.push_back(*Alias);
        Added = true;
      }
    }
  return Added;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
                                                 SmallVector<unsigned, 4> &LRegs){
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep())
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == EVT::Flag)
        --NumOps;  // Ignore the flag operand.

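      // Each operand group of the INLINEASM node (after the leading chain
      // and asm-string operands, hence i = 2 below) starts with a flag
      // word. As decoded below, the low 3 bits hold the operand kind
      // (2 = register def, 6 = earlyclobber register def) and the next
      // 13 bits hold the number of register values in the group.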
      for (unsigned i = 2; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = (Flags & 0xffff) >> 3;

        ++i; // Skip the ID value.
        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
    if (!TID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }
  return !LRegs.empty();
}


/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  unsigned CurCycle = 0;

  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU, CurCycle);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  SmallVector<SUnit*, 4> NotReady;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    bool Delayed = false;
    LRegsMap.clear();
    SUnit *CurSU = AvailableQueue->pop();
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      Delayed = true;
      LRegsMap.insert(std::make_pair(CurSU, LRegs));

      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      NotReady.push_back(CurSU);
      CurSU = AvailableQueue->pop();
    }

    // All candidates are delayed due to live physical reg dependencies.
    // Try backtracking, code duplication, or inserting cross class copies
    // to resolve it.
    if (Delayed && !CurSU) {
      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
        SUnit *TrySU = NotReady[i];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

        // Try unscheduling up to the point where it's safe to schedule
        // this node.
        unsigned LiveCycle = CurCycle;
        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
          unsigned Reg = LRegs[j];
          unsigned LCycle = LiveRegCycles[Reg];
          LiveCycle = std::min(LiveCycle, LCycle);
        }
        SUnit *OldSU = Sequence[LiveCycle];
        if (!WillCreateCycle(TrySU, OldSU))  {
          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
          // Force the current node to be scheduled before the node that
          // requires the physical reg dep.
          if (OldSU->isAvailable) {
            OldSU->isAvailable = false;
            AvailableQueue->remove(OldSU);
          }
          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false, /*isArtificial=*/true));
          // If one or more successors have been unscheduled, then the current
          // node is no longer available. Schedule a successor that's now
          // available instead.
          if (!TrySU->isAvailable)
            CurSU = AvailableQueue->pop();
          else {
            CurSU = TrySU;
            TrySU->isPending = false;
            NotReady.erase(NotReady.begin()+i);
          }
          break;
        }
      }

      if (!CurSU) {
        // Can't backtrack. If it's too expensive to copy the value, then try
        // duplicating the nodes that produce these "too expensive to copy"
        // values to break the dependency. If even that doesn't work, insert
        // cross class copies.
        // If it's not too expensive, i.e. cost != -1, issue copies.
        SUnit *TrySU = NotReady[0];
        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
        assert(LRegs.size() == 1 && "Can't handle this yet!");
        unsigned Reg = LRegs[0];
        SUnit *LRDef = LiveRegDefs[Reg];
        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
        const TargetRegisterClass *RC =
          TRI->getPhysicalRegisterRegClass(Reg, VT);
        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

        // If the cross copy register class is null, then it must be possible
        // to copy the value directly. Do not try to duplicate the def.
        SUnit *NewDef = 0;
        if (DestRC)
          NewDef = CopyAndMoveSuccessors(LRDef);
        else
          DestRC = RC;
        if (!NewDef) {
          // Issue copies; these can be expensive cross register class copies.
          SmallVector<SUnit*, 2> Copies;
          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
          DOUT << "Adding an edge from SU #" << TrySU->NodeNum
               << " to SU #" << Copies.front()->NodeNum << "\n";
          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                              /*Reg=*/0, /*isNormalMemory=*/false,
                              /*isMustAlias=*/false,
                              /*isArtificial=*/true));
          NewDef = Copies.back();
        }

        DOUT << "Adding an edge from SU #" << NewDef->NodeNum
             << " to SU #" << TrySU->NodeNum << "\n";
        LiveRegDefs[Reg] = NewDef;
        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                             /*Reg=*/0, /*isNormalMemory=*/false,
                             /*isMustAlias=*/false,
                             /*isArtificial=*/true));
        TrySU->isAvailable = false;
        CurSU = NewDef;
      }

      assert(CurSU && "Unable to resolve live physical register dependencies!");
    }

    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
      NotReady[i]->isPending = false;
      // May no longer be available due to backtracking.
      if (NotReady[i]->isAvailable)
        AvailableQueue->push(NotReady[i]);
    }
    NotReady.clear();

    if (CurSU)
      ScheduleNodeBottomUp(CurSU, CurCycle);
    ++CurCycle;
  }

  // Reverse the order since it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();
  --SuccSU->NumPredsLeft;

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft < 0) {
    cerr << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    cerr << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
  DOUT << "*** Scheduling [" << CurCycle << "]: ";
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  unsigned CurCycle = 0;

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back.  Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU, CurCycle);
    ++CurCycle;
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Implementation
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
  template<class SF>
  class RegReductionPriorityQueue;

  /// Sorting functions for the Available queue.
  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };

  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

    bool operator()(const SUnit* left, const SUnit* right) const;
  };
}  // end anonymous namespace

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
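/// A worked sketch of the recurrence below: a leaf gets number 1; a node
/// whose data preds have numbers {2, 2} takes the max (2) plus one for the
/// extra tying pred, giving 3, while preds {1, 2} just yield the max, 2.
/// Since a smaller number means higher priority, subtrees that need fewer
/// registers end up scheduled closer to their uses.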
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
  if (SethiUllmanNumber != 0)
    return SethiUllmanNumber;

  unsigned Extra = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = I->getSUnit();
    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
    if (PredSethiUllman > SethiUllmanNumber) {
      SethiUllmanNumber = PredSethiUllman;
      Extra = 0;
    } else if (PredSethiUllman == SethiUllmanNumber)
      ++Extra;
  }

  SethiUllmanNumber += Extra;

  if (SethiUllmanNumber == 0)
    SethiUllmanNumber = 1;

  return SethiUllmanNumber;
}

namespace {
  template<class SF>
  class VISIBILITY_HIDDEN RegReductionPriorityQueue
   : public SchedulingPriorityQueue {
    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
    unsigned currentQueueId;

  protected:
    // SUnits - The SUnits for the current graph.
    std::vector<SUnit> *SUnits;

    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ScheduleDAGRRList *scheduleDAG;

    // SethiUllmanNumbers - The SethiUllman number for each node.
    std::vector<unsigned> SethiUllmanNumbers;

  public:
    RegReductionPriorityQueue(const TargetInstrInfo *tii,
                              const TargetRegisterInfo *tri) :
    Queue(SF(this)), currentQueueId(0),
    TII(tii), TRI(tri), scheduleDAG(NULL) {}

    void initNodes(std::vector<SUnit> &sunits) {
      SUnits = &sunits;
      // Add pseudo dependency edges for two-address nodes.
      AddPseudoTwoAddrDeps();
      // Reroute edges to nodes with multiple uses.
      PrescheduleNodesWithMultipleUses();
      // Calculate node priorities.
      CalculateSethiUllmanNumbers();
    }

    void addNode(const SUnit *SU) {
      unsigned SUSize = SethiUllmanNumbers.size();
      if (SUnits->size() > SUSize)
        SethiUllmanNumbers.resize(SUSize*2, 0);
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void updateNode(const SUnit *SU) {
      SethiUllmanNumbers[SU->NodeNum] = 0;
      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
    }

    void releaseState() {
      SUnits = 0;
      SethiUllmanNumbers.clear();
    }

    unsigned getNodePriority(const SUnit *SU) const {
      assert(SU->NodeNum < SethiUllmanNumbers.size());
      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
        // CopyToReg should be close to its uses to facilitate coalescing and
        // avoid spilling.
        return 0;
      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
          Opc == TargetInstrInfo::SUBREG_TO_REG ||
          Opc == TargetInstrInfo::INSERT_SUBREG)
        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
        // close to their uses to facilitate coalescing.
        return 0;
      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
        // If SU does not have a register use, i.e. it doesn't produce a value
        // that would be consumed (e.g. store), then it terminates a chain of
        // computation.  Give it a large SethiUllman number so it will be
        // scheduled right before its predecessors, so that it doesn't lengthen
        // their live ranges.
        return 0xffff;
      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
        // If SU does not have a register def, schedule it close to its uses
        // because it does not lengthen any live ranges.
        return 0;
      return SethiUllmanNumbers[SU->NodeNum];
    }

    unsigned size() const { return Queue.size(); }

    bool empty() const { return Queue.empty(); }

    void push(SUnit *U) {
      assert(!U->NodeQueueId && "Node in the queue already");
      U->NodeQueueId = ++currentQueueId;
      Queue.push(U);
    }

    void push_all(const std::vector<SUnit *> &Nodes) {
      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
        push(Nodes[i]);
    }

    SUnit *pop() {
      if (empty()) return NULL;
      SUnit *V = Queue.top();
      Queue.pop();
      V->NodeQueueId = 0;
      return V;
    }

    void remove(SUnit *SU) {
      assert(!Queue.empty() && "Queue is empty!");
      assert(SU->NodeQueueId != 0 && "Not in queue!");
      Queue.erase_one(SU);
      SU->NodeQueueId = 0;
    }

    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
      scheduleDAG = scheduleDag;
    }

  protected:
    bool canClobber(const SUnit *SU, const SUnit *Op);
    void AddPseudoTwoAddrDeps();
    void PrescheduleNodesWithMultipleUses();
    void CalculateSethiUllmanNumbers();
  };

  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
    BURegReductionPriorityQueue;

  typedef RegReductionPriorityQueue<td_ls_rr_sort>
    TDRegReductionPriorityQueue;
}

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain succs
    unsigned Height = I->getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (I->getSUnit()->getNode() &&
        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(I->getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

// Bottom up
bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  if (LPriority != RPriority)
    return LPriority > RPriority;

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  if (left->getHeight() != right->getHeight())
    return left->getHeight() > right->getHeight();

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}


/// hasCopyToRegUse - Return true if SU has a value successor that is a
/// CopyToReg node.
static bool hasCopyToRegUse(const SUnit *SU) {
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isCtrl()) continue;
    const SUnit *SuccSU = I->getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
      return true;
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getFlaggedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const unsigned *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    if (!SUImpDefs)
      return false;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      EVT VT = N->getValueType(i);
      if (VT == EVT::Flag || VT == EVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
template<class SF>
void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU->NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU->NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = 0;
    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
         EE = SU->Preds.end(); II != EE; ++II)
      if (!II->isCtrl()) {
        PredSU = II->getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires additional
    // support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU->getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
         EE = PredSU->Succs.end(); II != EE; ++II) {
      SUnit *PredSuccSU = II->getSUnit();
      if (PredSuccSU == SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    DOUT << "Prescheduling SU # " << SU->NodeNum
         << " next to PredSU # " << PredSU->NodeNum
         << " to guide scheduling in the presence of multiple uses\n";
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(SU, Edge);
        Edge.setSUnit(SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
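///
/// An illustrative sketch (hypothetical values): if t2 = ADD t1, x ties t2
/// to t1, and t3 = SUB t1, y also reads t1, the artificial edge added here
/// places the SUB before the ADD in the final order, so the ADD's def&use
/// of t1 doesn't force the register allocator to insert a copy of t1 for
/// the SUB.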
template<class SF>
void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
      continue;

    unsigned Opc = Node->getMachineOpcode();
    const TargetInstrDesc &TID = TII->get(Opc);
    unsigned NumRes = TID.getNumDefs();
    unsigned NumOps = TID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetInstrInfo::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
            SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
            SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
          continue;
        if ((!canClobber(SuccSU, DUSU) ||
             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum
               << " to SU #" << SuccSU->NodeNum << "\n";
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                        /*Reg=*/0, /*isNormalMemory=*/false,
                                        /*isMustAlias=*/false,
                                        /*isArtificial=*/true));
        }
      }
    }
  }
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
template<class SF>
void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}

/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
                                                    unsigned Limit) {
  unsigned Sum = 0;
  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    const SUnit *SuccSU = I->getSUnit();
    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
         EE = SuccSU->Preds.end(); II != EE; ++II) {
      SUnit *PredSU = II->getSUnit();
      if (!PredSU->isScheduled)
        if (++Sum > Limit)
          return Sum;
    }
  }
  return Sum;
}


// Top down
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);
  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
  bool LIsFloater = LIsTarget && left->NumPreds == 0;
  bool RIsFloater = RIsTarget && right->NumPreds == 0;
  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;

  if (left->NumSuccs == 0 && right->NumSuccs != 0)
    return false;
  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
    return true;

  if (LIsFloater)
    LBonus -= 2;
  if (RIsFloater)
    RBonus -= 2;
  if (left->NumSuccs == 1)
    LBonus += 2;
  if (right->NumSuccs == 1)
    RBonus += 2;

  if (LPriority+LBonus != RPriority+RBonus)
    return LPriority+LBonus < RPriority+RBonus;

  if (left->getDepth() != right->getDepth())
    return left->getDepth() < right->getDepth();

  if (left->NumSuccsLeft != right->NumSuccsLeft)
    return left->NumSuccsLeft > right->NumSuccsLeft;

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, true, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}

llvm::ScheduleDAGSDNodes *
llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
  const TargetMachine &TM = IS->TM;
  const TargetInstrInfo *TII = TM.getInstrInfo();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();

  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);

  ScheduleDAGRRList *SD =
    new ScheduleDAGRRList(*IS->MF, false, PQ);
  PQ->setScheduleDAG(SD);
  return SD;
}