ScheduleDAGRRList.cpp revision 882f79a3e629020bcd5b5059bf6df67764aaaa67
1//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements bottom-up and top-down register pressure reduction list
11// schedulers, using standard algorithms.  The basic approach uses a priority
12// queue of available nodes to schedule.  One at a time, nodes are taken from
13// the priority queue (thus in priority order), checked for legality to
14// schedule, and emitted if legal.
15//
16//===----------------------------------------------------------------------===//
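//
// For illustration only, the core bottom-up loop reduces to roughly this
// sketch (the real loop in ListScheduleBottomUp below also handles live
// physical-register interferences, backtracking, and node cloning):
//
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();    // highest-priority ready node
//     if (SU)
//       ScheduleNodeBottomUp(SU, CurCycle); // emit it; release its preds
//     ++CurCycle;
//   }
//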
17
18#define DEBUG_TYPE "pre-RA-sched"
19#include "ScheduleDAGSDNodes.h"
20#include "llvm/CodeGen/SchedulerRegistry.h"
21#include "llvm/CodeGen/SelectionDAGISel.h"
22#include "llvm/Target/TargetRegisterInfo.h"
23#include "llvm/Target/TargetData.h"
24#include "llvm/Target/TargetMachine.h"
25#include "llvm/Target/TargetInstrInfo.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/ErrorHandling.h"
28#include "llvm/ADT/PriorityQueue.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <climits>
34using namespace llvm;
35
36STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
37STATISTIC(NumUnfolds,    "Number of nodes unfolded");
38STATISTIC(NumDups,       "Number of duplicated nodes");
39STATISTIC(NumPRCopies,   "Number of physical register copies");
40
41static RegisterScheduler
42  burrListDAGScheduler("list-burr",
43                       "Bottom-up register reduction list scheduling",
44                       createBURRListDAGScheduler);
45static RegisterScheduler
46  tdrListDAGScheduler("list-tdrr",
47                       "Top-down register reduction list scheduling",
48                       createTDRRListDAGScheduler);
49
50namespace {
51//===----------------------------------------------------------------------===//
52/// ScheduleDAGRRList - The actual register reduction list scheduler
53/// implementation.  This supports both top-down and bottom-up scheduling.
54///
55class ScheduleDAGRRList : public ScheduleDAGSDNodes {
56private:
57  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
58  /// it is top-down.
59  bool isBottomUp;
60
61  /// AvailableQueue - The priority queue to use for the available SUnits.
62  SchedulingPriorityQueue *AvailableQueue;
63
64  /// LiveRegDefs - A set of physical registers and their definitions
65  /// that are "live". These nodes must be scheduled before any other node
66  /// that modifies the registers can be scheduled.
67  unsigned NumLiveRegs;
68  std::vector<SUnit*> LiveRegDefs;
69  std::vector<unsigned> LiveRegCycles;
70
71  /// Topo - A topological ordering for SUnits which permits fast IsReachable
72  /// and similar queries.
73  ScheduleDAGTopologicalSort Topo;
74
75public:
76  ScheduleDAGRRList(MachineFunction &mf,
77                    bool isbottomup,
78                    SchedulingPriorityQueue *availqueue)
79    : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
80      AvailableQueue(availqueue), Topo(SUnits) {
81  }
82
83  ~ScheduleDAGRRList() {
84    delete AvailableQueue;
85  }
86
87  void Schedule();
88
89  /// IsReachable - Checks if SU is reachable from TargetSU.
90  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
91    return Topo.IsReachable(SU, TargetSU);
92  }
93
94  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
95  /// create a cycle.
96  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
97    return Topo.WillCreateCycle(SU, TargetSU);
98  }
99
100  /// AddPred - adds a predecessor edge to SUnit SU.
102  /// Updates the topological ordering if required.
103  void AddPred(SUnit *SU, const SDep &D) {
104    Topo.AddPred(SU, D.getSUnit());
105    SU->addPred(D);
106  }
107
108  /// RemovePred - removes a predecessor edge from SUnit SU.
110  /// Updates the topological ordering if required.
111  void RemovePred(SUnit *SU, const SDep &D) {
112    Topo.RemovePred(SU, D.getSUnit());
113    SU->removePred(D);
114  }
115
116private:
117  void ReleasePred(SUnit *SU, const SDep *PredEdge);
118  void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
119  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
120  void ReleaseSuccessors(SUnit *SU);
121  void CapturePred(SDep *PredEdge);
122  void ScheduleNodeBottomUp(SUnit*, unsigned);
123  void ScheduleNodeTopDown(SUnit*, unsigned);
124  void UnscheduleNodeBottomUp(SUnit*);
125  void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
126  SUnit *CopyAndMoveSuccessors(SUnit*);
127  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
128                                const TargetRegisterClass*,
129                                const TargetRegisterClass*,
130                                SmallVector<SUnit*, 2>&);
131  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
132  void ListScheduleTopDown();
133  void ListScheduleBottomUp();
134
135
136  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
137  /// Updates the topological ordering if required.
138  SUnit *CreateNewSUnit(SDNode *N) {
139    unsigned NumSUnits = SUnits.size();
140    SUnit *NewNode = NewSUnit(N);
141    // Update the topological ordering.
142    if (NewNode->NodeNum >= NumSUnits)
143      Topo.InitDAGTopologicalSorting();
144    return NewNode;
145  }
146
147  /// CreateClone - Creates a new SUnit from an existing one.
148  /// Updates the topological ordering if required.
149  SUnit *CreateClone(SUnit *N) {
150    unsigned NumSUnits = SUnits.size();
151    SUnit *NewNode = Clone(N);
152    // Update the topological ordering.
153    if (NewNode->NodeNum >= NumSUnits)
154      Topo.InitDAGTopologicalSorting();
155    return NewNode;
156  }
157
158  /// ForceUnitLatencies - Return true, since register-pressure-reducing
159  /// scheduling doesn't need actual latency information.
160  bool ForceUnitLatencies() const { return true; }
161};
162}  // end anonymous namespace
163
164
165/// Schedule - Schedule the DAG using list scheduling.
166void ScheduleDAGRRList::Schedule() {
167  DEBUG(dbgs() << "********** List Scheduling **********\n");
168
169  NumLiveRegs = 0;
170  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
171  LiveRegCycles.resize(TRI->getNumRegs(), 0);
172
173  // Build the scheduling graph.
174  BuildSchedGraph(NULL);
175
176  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
177          SUnits[su].dumpAll(this));
178  Topo.InitDAGTopologicalSorting();
179
180  AvailableQueue->initNodes(SUnits);
181
182  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
183  if (isBottomUp)
184    ListScheduleBottomUp();
185  else
186    ListScheduleTopDown();
187
188  AvailableQueue->releaseState();
189}
190
191//===----------------------------------------------------------------------===//
192//  Bottom-Up Scheduling
193//===----------------------------------------------------------------------===//
194
195/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
196/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
197void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
198  SUnit *PredSU = PredEdge->getSUnit();
199
200#ifndef NDEBUG
201  if (PredSU->NumSuccsLeft == 0) {
202    dbgs() << "*** Scheduling failed! ***\n";
203    PredSU->dump(this);
204    dbgs() << " has been released too many times!\n";
205    llvm_unreachable(0);
206  }
207#endif
208  --PredSU->NumSuccsLeft;
209
210  // If all the node's successors are scheduled, this node is ready
211  // to be scheduled. Ignore the special EntrySU node.
212  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
213    PredSU->isAvailable = true;
214    AvailableQueue->push(PredSU);
215  }
216}
217
218void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
219  // Bottom up: release predecessors
220  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
221       I != E; ++I) {
222    ReleasePred(SU, &*I);
223    if (I->isAssignedRegDep()) {
224      // This is a physical register dependency and it's impossible or
225      // expensive to copy the register. Make sure nothing that can
226      // clobber the register is scheduled between the predecessor and
227      // this node.
228      if (!LiveRegDefs[I->getReg()]) {
229        ++NumLiveRegs;
230        LiveRegDefs[I->getReg()] = I->getSUnit();
231        LiveRegCycles[I->getReg()] = CurCycle;
232      }
233    }
234  }
235}
236
237/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
238/// count of its predecessors. If a predecessor pending count is zero, add it to
239/// the Available queue.
240void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
241  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
242  DEBUG(SU->dump(this));
243
244  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
245  SU->setHeightToAtLeast(CurCycle);
246  Sequence.push_back(SU);
247
248  ReleasePredecessors(SU, CurCycle);
249
250  // Release all the implicit physical register defs that are live.
251  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
252       I != E; ++I) {
253    if (I->isAssignedRegDep()) {
254      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
255        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
256        assert(LiveRegDefs[I->getReg()] == SU &&
257               "Physical register dependency violated?");
258        --NumLiveRegs;
259        LiveRegDefs[I->getReg()] = NULL;
260        LiveRegCycles[I->getReg()] = 0;
261      }
262    }
263  }
264
265  SU->isScheduled = true;
266  AvailableQueue->ScheduledNode(SU);
267}
268
269/// CapturePred - This does the opposite of ReleasePred. Since SU is being
270/// unscheduled, increase the NumSuccsLeft count of its predecessors. Remove
271/// them from AvailableQueue if necessary.
272void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
273  SUnit *PredSU = PredEdge->getSUnit();
274  if (PredSU->isAvailable) {
275    PredSU->isAvailable = false;
276    if (!PredSU->isPending)
277      AvailableQueue->remove(PredSU);
278  }
279
280  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
281  ++PredSU->NumSuccsLeft;
282}
283
284/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
285/// its state and the states of its predecessors to reflect the change.
286void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
287  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
288  DEBUG(SU->dump(this));
289
290  AvailableQueue->UnscheduledNode(SU);
291
292  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
293       I != E; ++I) {
294    CapturePred(&*I);
295    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
296      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
297      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
298             "Physical register dependency violated?");
299      --NumLiveRegs;
300      LiveRegDefs[I->getReg()] = NULL;
301      LiveRegCycles[I->getReg()] = 0;
302    }
303  }
304
305  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
306       I != E; ++I) {
307    if (I->isAssignedRegDep()) {
308      if (!LiveRegDefs[I->getReg()]) {
309        LiveRegDefs[I->getReg()] = SU;
310        ++NumLiveRegs;
311      }
312      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
313        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
314    }
315  }
316
317  SU->setHeightDirty();
318  SU->isScheduled = false;
319  SU->isAvailable = true;
320  AvailableQueue->push(SU);
321}
322
323/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
324/// BtCycle in order to schedule a specific node.
325void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
326                                          unsigned &CurCycle) {
327  SUnit *OldSU = NULL;
328  while (CurCycle > BtCycle) {
329    OldSU = Sequence.back();
330    Sequence.pop_back();
331    if (SU->isSucc(OldSU))
332      // Don't try to remove SU from AvailableQueue.
333      SU->isAvailable = false;
334    UnscheduleNodeBottomUp(OldSU);
335    --CurCycle;
336  }
337
338  assert(!SU->isSucc(OldSU) && "Something is wrong!");
339
340  ++NumBacktracks;
341}
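
// For example (hypothetical state): with CurCycle == 6, BtCycle == 3, and
// Sequence ending [..., A@3, B@4, C@5], the loop above pops and unschedules
// C, B, then A, leaving CurCycle == 3 so SU can be scheduled in their place.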
342
343/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
344/// successors to the newly created node.
345SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
346  SDNode *N = SU->getNode();
347  if (!N)
348    return NULL;
349
350  if (N->getFlaggedNode())
351    return NULL;
352
353  SUnit *NewSU;
354  bool TryUnfold = false;
355  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
356    EVT VT = N->getValueType(i);
357    if (VT == MVT::Flag)
358      return NULL;
359    else if (VT == MVT::Other)
360      TryUnfold = true;
361  }
362  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
363    const SDValue &Op = N->getOperand(i);
364    EVT VT = Op.getNode()->getValueType(Op.getResNo());
365    if (VT == MVT::Flag)
366      return NULL;
367  }
368
369  if (TryUnfold) {
370    SmallVector<SDNode*, 2> NewNodes;
371    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
372      return NULL;
373
374    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
375    assert(NewNodes.size() == 2 && "Expected a load folding node!");
376
377    N = NewNodes[1];
378    SDNode *LoadNode = NewNodes[0];
379    unsigned NumVals = N->getNumValues();
380    unsigned OldNumVals = SU->getNode()->getNumValues();
381    for (unsigned i = 0; i != NumVals; ++i)
382      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
383    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
384                                   SDValue(LoadNode, 1));
385
386    // LoadNode may already exist. This can happen when there is another
387    // load from the same location that produces the same type of value
388    // but has different alignment or volatility.
389    bool isNewLoad = true;
390    SUnit *LoadSU;
391    if (LoadNode->getNodeId() != -1) {
392      LoadSU = &SUnits[LoadNode->getNodeId()];
393      isNewLoad = false;
394    } else {
395      LoadSU = CreateNewSUnit(LoadNode);
396      LoadNode->setNodeId(LoadSU->NodeNum);
397      ComputeLatency(LoadSU);
398    }
399
400    NewSU = CreateNewSUnit(N);
401    assert(N->getNodeId() == -1 && "Node already inserted!");
402    N->setNodeId(NewSU->NodeNum);
403
404    const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
405    for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
406      if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
407        NewSU->isTwoAddress = true;
408        break;
409      }
410    }
411    if (TID.isCommutable())
412      NewSU->isCommutable = true;
413    ComputeLatency(NewSU);
414
415    // Record all the edges to and from the old SU, by category.
416    SmallVector<SDep, 4> ChainPreds;
417    SmallVector<SDep, 4> ChainSuccs;
418    SmallVector<SDep, 4> LoadPreds;
419    SmallVector<SDep, 4> NodePreds;
420    SmallVector<SDep, 4> NodeSuccs;
421    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
422         I != E; ++I) {
423      if (I->isCtrl())
424        ChainPreds.push_back(*I);
425      else if (I->getSUnit()->getNode() &&
426               I->getSUnit()->getNode()->isOperandOf(LoadNode))
427        LoadPreds.push_back(*I);
428      else
429        NodePreds.push_back(*I);
430    }
431    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
432         I != E; ++I) {
433      if (I->isCtrl())
434        ChainSuccs.push_back(*I);
435      else
436        NodeSuccs.push_back(*I);
437    }
438
439    // Now assign edges to the newly-created nodes.
440    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
441      const SDep &Pred = ChainPreds[i];
442      RemovePred(SU, Pred);
443      if (isNewLoad)
444        AddPred(LoadSU, Pred);
445    }
446    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
447      const SDep &Pred = LoadPreds[i];
448      RemovePred(SU, Pred);
449      if (isNewLoad)
450        AddPred(LoadSU, Pred);
451    }
452    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
453      const SDep &Pred = NodePreds[i];
454      RemovePred(SU, Pred);
455      AddPred(NewSU, Pred);
456    }
457    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
458      SDep D = NodeSuccs[i];
459      SUnit *SuccDep = D.getSUnit();
460      D.setSUnit(SU);
461      RemovePred(SuccDep, D);
462      D.setSUnit(NewSU);
463      AddPred(SuccDep, D);
464    }
465    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
466      SDep D = ChainSuccs[i];
467      SUnit *SuccDep = D.getSUnit();
468      D.setSUnit(SU);
469      RemovePred(SuccDep, D);
470      if (isNewLoad) {
471        D.setSUnit(LoadSU);
472        AddPred(SuccDep, D);
473      }
474    }
475
476    // Add a data dependency to reflect that NewSU reads the value defined
477    // by LoadSU.
478    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));
479
480    if (isNewLoad)
481      AvailableQueue->addNode(LoadSU);
482    AvailableQueue->addNode(NewSU);
483
484    ++NumUnfolds;
485
486    if (NewSU->NumSuccsLeft == 0) {
487      NewSU->isAvailable = true;
488      return NewSU;
489    }
490    SU = NewSU;
491  }
492
493  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
494  NewSU = CreateClone(SU);
495
496  // New SUnit has the exact same predecessors.
497  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
498       I != E; ++I)
499    if (!I->isArtificial())
500      AddPred(NewSU, *I);
501
502  // Only copy scheduled successors. Cut them from the old node's successor
503  // list and move them over.
504  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
505  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
506       I != E; ++I) {
507    if (I->isArtificial())
508      continue;
509    SUnit *SuccSU = I->getSUnit();
510    if (SuccSU->isScheduled) {
511      SDep D = *I;
512      D.setSUnit(NewSU);
513      AddPred(SuccSU, D);
514      D.setSUnit(SU);
515      DelDeps.push_back(std::make_pair(SuccSU, D));
516    }
517  }
518  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
519    RemovePred(DelDeps[i].first, DelDeps[i].second);
520
521  AvailableQueue->updateNode(SU);
522  AvailableQueue->addNode(NewSU);
523
524  ++NumDups;
525  return NewSU;
526}
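
// In the unfolding case above, a load-folded SUnit is split in two
// (sketch, assuming the load turned out to be new):
//
//   before:  preds ==> SU(load+op) ==> succs
//   after:   chain & load preds ==> LoadSU ==> NewSU(op) ==> data succs
//            (chain succs are re-attached to LoadSU)
//
// Otherwise the node is cloned and all already-scheduled successors are
// moved onto the clone, which the caller can then schedule as a second
// definition of the contested register.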
527
528/// InsertCopiesAndMoveSuccs - Insert register copies and move all
529/// scheduled successors of the given SUnit to the last copy.
530void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
531                                               const TargetRegisterClass *DestRC,
532                                               const TargetRegisterClass *SrcRC,
533                                               SmallVector<SUnit*, 2> &Copies) {
534  SUnit *CopyFromSU = CreateNewSUnit(NULL);
535  CopyFromSU->CopySrcRC = SrcRC;
536  CopyFromSU->CopyDstRC = DestRC;
537
538  SUnit *CopyToSU = CreateNewSUnit(NULL);
539  CopyToSU->CopySrcRC = DestRC;
540  CopyToSU->CopyDstRC = SrcRC;
541
542  // Only copy scheduled successors. Cut them from the old node's successor
543  // list and move them over.
544  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
545  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
546       I != E; ++I) {
547    if (I->isArtificial())
548      continue;
549    SUnit *SuccSU = I->getSUnit();
550    if (SuccSU->isScheduled) {
551      SDep D = *I;
552      D.setSUnit(CopyToSU);
553      AddPred(SuccSU, D);
554      DelDeps.push_back(std::make_pair(SuccSU, *I));
555    }
556  }
557  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
558    RemovePred(DelDeps[i].first, DelDeps[i].second);
559
560  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
561  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
562
563  AvailableQueue->updateNode(SU);
564  AvailableQueue->addNode(CopyFromSU);
565  AvailableQueue->addNode(CopyToSU);
566  Copies.push_back(CopyFromSU);
567  Copies.push_back(CopyToSU);
568
569  ++NumPRCopies;
570}
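
// The resulting shape (sketch):
//
//   SU --(data, Reg)--> CopyFromSU --(data)--> CopyToSU --> scheduled succs
//
// CopyFromSU moves the value out of SrcRC into DestRC and CopyToSU moves it
// back, so the already-scheduled successors now read the value through the
// copy pair rather than keeping the physical register live across them.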
571
572/// getPhysicalRegisterVT - Returns the ValueType of the physical register
573/// definition of the specified node.
574/// FIXME: Move to SelectionDAG?
575static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
576                                 const TargetInstrInfo *TII) {
577  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
578  assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
579  unsigned NumRes = TID.getNumDefs();
580  for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
581    if (Reg == *ImpDef)
582      break;
583    ++NumRes;
584  }
585  return N->getValueType(NumRes);
586}
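
// For example (hypothetical descriptor): with one explicit def and
// ImplicitDefs == { FLAGS, R0 }, a query for R0 skips FLAGS, leaving
// NumRes == 2, so the function returns N->getValueType(2): value 0 is the
// explicit def, value 1 is FLAGS, and value 2 is R0.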
587
588/// CheckForLiveRegDef - Return true and update live register vector if the
589/// specified register def of the specified SUnit clobbers any "live" registers.
590static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
591                               std::vector<SUnit*> &LiveRegDefs,
592                               SmallSet<unsigned, 4> &RegAdded,
593                               SmallVector<unsigned, 4> &LRegs,
594                               const TargetRegisterInfo *TRI) {
595  bool Added = false;
596  if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
597    if (RegAdded.insert(Reg)) {
598      LRegs.push_back(Reg);
599      Added = true;
600    }
601  }
602  for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
603    if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
604      if (RegAdded.insert(*Alias)) {
605        LRegs.push_back(*Alias);
606        Added = true;
607      }
608    }
609  return Added;
610}
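
// Register aliases matter here: if a wider register is currently "live"
// (defined by some other SUnit), a def of any overlapping register must be
// reported too. E.g. (illustrative x86 names), with LiveRegDefs[AX] set by
// another node, a def of AL by SU adds AX to LRegs via the alias set.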
611
612/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
613/// scheduling of the given node to satisfy live physical register dependencies.
614/// If the specified node is the last one that's available to schedule, do
615/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
616bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
617                                                 SmallVector<unsigned, 4> &LRegs){
618  if (NumLiveRegs == 0)
619    return false;
620
621  SmallSet<unsigned, 4> RegAdded;
622  // If this node would clobber any "live" register, then it's not ready.
623  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
624       I != E; ++I) {
625    if (I->isAssignedRegDep())
626      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
627                         RegAdded, LRegs, TRI);
628  }
629
630  for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
631    if (Node->getOpcode() == ISD::INLINEASM) {
632      // Inline asm can clobber physical defs.
633      unsigned NumOps = Node->getNumOperands();
634      if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
635        --NumOps;  // Ignore the flag operand.
636
637      for (unsigned i = 2; i != NumOps;) {
638        unsigned Flags =
639          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
640        unsigned NumVals = (Flags & 0xffff) >> 3;
641
642        ++i; // Skip the ID value.
643        if ((Flags & 7) == 2 || (Flags & 7) == 6) {
644          // Check for def of register or earlyclobber register.
645          for (; NumVals; --NumVals, ++i) {
646            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
647            if (TargetRegisterInfo::isPhysicalRegister(Reg))
648              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
649          }
650        } else
651          i += NumVals;
652      }
653      continue;
654    }
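
    // The decoding above follows the inline asm operand flag word layout
    // used here: each flag word is (NumVals << 3) | Kind, so the low three
    // bits hold the operand kind (2 = register def, 6 = early-clobber
    // register def, as tested above) and the next bits hold the number of
    // operand values that follow the flag word.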
655
656    if (!Node->isMachineOpcode())
657      continue;
658    const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
659    if (!TID.ImplicitDefs)
660      continue;
661    for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
662      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
663  }
664  return !LRegs.empty();
665}
666
667
668/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
669/// schedulers.
670void ScheduleDAGRRList::ListScheduleBottomUp() {
671  unsigned CurCycle = 0;
672
673  // Release any predecessors of the special Exit node.
674  ReleasePredecessors(&ExitSU, CurCycle);
675
676  // Add root to Available queue.
677  if (!SUnits.empty()) {
678    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
679    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
680    RootSU->isAvailable = true;
681    AvailableQueue->push(RootSU);
682  }
683
684  // While the Available queue is not empty, grab the node with the highest
685  // priority. If it is not ready, put it back. Schedule the node.
686  SmallVector<SUnit*, 4> NotReady;
687  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
688  Sequence.reserve(SUnits.size());
689  while (!AvailableQueue->empty()) {
690    bool Delayed = false;
691    LRegsMap.clear();
692    SUnit *CurSU = AvailableQueue->pop();
693    while (CurSU) {
694      SmallVector<unsigned, 4> LRegs;
695      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
696        break;
697      Delayed = true;
698      LRegsMap.insert(std::make_pair(CurSU, LRegs));
699
700      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
701      NotReady.push_back(CurSU);
702      CurSU = AvailableQueue->pop();
703    }
704
705    // All candidates are delayed due to live physical reg dependencies.
706    // Try backtracking, code duplication, or inserting cross class copies
707    // to resolve it.
708    if (Delayed && !CurSU) {
709      for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
710        SUnit *TrySU = NotReady[i];
711        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
712
713        // Try unscheduling up to the point where it's safe to schedule
714        // this node.
715        unsigned LiveCycle = CurCycle;
716        for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
717          unsigned Reg = LRegs[j];
718          unsigned LCycle = LiveRegCycles[Reg];
719          LiveCycle = std::min(LiveCycle, LCycle);
720        }
721        SUnit *OldSU = Sequence[LiveCycle];
722        if (!WillCreateCycle(TrySU, OldSU))  {
723          BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
724          // Force the current node to be scheduled before the node that
725          // requires the physical reg dep.
726          if (OldSU->isAvailable) {
727            OldSU->isAvailable = false;
728            AvailableQueue->remove(OldSU);
729          }
730          AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
731                              /*Reg=*/0, /*isNormalMemory=*/false,
732                              /*isMustAlias=*/false, /*isArtificial=*/true));
733          // If one or more successors have been unscheduled, then the current
734          // node is no longer available. Schedule a successor that's now
735          // available instead.
736          if (!TrySU->isAvailable)
737            CurSU = AvailableQueue->pop();
738          else {
739            CurSU = TrySU;
740            TrySU->isPending = false;
741            NotReady.erase(NotReady.begin()+i);
742          }
743          break;
744        }
745      }
746
747      if (!CurSU) {
748        // Can't backtrack. If it's too expensive to copy the value, then try
749        // duplicate the nodes that produces these "too expensive to copy"
750        // values to break the dependency. In case even that doesn't work,
751        // insert cross class copies.
752        // If it's not too expensive, i.e. cost != -1, issue copies.
753        SUnit *TrySU = NotReady[0];
754        SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
755        assert(LRegs.size() == 1 && "Can't handle this yet!");
756        unsigned Reg = LRegs[0];
757        SUnit *LRDef = LiveRegDefs[Reg];
758        EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
759        const TargetRegisterClass *RC =
760          TRI->getPhysicalRegisterRegClass(Reg, VT);
761        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
762
763        // If the cross-copy register class is null, then it must be possible
764        // to copy the value directly. Do not try to duplicate the def.
765        SUnit *NewDef = 0;
766        if (DestRC)
767          NewDef = CopyAndMoveSuccessors(LRDef);
768        else
769          DestRC = RC;
770        if (!NewDef) {
771          // Issue copies; these can be expensive cross-register-class copies.
772          SmallVector<SUnit*, 2> Copies;
773          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
774          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
775                       << " to SU #" << Copies.front()->NodeNum << "\n");
776          AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
777                              /*Reg=*/0, /*isNormalMemory=*/false,
778                              /*isMustAlias=*/false,
779                              /*isArtificial=*/true));
780          NewDef = Copies.back();
781        }
782
783        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
784                     << " to SU #" << TrySU->NodeNum << "\n");
785        LiveRegDefs[Reg] = NewDef;
786        AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
787                             /*Reg=*/0, /*isNormalMemory=*/false,
788                             /*isMustAlias=*/false,
789                             /*isArtificial=*/true));
790        TrySU->isAvailable = false;
791        CurSU = NewDef;
792      }
793
794      assert(CurSU && "Unable to resolve live physical register dependencies!");
795    }
796
797    // Add the nodes that aren't ready back onto the available list.
798    for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
799      NotReady[i]->isPending = false;
800      // May no longer be available due to backtracking.
801      if (NotReady[i]->isAvailable)
802        AvailableQueue->push(NotReady[i]);
803    }
804    NotReady.clear();
805
806    if (CurSU)
807      ScheduleNodeBottomUp(CurSU, CurCycle);
808    ++CurCycle;
809  }
810
811  // Reverse the order since this is bottom-up scheduling.
812  std::reverse(Sequence.begin(), Sequence.end());
813
814#ifndef NDEBUG
815  VerifySchedule(isBottomUp);
816#endif
817}
818
819//===----------------------------------------------------------------------===//
820//  Top-Down Scheduling
821//===----------------------------------------------------------------------===//
822
823/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
824/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
825void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
826  SUnit *SuccSU = SuccEdge->getSUnit();
827
828#ifndef NDEBUG
829  if (SuccSU->NumPredsLeft == 0) {
830    dbgs() << "*** Scheduling failed! ***\n";
831    SuccSU->dump(this);
832    dbgs() << " has been released too many times!\n";
833    llvm_unreachable(0);
834  }
835#endif
836  --SuccSU->NumPredsLeft;
837
838  // If all the node's predecessors are scheduled, this node is ready
839  // to be scheduled. Ignore the special ExitSU node.
840  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
841    SuccSU->isAvailable = true;
842    AvailableQueue->push(SuccSU);
843  }
844}
845
846void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
847  // Top down: release successors
848  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
849       I != E; ++I) {
850    assert(!I->isAssignedRegDep() &&
851           "The list-tdrr scheduler doesn't yet support physreg dependencies!");
852
853    ReleaseSucc(SU, &*I);
854  }
855}
856
857/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
858/// count of its successors. If a successor pending count is zero, add it to
859/// the Available queue.
860void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
861  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
862  DEBUG(SU->dump(this));
863
864  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
865  SU->setDepthToAtLeast(CurCycle);
866  Sequence.push_back(SU);
867
868  ReleaseSuccessors(SU);
869  SU->isScheduled = true;
870  AvailableQueue->ScheduledNode(SU);
871}
872
873/// ListScheduleTopDown - The main loop of list scheduling for top-down
874/// schedulers.
875void ScheduleDAGRRList::ListScheduleTopDown() {
876  unsigned CurCycle = 0;
877
878  // Release any successors of the special Entry node.
879  ReleaseSuccessors(&EntrySU);
880
881  // Add all leaves to the Available queue.
882  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
883    // It is available if it has no predecessors.
884    if (SUnits[i].Preds.empty()) {
885      AvailableQueue->push(&SUnits[i]);
886      SUnits[i].isAvailable = true;
887    }
888  }
889
890  // While the Available queue is not empty, grab the node with the highest
891  // priority. If it is not ready, put it back. Schedule the node.
892  Sequence.reserve(SUnits.size());
893  while (!AvailableQueue->empty()) {
894    SUnit *CurSU = AvailableQueue->pop();
895
896    if (CurSU)
897      ScheduleNodeTopDown(CurSU, CurCycle);
898    ++CurCycle;
899  }
900
901#ifndef NDEBUG
902  VerifySchedule(isBottomUp);
903#endif
904}
905
906
907//===----------------------------------------------------------------------===//
908//                RegReductionPriorityQueue Implementation
909//===----------------------------------------------------------------------===//
910//
911// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
912// to reduce register pressure.
913//
914namespace {
915  template<class SF>
916  class RegReductionPriorityQueue;
917
918  /// Sorting functions for the Available queue.
919  struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
920    RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
921    bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
922    bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
923
924    bool operator()(const SUnit* left, const SUnit* right) const;
925  };
926
927  struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
928    RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
929    td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
930    td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
931
932    bool operator()(const SUnit* left, const SUnit* right) const;
933  };
934}  // end anonymous namespace
935
936/// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number for a node.
937/// The smaller the number, the higher the priority.
938static unsigned
939CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
940  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
941  if (SethiUllmanNumber != 0)
942    return SethiUllmanNumber;
943
944  unsigned Extra = 0;
945  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
946       I != E; ++I) {
947    if (I->isCtrl()) continue;  // ignore chain preds
948    SUnit *PredSU = I->getSUnit();
949    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
950    if (PredSethiUllman > SethiUllmanNumber) {
951      SethiUllmanNumber = PredSethiUllman;
952      Extra = 0;
953    } else if (PredSethiUllman == SethiUllmanNumber)
954      ++Extra;
955  }
956
957  SethiUllmanNumber += Extra;
958
959  if (SethiUllmanNumber == 0)
960    SethiUllmanNumber = 1;
961
962  return SethiUllmanNumber;
963}
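
// Worked example (data edges only): leaves get number 1. A node whose two
// predecessors both have number 1 gets 1 + 1 extra = 2, since the two
// subtrees tie and one result must be held while the other is computed. A
// node with predecessor numbers {3, 1} gets max = 3, since the larger
// subtree can be evaluated first without needing extra registers.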
964
965namespace {
966  template<class SF>
967  class RegReductionPriorityQueue : public SchedulingPriorityQueue {
968    PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
969    unsigned currentQueueId;
970
971  protected:
972    // SUnits - The SUnits for the current graph.
973    std::vector<SUnit> *SUnits;
974
975    const TargetInstrInfo *TII;
976    const TargetRegisterInfo *TRI;
977    ScheduleDAGRRList *scheduleDAG;
978
979    // SethiUllmanNumbers - The SethiUllman number for each node.
980    std::vector<unsigned> SethiUllmanNumbers;
981
982  public:
983    RegReductionPriorityQueue(const TargetInstrInfo *tii,
984                              const TargetRegisterInfo *tri) :
985    Queue(SF(this)), currentQueueId(0),
986    TII(tii), TRI(tri), scheduleDAG(NULL) {}
987
988    void initNodes(std::vector<SUnit> &sunits) {
989      SUnits = &sunits;
990      // Add pseudo dependency edges for two-address nodes.
991      AddPseudoTwoAddrDeps();
992      // Reroute edges to nodes with multiple uses.
993      PrescheduleNodesWithMultipleUses();
994      // Calculate node priorities.
995      CalculateSethiUllmanNumbers();
996    }
997
998    void addNode(const SUnit *SU) {
999      unsigned SUSize = SethiUllmanNumbers.size();
1000      if (SUnits->size() > SUSize)
1001        SethiUllmanNumbers.resize(SUSize*2, 0);
1002      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1003    }
1004
1005    void updateNode(const SUnit *SU) {
1006      SethiUllmanNumbers[SU->NodeNum] = 0;
1007      CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1008    }
1009
1010    void releaseState() {
1011      SUnits = 0;
1012      SethiUllmanNumbers.clear();
1013    }
1014
1015    unsigned getNodePriority(const SUnit *SU) const {
1016      assert(SU->NodeNum < SethiUllmanNumbers.size());
1017      unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1018      if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1019        // CopyToReg should be close to its uses to facilitate coalescing and
1020        // avoid spilling.
1021        return 0;
1022      if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
1023          Opc == TargetInstrInfo::SUBREG_TO_REG ||
1024          Opc == TargetInstrInfo::INSERT_SUBREG)
1025        // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1026        // close to their uses to facilitate coalescing.
1027        return 0;
1028      if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1029        // If SU does not have a register use, i.e. it doesn't produce a value
1030        // that would be consumed (e.g. store), then it terminates a chain of
1031        // computation.  Give it a large SethiUllman number so it will be
1032        // scheduled right before its predecessors so that it doesn't lengthen
1033        // their live ranges.
1034        return 0xffff;
1035      if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1036        // If SU does not have a register def, schedule it close to its uses
1037        // because it does not lengthen any live ranges.
1038        return 0;
1039      return SethiUllmanNumbers[SU->NodeNum];
1040    }
1041
1042    unsigned getNodeOrdering(const SUnit *SU) const {
1043      return scheduleDAG->DAG->GetOrdering(SU->getNode());
1044    }
1045
1046    unsigned size() const { return Queue.size(); }
1047
1048    bool empty() const { return Queue.empty(); }
1049
1050    void push(SUnit *U) {
1051      assert(!U->NodeQueueId && "Node in the queue already");
1052      U->NodeQueueId = ++currentQueueId;
1053      Queue.push(U);
1054    }
1055
1056    void push_all(const std::vector<SUnit *> &Nodes) {
1057      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
1058        push(Nodes[i]);
1059    }
1060
1061    SUnit *pop() {
1062      if (empty()) return NULL;
1063      SUnit *V = Queue.top();
1064      Queue.pop();
1065      V->NodeQueueId = 0;
1066      return V;
1067    }
1068
1069    void remove(SUnit *SU) {
1070      assert(!Queue.empty() && "Queue is empty!");
1071      assert(SU->NodeQueueId != 0 && "Not in queue!");
1072      Queue.erase_one(SU);
1073      SU->NodeQueueId = 0;
1074    }
1075
1076    void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1077      scheduleDAG = scheduleDag;
1078    }
1079
1080  protected:
1081    bool canClobber(const SUnit *SU, const SUnit *Op);
1082    void AddPseudoTwoAddrDeps();
1083    void PrescheduleNodesWithMultipleUses();
1084    void CalculateSethiUllmanNumbers();
1085  };
1086
1087  typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1088    BURegReductionPriorityQueue;
1089
1090  typedef RegReductionPriorityQueue<td_ls_rr_sort>
1091    TDRegReductionPriorityQueue;
1092}
1093
1094/// closestSucc - Returns the scheduled cycle of the successor which is
1095/// closest to the current cycle.
1096static unsigned closestSucc(const SUnit *SU) {
1097  unsigned MaxHeight = 0;
1098  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1099       I != E; ++I) {
1100    if (I->isCtrl()) continue;  // ignore chain succs
1101    unsigned Height = I->getSUnit()->getHeight();
1102    // If there are a bunch of CopyToRegs stacked up, they should be considered
1103    // to be at the same position.
1104    if (I->getSUnit()->getNode() &&
1105        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
1106      Height = closestSucc(I->getSUnit())+1;
1107    if (Height > MaxHeight)
1108      MaxHeight = Height;
1109  }
1110  return MaxHeight;
1111}
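
// For example, if SU's only data successor is a CopyToReg at height 2 that
// itself feeds a use at height 5, the recursion reports 6 rather than 2,
// treating the whole CopyToReg chain as a single position.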
1112
1113/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
1114/// for scratch registers, i.e. number of data dependencies.
1115static unsigned calcMaxScratches(const SUnit *SU) {
1116  unsigned Scratches = 0;
1117  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1118       I != E; ++I) {
1119    if (I->isCtrl()) continue;  // ignore chain preds
1120    Scratches++;
1121  }
1122  return Scratches;
1123}
1124
1125// Bottom up
1126bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1127  unsigned LOrder = SPQ->getNodeOrdering(left);
1128  unsigned ROrder = SPQ->getNodeOrdering(right);
1129
1130  // Prefer the node with the lower non-zero ordering number; the lower the
1131  // number, the higher the preference.
1132  if (LOrder && ROrder && LOrder != ROrder)
1133    return LOrder < ROrder;
1134
1135  unsigned LPriority = SPQ->getNodePriority(left);
1136  unsigned RPriority = SPQ->getNodePriority(right);
1137  if (LPriority != RPriority)
1138    return LPriority > RPriority;
1139
1140  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
1141  // e.g.
1142  // t1 = op t2, c1
1143  // t3 = op t4, c2
1144  //
1145  // and the following instructions are both ready.
1146  // t2 = op c3
1147  // t4 = op c4
1148  //
1149  // Then schedule t2 = op first.
1150  // i.e.
1151  // t4 = op c4
1152  // t2 = op c3
1153  // t1 = op t2, c1
1154  // t3 = op t4, c2
1155  //
1156  // This creates more short live intervals.
1157  unsigned LDist = closestSucc(left);
1158  unsigned RDist = closestSucc(right);
1159  if (LDist != RDist)
1160    return LDist < RDist;
1161
1162  // How many registers become live when the node is scheduled.
1163  unsigned LScratch = calcMaxScratches(left);
1164  unsigned RScratch = calcMaxScratches(right);
1165  if (LScratch != RScratch)
1166    return LScratch > RScratch;
1167
1168  if (left->getHeight() != right->getHeight())
1169    return left->getHeight() > right->getHeight();
1170
1171  if (left->getDepth() != right->getDepth())
1172    return left->getDepth() < right->getDepth();
1173
1174  assert(left->NodeQueueId && right->NodeQueueId &&
1175         "NodeQueueId cannot be zero");
1176  return (left->NodeQueueId > right->NodeQueueId);
1177}
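
// In summary, the bottom-up comparison cascades through these keys: source
// ordering number (when both are non-zero), Sethi-Ullman priority, distance
// to the closest successor, number of data predecessors ("scratches"),
// height, depth, and finally queue insertion order as the ultimate
// tie-breaker.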
1178
1179template<class SF>
1180bool
1181RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
1182  if (SU->isTwoAddress) {
1183    unsigned Opc = SU->getNode()->getMachineOpcode();
1184    const TargetInstrDesc &TID = TII->get(Opc);
1185    unsigned NumRes = TID.getNumDefs();
1186    unsigned NumOps = TID.getNumOperands() - NumRes;
1187    for (unsigned i = 0; i != NumOps; ++i) {
1188      if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
1189        SDNode *DU = SU->getNode()->getOperand(i).getNode();
1190        if (DU->getNodeId() != -1 &&
1191            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
1192          return true;
1193      }
1194    }
1195  }
1196  return false;
1197}
1198
1199
1200/// hasCopyToRegUse - Return true if SU has a value successor that is a
1201/// CopyToReg node.
1202static bool hasCopyToRegUse(const SUnit *SU) {
1203  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1204       I != E; ++I) {
1205    if (I->isCtrl()) continue;
1206    const SUnit *SuccSU = I->getSUnit();
1207    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
1208      return true;
1209  }
1210  return false;
1211}
1212
1213/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
1214/// physical register defs.
1215static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
1216                                  const TargetInstrInfo *TII,
1217                                  const TargetRegisterInfo *TRI) {
1218  SDNode *N = SuccSU->getNode();
1219  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1220  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
1221  assert(ImpDefs && "Caller should check hasPhysRegDefs");
1222  for (const SDNode *SUNode = SU->getNode(); SUNode;
1223       SUNode = SUNode->getFlaggedNode()) {
1224    if (!SUNode->isMachineOpcode())
1225      continue;
1226    const unsigned *SUImpDefs =
1227      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
1228    if (!SUImpDefs)
1229      return false;
1230    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
1231      EVT VT = N->getValueType(i);
1232      if (VT == MVT::Flag || VT == MVT::Other)
1233        continue;
1234      if (!N->hasAnyUseOfValue(i))
1235        continue;
1236      unsigned Reg = ImpDefs[i - NumDefs];
1237      for (;*SUImpDefs; ++SUImpDefs) {
1238        unsigned SUReg = *SUImpDefs;
1239        if (TRI->regsOverlap(Reg, SUReg))
1240          return true;
1241      }
1242    }
1243  }
1244  return false;
1245}
1246
1247/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
1248/// are not handled well by the general register pressure reduction
1249/// heuristics. When presented with code like this:
1250///
1251///      N
1252///    / |
1253///   /  |
1254///  U  store
1255///  |
1256/// ...
1257///
1258/// the heuristics tend to push the store up, but since the
1259/// operand of the store has another use (U), this would increase
1260/// the length of that other use (the U->N edge).
1261///
1262/// This function transforms code like the above to route U's
1263/// dependence through the store when possible, like this:
1264///
1265///      N
1266///      ||
1267///      ||
1268///     store
1269///       |
1270///       U
1271///       |
1272///      ...
1273///
1274/// This results in the store being scheduled immediately
1275/// after N, which shortens the U->N live range, reducing
1276/// register pressure.
1277///
1278template<class SF>
1279void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
1280  // Visit all the nodes in topological order, working top-down.
1281  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1282    SUnit *SU = &(*SUnits)[i];
1283    // For now, only look at nodes with no data successors, such as stores.
1284    // These are especially important, due to the heuristics in
1285    // getNodePriority for nodes with no data successors.
1286    if (SU->NumSuccs != 0)
1287      continue;
1288    // For now, only look at nodes with exactly one data predecessor.
1289    if (SU->NumPreds != 1)
1290      continue;
1291    // Avoid prescheduling copies to virtual registers, which don't behave
1292    // like other nodes from the perspective of scheduling heuristics.
1293    if (SDNode *N = SU->getNode())
1294      if (N->getOpcode() == ISD::CopyToReg &&
1295          TargetRegisterInfo::isVirtualRegister
1296            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1297        continue;
1298
1299    // Locate the single data predecessor.
1300    SUnit *PredSU = 0;
1301    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
1302         EE = SU->Preds.end(); II != EE; ++II)
1303      if (!II->isCtrl()) {
1304        PredSU = II->getSUnit();
1305        break;
1306      }
1307    assert(PredSU);
1308
1309    // Don't rewrite edges that carry physregs, because that requires additional
1310    // support infrastructure.
1311    if (PredSU->hasPhysRegDefs)
1312      continue;
1313    // Short-circuit the case where SU is PredSU's only data successor.
1314    if (PredSU->NumSuccs == 1)
1315      continue;
1316    // Avoid prescheduling to copies from virtual registers, which don't behave
1317    // like other nodes from the perspective of scheduling heuristics.
1318    if (SDNode *N = SU->getNode())
1319      if (N->getOpcode() == ISD::CopyFromReg &&
1320          TargetRegisterInfo::isVirtualRegister
1321            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
1322        continue;
1323
1324    // Perform checks on the successors of PredSU.
1325    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
1326         EE = PredSU->Succs.end(); II != EE; ++II) {
1327      SUnit *PredSuccSU = II->getSUnit();
1328      if (PredSuccSU == SU) continue;
1329      // If PredSU has another successor with no data successors, for
1330      // now don't attempt to choose either over the other.
1331      if (PredSuccSU->NumSuccs == 0)
1332        goto outer_loop_continue;
1333      // Don't break physical register dependencies.
1334      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
1335        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
1336          goto outer_loop_continue;
1337      // Don't introduce graph cycles.
1338      if (scheduleDAG->IsReachable(SU, PredSuccSU))
1339        goto outer_loop_continue;
1340    }
1341
1342    // Ok, the transformation is safe and the heuristics suggest it is
1343    // profitable. Update the graph.
1344    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
1345                 << " next to PredSU # " << PredSU->NodeNum
1346                 << " to guide scheduling in the presence of multiple uses\n");
1347    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
1348      SDep Edge = PredSU->Succs[i];
1349      assert(!Edge.isAssignedRegDep());
1350      SUnit *SuccSU = Edge.getSUnit();
1351      if (SuccSU != SU) {
1352        Edge.setSUnit(PredSU);
1353        scheduleDAG->RemovePred(SuccSU, Edge);
1354        scheduleDAG->AddPred(SU, Edge);
1355        Edge.setSUnit(SU);
1356        scheduleDAG->AddPred(SuccSU, Edge);
1357        --i;
1358      }
1359    }
1360  outer_loop_continue:;
1361  }
1362}
1363
1364/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
1365/// it as a def&use operand, add a pseudo control edge from it to the other
1366/// node (if it won't create a cycle) so the two-address one will be scheduled
1367/// first (lower in the schedule). If both nodes are two-address, favor the
1368/// one that has a CopyToReg use (more likely to be a loop induction update).
1369/// If both are two-address, but one is commutable while the other is not
1370/// commutable, favor the one that's not commutable.
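///
/// For example (hypothetical two-address opcodes): given
///   t2 = ADDri t1, 1   (t2 tied to t1)
///   t3 = SUBri t1, 2   (plain use of t1)
/// an artificial edge from the SUB to the ADD is added (when it would not
/// create a cycle) so the plain use of t1 is issued before the tied def
/// can clobber it.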
1371template<class SF>
1372void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
1373  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
1374    SUnit *SU = &(*SUnits)[i];
1375    if (!SU->isTwoAddress)
1376      continue;
1377
1378    SDNode *Node = SU->getNode();
1379    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
1380      continue;
1381
1382    unsigned Opc = Node->getMachineOpcode();
1383    const TargetInstrDesc &TID = TII->get(Opc);
1384    unsigned NumRes = TID.getNumDefs();
1385    unsigned NumOps = TID.getNumOperands() - NumRes;
1386    for (unsigned j = 0; j != NumOps; ++j) {
1387      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
1388        continue;
1389      SDNode *DU = SU->getNode()->getOperand(j).getNode();
1390      if (DU->getNodeId() == -1)
1391        continue;
1392      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
1393      if (!DUSU) continue;
1394      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
1395           E = DUSU->Succs.end(); I != E; ++I) {
1396        if (I->isCtrl()) continue;
1397        SUnit *SuccSU = I->getSUnit();
1398        if (SuccSU == SU)
1399          continue;
1400        // Be conservative. Ignore if the nodes aren't at roughly the same
1401        // height.
1402        if (SuccSU->getHeight() < SU->getHeight() &&
1403            (SU->getHeight() - SuccSU->getHeight()) > 1)
1404          continue;
1405        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
1406        // constrains whatever is using the copy, instead of the copy
1407        // itself. In the case that the copy is coalesced, this
1408        // preserves the intent of the pseudo two-address heuristics.
1409        while (SuccSU->Succs.size() == 1 &&
1410               SuccSU->getNode()->isMachineOpcode() &&
1411               SuccSU->getNode()->getMachineOpcode() ==
1412                 TargetInstrInfo::COPY_TO_REGCLASS)
1413          SuccSU = SuccSU->Succs.front().getSUnit();
1414        // Don't constrain non-instruction nodes.
1415        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
1416          continue;
1417        // Don't constrain nodes with physical register defs if the
1418        // predecessor can clobber them.
1419        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
1420          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
1421            continue;
1422        }
1423        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
1424        // these may be coalesced away. We want them close to their uses.
1425        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
1426        if (SuccOpc == TargetInstrInfo::EXTRACT_SUBREG ||
1427            SuccOpc == TargetInstrInfo::INSERT_SUBREG ||
1428            SuccOpc == TargetInstrInfo::SUBREG_TO_REG)
1429          continue;
1430        if ((!canClobber(SuccSU, DUSU) ||
1431             (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
1432             (!SU->isCommutable && SuccSU->isCommutable)) &&
1433            !scheduleDAG->IsReachable(SuccSU, SU)) {
1434          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
1435                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
1436          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
1437                                        /*Reg=*/0, /*isNormalMemory=*/false,
1438                                        /*isMustAlias=*/false,
1439                                        /*isArtificial=*/true));
1440        }
1441      }
1442    }
1443  }
1444}
1445
1446/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1447/// scheduling units.
1448template<class SF>
1449void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
1450  SethiUllmanNumbers.assign(SUnits->size(), 0);
1451
1452  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1453    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1454}
1455
1456/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the number of unscheduled
1457/// predecessors of the successors of the SUnit SU. Stop counting once the
1458/// provided limit is exceeded.
1459static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
1460                                                    unsigned Limit) {
1461  unsigned Sum = 0;
1462  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
1463       I != E; ++I) {
1464    const SUnit *SuccSU = I->getSUnit();
1465    for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
1466         EE = SuccSU->Preds.end(); II != EE; ++II) {
1467      SUnit *PredSU = II->getSUnit();
1468      if (!PredSU->isScheduled)
1469        if (++Sum > Limit)
1470          return Sum;
1471    }
1472  }
1473  return Sum;
1474}
1475
1476
1477// Top down
1478bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
1479  unsigned LPriority = SPQ->getNodePriority(left);
1480  unsigned RPriority = SPQ->getNodePriority(right);
1481  bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
1482  bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
1483  bool LIsFloater = LIsTarget && left->NumPreds == 0;
1484  bool RIsFloater = RIsTarget && right->NumPreds == 0;
1485  unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
1486  unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
1487
1488  if (left->NumSuccs == 0 && right->NumSuccs != 0)
1489    return false;
1490  else if (left->NumSuccs != 0 && right->NumSuccs == 0)
1491    return true;
1492
1493  if (LIsFloater)
1494    LBonus -= 2;
1495  if (RIsFloater)
1496    RBonus -= 2;
1497  if (left->NumSuccs == 1)
1498    LBonus += 2;
1499  if (right->NumSuccs == 1)
1500    RBonus += 2;
1501
1502  if (LPriority+LBonus != RPriority+RBonus)
1503    return LPriority+LBonus < RPriority+RBonus;
1504
1505  if (left->getDepth() != right->getDepth())
1506    return left->getDepth() < right->getDepth();
1507
1508  if (left->NumSuccsLeft != right->NumSuccsLeft)
1509    return left->NumSuccsLeft > right->NumSuccsLeft;
1510
1511  assert(left->NodeQueueId && right->NodeQueueId &&
1512         "NodeQueueId cannot be zero");
1513  return (left->NodeQueueId > right->NodeQueueId);
1514}
1515
1516//===----------------------------------------------------------------------===//
1517//                         Public Constructor Functions
1518//===----------------------------------------------------------------------===//
1519
1520llvm::ScheduleDAGSDNodes *
1521llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1522  const TargetMachine &TM = IS->TM;
1523  const TargetInstrInfo *TII = TM.getInstrInfo();
1524  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1525
1526  BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
1527
1528  ScheduleDAGRRList *SD =
1529    new ScheduleDAGRRList(*IS->MF, true, PQ);
1530  PQ->setScheduleDAG(SD);
1531  return SD;
1532}
1533
1534llvm::ScheduleDAGSDNodes *
1535llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
1536  const TargetMachine &TM = IS->TM;
1537  const TargetInstrInfo *TII = TM.getInstrInfo();
1538  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
1539
1540  TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
1541
1542  ScheduleDAGRRList *SD =
1543    new ScheduleDAGRRList(*IS->MF, false, PQ);
1544  PQ->setScheduleDAG(SD);
1545  return SD;
1546}
1547