ScheduleDAGRRList.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the bottom-up register pressure reduction list scheduler,
// using standard algorithms.  The basic approach uses a priority queue of
// available nodes to schedule.  One at a time, nodes are taken from the
// priority queue (thus in priority order), checked for legality to schedule,
// and emitted if legal.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <climits>
using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

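// The schedulers registered above are selected through llc's -pre-RA-sched
// option (RegisterScheduler plugs each entry into that cl::opt). A hedged
// usage sketch; "test.ll" is an illustrative input, not part of this file:
//
//   llc -pre-RA-sched=list-burr   test.ll   # Sethi-Ullman register reduction
//   llc -pre-RA-sched=source      test.ll   # prefer source order when legal
//   llc -pre-RA-sched=list-hybrid test.ll   # balance latency vs. pressure
//   llc -pre-RA-sched=list-ilp    test.ll   # balance ILP vs. pressure
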
static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

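// The cl::opts above are hidden debugging and tuning knobs rather than a
// stable interface. A hedged example of combining them on the llc command
// line ("test.ll" is illustrative only):
//
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure -sched-avg-ipc=2 test.ll
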
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;
  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits, nullptr) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};
}  // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values.  These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr);
  LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr);
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}
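
// A hedged note on inspecting this scheduler: since DEBUG_TYPE above is
// "pre-RA-sched", an assertions-enabled build can print the per-node dumps,
// queue contents and final schedule emitted in Schedule() with, for example:
//
//   llc -pre-RA-sched=list-burr -debug-only=pre-RA-sched test.ll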

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}
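
// Illustrative sketch (no particular target assumed) of how the nest counting
// above matches call sequences. Walking the chain bottom-up from the outer
// CALLSEQ_END, the nodes are visited in this order:
//
//   CALLSEQ_END   (outer)   NestLevel 0 -> 1   <- walk starts here
//   CALLSEQ_END   (inner)   NestLevel 1 -> 2
//   CALLSEQ_BEGIN (inner)   NestLevel 2 -> 1
//   CALLSEQ_BEGIN (outer)   NestLevel 1 -> 0   <- returned
//
// TokenFactors are resolved by taking the operand path with the deepest
// nesting, per the comment in the loop above.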

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue.  If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward to the specified cycle.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}
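
// Descriptive note: the loop above asks the hazard recognizer about SU at
// increasing stall distances (passed as the negative offset -Stalls, as this
// bottom-up scheduler does with getHazardType) until it reports NoHazard,
// then AdvanceToCycle() skips over all of the stalled cycles in one step.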

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = nullptr;
      LiveRegGens[I->getReg()] = nullptr;
      releaseInterferences(I->getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and the state of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = nullptr;
      LiveRegGens[I->getReg()] = nullptr;
      releaseInterferences(I->getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == nullptr ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}
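
// Descriptive note: after the Reset() above, the most recently scheduled
// nodes (up to the recognizer's MaxLookAhead) are re-emitted so that the
// scoreboard again reflects the cycle at which the backtracked schedule ends.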

/// BacktrackBottomUp - Backtrack over the schedule, unscheduling nodes up to
/// and including BtSU, in order to make it possible to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  if (SU->getNode()->getGluedNode())
    return nullptr;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return nullptr;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return nullptr;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return nullptr;

    // Unfolding an x86 DEC64m operation results in a store, dec, and load,
    // which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return nullptr;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location producing the same type of value,
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      computeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    SDep D(LoadSU, SDep::Data, 0);
    D.setLatency(LoadSU->Latency);
    AddPred(NewSU, D);

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
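
// Illustrative sketch of the dependence structure built above. The value
// live in physreg Reg is copied out of its class (CopyFromSU: SrcRC->DestRC)
// and then back (CopyToSU: DestRC->SrcRC), and the already-scheduled
// successors are rewired to read from CopyToSU:
//
//   SU --(data, Reg)--> CopyFromSU --(data)--> CopyToSU --> scheduled succs
//
// Successors that are not yet scheduled instead receive an artificial
// dependence on CopyFromSU, per the comment in the loop above.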

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}
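
// A hedged example of the indexing above: it relies on the convention that a
// machine SDNode's results list the explicit defs first, followed by values
// for implicit defs in MCInstrDesc order. For a hypothetical opcode with one
// explicit def and ImplicitDefs = { FLAGS, X0 }, asking for X0 returns the
// value type at index getNumDefs() + 1 == 2. The register names here are
// purely illustrative.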

/// CheckForLiveRegDef - If the specified register def of the specified SUnit
/// clobbers any "live" registers, add them to the LRegs interference list.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Reg, or an alias of it, is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     std::vector<SUnit*> &LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i))
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (const RegisterMaskSDNode *Op =
        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
      return Op->getRegMask();
  return nullptr;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    DEBUG(dbgs() << "    Interfering reg " <<
          (LRegs[0] == TRI->getNumRegs() ? "CallResource"
           : TRI->getName(LRegs[0]))
           << " SU #" << CurSU->NodeNum << '\n');
    std::pair<LRegsMapT::iterator, bool> LRegsPair =
      LRegsMap.insert(std::make_pair(CurSU, LRegs));
    if (LRegsPair.second) {
      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      Interferences.push_back(CurSU);
    }
    else {
      assert(CurSU->isPending && "Interferences are pending");
      // Update the interference with current live regs.
      LRegsPair.first->second = LRegs;
    }
    CurSU = AvailableQueue->pop();
  }
  if (CurSU)
    return CurSU;

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = nullptr;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      // BacktrackBottomUp mutates Interferences!
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
            << TrySU->NodeNum << ")\n");
      AddPred(TrySU, SDep(BtSU, SDep::Artificial));

      // If one or more successors have been unscheduled, then the current
      // node is no longer available.
      if (!TrySU->isAvailable)
        CurSU = AvailableQueue->pop();
      else {
        AvailableQueue->remove(TrySU);
        CurSU = TrySU;
      }
      // Interferences has been mutated. We must break.
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // duplicating the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = nullptr;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }
  assert(CurSU && "Unable to resolve live physical register dependencies!");
  return CurSU;
}
1479
1480/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1481/// schedulers.
1482void ScheduleDAGRRList::ListScheduleBottomUp() {
1483  // Release any predecessors of the special Exit node.
1484  ReleasePredecessors(&ExitSU);
1485
1486  // Add root to Available queue.
1487  if (!SUnits.empty()) {
1488    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1489    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1490    RootSU->isAvailable = true;
1491    AvailableQueue->push(RootSU);
1492  }
1493
1494  // While the Available queue is not empty, grab the node with the highest
1495  // priority. If it is not ready, put it back. Schedule the node.
1496  Sequence.reserve(SUnits.size());
1497  while (!AvailableQueue->empty() || !Interferences.empty()) {
1498    DEBUG(dbgs() << "\nExamining Available:\n";
1499          AvailableQueue->dump(this));
1500
1501    // Pick the best node to schedule taking all constraints into
1502    // consideration.
1503    SUnit *SU = PickNodeToScheduleBottomUp();
1504
1505    AdvancePastStalls(SU);
1506
1507    ScheduleNodeBottomUp(SU);
1508
1509    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1510      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1511      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1512      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1513    }
1514  }
1515
1516  // Reverse the order since the schedule was built bottom-up.
1517  std::reverse(Sequence.begin(), Sequence.end());
1518
1519#ifndef NDEBUG
1520  VerifyScheduledSequence(/*isBottomUp=*/true);
1521#endif
1522}
1523
1524//===----------------------------------------------------------------------===//
1525//                RegReductionPriorityQueue Definition
1526//===----------------------------------------------------------------------===//
1527//
1528// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1529// to reduce register pressure.
1530//
1531namespace {
1532class RegReductionPQBase;
1533
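/// queue_sort - Common base for the priority comparators below. By default a
/// node is always considered ready; the derived comparators that filter the
/// ready queue provide their own isReady.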
1534struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1535  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1536};
1537
1538#ifndef NDEBUG
1539template<class SF>
1540struct reverse_sort : public queue_sort {
1541  SF &SortFunc;
1542  reverse_sort(SF &sf) : SortFunc(sf) {}
1543
1544  bool operator()(SUnit* left, SUnit* right) const {
1545    // reverse left/right rather than simply !SortFunc(left, right)
1546    // to expose different paths in the comparison logic.
1547    return SortFunc(right, left);
1548  }
1549};
1550#endif // NDEBUG
1551
1552/// bu_ls_rr_sort - Priority function for bottom up register pressure
1553/// reduction scheduler.
1554struct bu_ls_rr_sort : public queue_sort {
1555  enum {
1556    IsBottomUp = true,
1557    HasReadyFilter = false
1558  };
1559
1560  RegReductionPQBase *SPQ;
1561  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1562
1563  bool operator()(SUnit* left, SUnit* right) const;
1564};
1565
1566// src_ls_rr_sort - Priority function for source order scheduler.
1567struct src_ls_rr_sort : public queue_sort {
1568  enum {
1569    IsBottomUp = true,
1570    HasReadyFilter = false
1571  };
1572
1573  RegReductionPQBase *SPQ;
1574  src_ls_rr_sort(RegReductionPQBase *spq)
1575    : SPQ(spq) {}
1576
1577  bool operator()(SUnit* left, SUnit* right) const;
1578};
1579
1580// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1581struct hybrid_ls_rr_sort : public queue_sort {
1582  enum {
1583    IsBottomUp = true,
1584    HasReadyFilter = false
1585  };
1586
1587  RegReductionPQBase *SPQ;
1588  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1589    : SPQ(spq) {}
1590
1591  bool isReady(SUnit *SU, unsigned CurCycle) const;
1592
1593  bool operator()(SUnit* left, SUnit* right) const;
1594};
1595
1596// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1597// scheduler.
1598struct ilp_ls_rr_sort : public queue_sort {
1599  enum {
1600    IsBottomUp = true,
1601    HasReadyFilter = false
1602  };
1603
1604  RegReductionPQBase *SPQ;
1605  ilp_ls_rr_sort(RegReductionPQBase *spq)
1606    : SPQ(spq) {}
1607
1608  bool isReady(SUnit *SU, unsigned CurCycle) const;
1609
1610  bool operator()(SUnit* left, SUnit* right) const;
1611};
1612
1613class RegReductionPQBase : public SchedulingPriorityQueue {
1614protected:
1615  std::vector<SUnit*> Queue;
1616  unsigned CurQueueId;
1617  bool TracksRegPressure;
1618  bool SrcOrder;
1619
1620  // SUnits - The SUnits for the current graph.
1621  std::vector<SUnit> *SUnits;
1622
1623  MachineFunction &MF;
1624  const TargetInstrInfo *TII;
1625  const TargetRegisterInfo *TRI;
1626  const TargetLowering *TLI;
1627  ScheduleDAGRRList *scheduleDAG;
1628
1629  // SethiUllmanNumbers - The SethiUllman number for each node.
1630  std::vector<unsigned> SethiUllmanNumbers;
1631
1632  /// RegPressure - Tracking current reg pressure per register class.
1633  ///
1634  std::vector<unsigned> RegPressure;
1635
1636  /// RegLimit - Tracking the number of allocatable registers per register
1637  /// class.
1638  std::vector<unsigned> RegLimit;
1639
1640public:
1641  RegReductionPQBase(MachineFunction &mf,
1642                     bool hasReadyFilter,
1643                     bool tracksrp,
1644                     bool srcorder,
1645                     const TargetInstrInfo *tii,
1646                     const TargetRegisterInfo *tri,
1647                     const TargetLowering *tli)
1648    : SchedulingPriorityQueue(hasReadyFilter),
1649      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
1650      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
1651    if (TracksRegPressure) {
1652      unsigned NumRC = TRI->getNumRegClasses();
1653      RegLimit.resize(NumRC);
1654      RegPressure.resize(NumRC);
1655      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1656      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1657      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1658             E = TRI->regclass_end(); I != E; ++I)
1659        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1660    }
1661  }
1662
1663  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1664    scheduleDAG = scheduleDag;
1665  }
1666
1667  ScheduleHazardRecognizer* getHazardRec() {
1668    return scheduleDAG->getHazardRec();
1669  }
1670
1671  void initNodes(std::vector<SUnit> &sunits) override;
1672
1673  void addNode(const SUnit *SU) override;
1674
1675  void updateNode(const SUnit *SU) override;
1676
1677  void releaseState() override {
1678    SUnits = nullptr;
1679    SethiUllmanNumbers.clear();
1680    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1681  }
1682
1683  unsigned getNodePriority(const SUnit *SU) const;
1684
1685  unsigned getNodeOrdering(const SUnit *SU) const {
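  // Source (IR) order of the node's instruction; 0 when the SUnit has no node
  // or no source order is recorded.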
1686    if (!SU->getNode()) return 0;
1687
1688    return SU->getNode()->getIROrder();
1689  }
1690
1691  bool empty() const override { return Queue.empty(); }
1692
1693  void push(SUnit *U) override {
1694    assert(!U->NodeQueueId && "Node in the queue already");
1695    U->NodeQueueId = ++CurQueueId;
1696    Queue.push_back(U);
1697  }
1698
1699  void remove(SUnit *SU) override {
1700    assert(!Queue.empty() && "Queue is empty!");
1701    assert(SU->NodeQueueId != 0 && "Not in queue!");
1702    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1703                                                 SU);
1704    if (I != std::prev(Queue.end()))
1705      std::swap(*I, Queue.back());
1706    Queue.pop_back();
1707    SU->NodeQueueId = 0;
1708  }
1709
1710  bool tracksRegPressure() const override { return TracksRegPressure; }
1711
1712  void dumpRegPressure() const;
1713
1714  bool HighRegPressure(const SUnit *SU) const;
1715
1716  bool MayReduceRegPressure(SUnit *SU) const;
1717
1718  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1719
1720  void scheduledNode(SUnit *SU) override;
1721
1722  void unscheduledNode(SUnit *SU) override;
1723
1724protected:
1725  bool canClobber(const SUnit *SU, const SUnit *Op);
1726  void AddPseudoTwoAddrDeps();
1727  void PrescheduleNodesWithMultipleUses();
1728  void CalculateSethiUllmanNumbers();
1729};
1730
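// Scan the queue linearly, keeping the best candidate according to Picker,
// then pop it by swapping it with the last element and shrinking the vector.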
1731template<class SF>
1732static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1733  std::vector<SUnit *>::iterator Best = Q.begin();
1734  for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
1735         E = Q.end(); I != E; ++I)
1736    if (Picker(*Best, *I))
1737      Best = I;
1738  SUnit *V = *Best;
1739  if (Best != std::prev(Q.end()))
1740    std::swap(*Best, Q.back());
1741  Q.pop_back();
1742  return V;
1743}
1744
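// When StressSched is set (debug builds only), pop with the picker's operands
// swapped (see reverse_sort) to exercise different comparison paths.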
1745template<class SF>
1746SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1747#ifndef NDEBUG
1748  if (DAG->StressSched) {
1749    reverse_sort<SF> RPicker(Picker);
1750    return popFromQueueImpl(Q, RPicker);
1751  }
1752#endif
1753  (void)DAG;
1754  return popFromQueueImpl(Q, Picker);
1755}
1756
1757template<class SF>
1758class RegReductionPriorityQueue : public RegReductionPQBase {
1759  SF Picker;
1760
1761public:
1762  RegReductionPriorityQueue(MachineFunction &mf,
1763                            bool tracksrp,
1764                            bool srcorder,
1765                            const TargetInstrInfo *tii,
1766                            const TargetRegisterInfo *tri,
1767                            const TargetLowering *tli)
1768    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1769                         tii, tri, tli),
1770      Picker(this) {}
1771
1772  bool isBottomUp() const override { return SF::IsBottomUp; }
1773
1774  bool isReady(SUnit *U) const override {
1775    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1776  }
1777
1778  SUnit *pop() override {
1779    if (Queue.empty()) return nullptr;
1780
1781    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1782    V->NodeQueueId = 0;
1783    return V;
1784  }
1785
1786#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1787  void dump(ScheduleDAG *DAG) const override {
1788    // Emulate pop() without clobbering NodeQueueIds.
1789    std::vector<SUnit*> DumpQueue = Queue;
1790    SF DumpPicker = Picker;
1791    while (!DumpQueue.empty()) {
1792      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1793      dbgs() << "Height " << SU->getHeight() << ": ";
1794      SU->dump(DAG);
1795    }
1796  }
1797#endif
1798};
1799
1800typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1801BURegReductionPriorityQueue;
1802
1803typedef RegReductionPriorityQueue<src_ls_rr_sort>
1804SrcRegReductionPriorityQueue;
1805
1806typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1807HybridBURRPriorityQueue;
1808
1809typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1810ILPBURRPriorityQueue;
1811} // end anonymous namespace
1812
1813//===----------------------------------------------------------------------===//
1814//           Static Node Priority for Register Pressure Reduction
1815//===----------------------------------------------------------------------===//
1816
1817// Check for special nodes that bypass scheduling heuristics.
1818// Currently this pushes TokenFactor nodes down, but may be used for other
1819// pseudo-ops as well.
1820//
1821// Return -1 to schedule right above left, 1 for left above right.
1822// Return 0 if no bias exists.
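//
// For example, if left is a TokenFactor (isScheduleLow) and right is not,
// this returns -1 so that right is scheduled above left.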
1823static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1824  bool LSchedLow = left->isScheduleLow;
1825  bool RSchedLow = right->isScheduleLow;
1826  if (LSchedLow != RSchedLow)
1827    return LSchedLow < RSchedLow ? 1 : -1;
1828  return 0;
1829}
1830
1831/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1832/// Smaller number is the higher priority.
1833static unsigned
1834CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1835  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1836  if (SethiUllmanNumber != 0)
1837    return SethiUllmanNumber;
1838
1839  unsigned Extra = 0;
1840  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1841       I != E; ++I) {
1842    if (I->isCtrl()) continue;  // ignore chain preds
1843    SUnit *PredSU = I->getSUnit();
1844    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1845    if (PredSethiUllman > SethiUllmanNumber) {
1846      SethiUllmanNumber = PredSethiUllman;
1847      Extra = 0;
1848    } else if (PredSethiUllman == SethiUllmanNumber)
1849      ++Extra;
1850  }
1851
1852  SethiUllmanNumber += Extra;
1853
1854  if (SethiUllmanNumber == 0)
1855    SethiUllmanNumber = 1;
1856
1857  return SethiUllmanNumber;
1858}
1859
1860/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1861/// scheduling units.
1862void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1863  SethiUllmanNumbers.assign(SUnits->size(), 0);
1864
1865  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1866    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1867}
1868
1869void RegReductionPQBase::addNode(const SUnit *SU) {
1870  unsigned SUSize = SethiUllmanNumbers.size();
1871  if (SUnits->size() > SUSize)
1872    SethiUllmanNumbers.resize(SUSize*2, 0);
1873  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1874}
1875
1876void RegReductionPQBase::updateNode(const SUnit *SU) {
1877  SethiUllmanNumbers[SU->NodeNum] = 0;
1878  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1879}
1880
1881// Lower priority means schedule further down. For bottom-up scheduling, lower
1882// priority SUs are scheduled before higher priority SUs.
1883unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1884  assert(SU->NodeNum < SethiUllmanNumbers.size());
1885  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1886  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1887    // CopyToReg should be close to its uses to facilitate coalescing and
1888    // avoid spilling.
1889    return 0;
1890  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1891      Opc == TargetOpcode::SUBREG_TO_REG ||
1892      Opc == TargetOpcode::INSERT_SUBREG)
1893    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1894    // close to their uses to facilitate coalescing.
1895    return 0;
1896  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1897    // If SU does not have a register use, i.e. it doesn't produce a value
1898    // that would be consumed (e.g. store), then it terminates a chain of
1899    // computation.  Give it a large SethiUllman number so that it is
1900    // scheduled right before its predecessors and doesn't lengthen
1901    // their live ranges.
1902    return 0xffff;
1903  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1904    // If SU does not have a register def, schedule it close to its uses
1905    // because it does not lengthen any live ranges.
1906    return 0;
1907#if 1
1908  return SethiUllmanNumbers[SU->NodeNum];
1909#else
1910  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1911  if (SU->isCallOp) {
1912    // FIXME: This assumes all of the defs are used as call operands.
1913    int NP = (int)Priority - SU->getNode()->getNumValues();
1914    return (NP > 0) ? NP : 0;
1915  }
1916  return Priority;
1917#endif
1918}
1919
1920//===----------------------------------------------------------------------===//
1921//                     Register Pressure Tracking
1922//===----------------------------------------------------------------------===//
1923
1924void RegReductionPQBase::dumpRegPressure() const {
1925#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1926  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1927         E = TRI->regclass_end(); I != E; ++I) {
1928    const TargetRegisterClass *RC = *I;
1929    unsigned Id = RC->getID();
1930    unsigned RP = RegPressure[Id];
1931    if (!RP) continue;
1932    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1933          << '\n');
1934  }
1935#endif
1936}
1937
1938bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1939  if (!TLI)
1940    return false;
1941
1942  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1943       I != E; ++I) {
1944    if (I->isCtrl())
1945      continue;
1946    SUnit *PredSU = I->getSUnit();
1947    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1948    // to cover the number of registers defined (they are all live).
1949    if (PredSU->NumRegDefsLeft == 0) {
1950      continue;
1951    }
1952    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1953         RegDefPos.IsValid(); RegDefPos.Advance()) {
1954      unsigned RCId, Cost;
1955      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
1956
1957      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1958        return true;
1959    }
1960  }
1961  return false;
1962}
1963
1964bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1965  const SDNode *N = SU->getNode();
1966
1967  if (!N->isMachineOpcode() || !SU->NumSuccs)
1968    return false;
1969
1970  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1971  for (unsigned i = 0; i != NumDefs; ++i) {
1972    MVT VT = N->getSimpleValueType(i);
1973    if (!N->hasAnyUseOfValue(i))
1974      continue;
1975    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1976    if (RegPressure[RCId] >= RegLimit[RCId])
1977      return true;
1978  }
1979  return false;
1980}
1981
1982// Compute the register pressure contribution of this instruction by counting
1983// up for uses that are not live and down for defs. Only count register classes
1984// that are already under high pressure. As a side effect, compute the number
1985// of uses of registers that are already live.
1986//
1987// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1988// so could probably be factored.
1989int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1990  LiveUses = 0;
1991  int PDiff = 0;
1992  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1993       I != E; ++I) {
1994    if (I->isCtrl())
1995      continue;
1996    SUnit *PredSU = I->getSUnit();
1997    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1998    // to cover the number of registers defined (they are all live).
1999    if (PredSU->NumRegDefsLeft == 0) {
2000      if (PredSU->getNode()->isMachineOpcode())
2001        ++LiveUses;
2002      continue;
2003    }
2004    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2005         RegDefPos.IsValid(); RegDefPos.Advance()) {
2006      MVT VT = RegDefPos.GetValue();
2007      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2008      if (RegPressure[RCId] >= RegLimit[RCId])
2009        ++PDiff;
2010    }
2011  }
2012  const SDNode *N = SU->getNode();
2013
2014  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2015    return PDiff;
2016
2017  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2018  for (unsigned i = 0; i != NumDefs; ++i) {
2019    MVT VT = N->getSimpleValueType(i);
2020    if (!N->hasAnyUseOfValue(i))
2021      continue;
2022    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2023    if (RegPressure[RCId] >= RegLimit[RCId])
2024      --PDiff;
2025  }
2026  return PDiff;
2027}
2028
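// Bottom-up bookkeeping: when SU is scheduled, each operand value it consumes
// becomes live (raising pressure in that value's register class), while SU's
// own defs, whose uses have all been scheduled already, stop being live.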
2029void RegReductionPQBase::scheduledNode(SUnit *SU) {
2030  if (!TracksRegPressure)
2031    return;
2032
2033  if (!SU->getNode())
2034    return;
2035
2036  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2037       I != E; ++I) {
2038    if (I->isCtrl())
2039      continue;
2040    SUnit *PredSU = I->getSUnit();
2041    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2042    // to cover the number of registers defined (they are all live).
2043    if (PredSU->NumRegDefsLeft == 0) {
2044      continue;
2045    }
2046    // FIXME: The ScheduleDAG currently loses information about which of a
2047    // node's values is consumed by each dependence. Consequently, if the node
2048    // defines multiple register classes, we don't know which to pressurize
2049    // here. Instead the following loop consumes the register defs in an
2050    // arbitrary order. At least it handles the common case of clustered loads
2051    // to the same class. For precise liveness, each SDep needs to indicate the
2052    // result number. But that tightly couples the ScheduleDAG with the
2053    // SelectionDAG making updates tricky. A simpler hack would be to attach a
2054    // value type or register class to SDep.
2055    //
2056    // The most important aspect of register tracking is balancing the increase
2057    // here with the reduction further below. Note that this SU may use multiple
2058    // defs in PredSU. They can't be determined here, but we've already
2059    // compensated by reducing NumRegDefsLeft in PredSU during
2060    // ScheduleDAGSDNodes::AddSchedEdges.
2061    --PredSU->NumRegDefsLeft;
2062    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2063    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2064         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2065      if (SkipRegDefs)
2066        continue;
2067
2068      unsigned RCId, Cost;
2069      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2070      RegPressure[RCId] += Cost;
2071      break;
2072    }
2073  }
2074
2075  // We should have this assert, but there may be dead SDNodes that never
2076  // materialize as SUnits, so they don't appear to generate liveness.
2077  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2078  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2079  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2080       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2081    if (SkipRegDefs > 0)
2082      continue;
2083    unsigned RCId, Cost;
2084    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2085    if (RegPressure[RCId] < Cost) {
2086      // Register pressure tracking is imprecise. This can happen. But we try
2087      // hard not to let it happen because it likely results in poor scheduling.
2088      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
2089      RegPressure[RCId] = 0;
2090    }
2091    else {
2092      RegPressure[RCId] -= Cost;
2093    }
2094  }
2095  dumpRegPressure();
2096}
2097
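// Approximately undo the register pressure adjustments made when SU was
// scheduled; called while backtracking.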
2098void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2099  if (!TracksRegPressure)
2100    return;
2101
2102  const SDNode *N = SU->getNode();
2103  if (!N) return;
2104
2105  if (!N->isMachineOpcode()) {
2106    if (N->getOpcode() != ISD::CopyToReg)
2107      return;
2108  } else {
2109    unsigned Opc = N->getMachineOpcode();
2110    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2111        Opc == TargetOpcode::INSERT_SUBREG ||
2112        Opc == TargetOpcode::SUBREG_TO_REG ||
2113        Opc == TargetOpcode::REG_SEQUENCE ||
2114        Opc == TargetOpcode::IMPLICIT_DEF)
2115      return;
2116  }
2117
2118  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2119       I != E; ++I) {
2120    if (I->isCtrl())
2121      continue;
2122    SUnit *PredSU = I->getSUnit();
2123    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2124    // counts data deps.
2125    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2126      continue;
2127    const SDNode *PN = PredSU->getNode();
2128    if (!PN->isMachineOpcode()) {
2129      if (PN->getOpcode() == ISD::CopyFromReg) {
2130        MVT VT = PN->getSimpleValueType(0);
2131        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2132        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2133      }
2134      continue;
2135    }
2136    unsigned POpc = PN->getMachineOpcode();
2137    if (POpc == TargetOpcode::IMPLICIT_DEF)
2138      continue;
2139    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2140        POpc == TargetOpcode::INSERT_SUBREG ||
2141        POpc == TargetOpcode::SUBREG_TO_REG) {
2142      MVT VT = PN->getSimpleValueType(0);
2143      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2144      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2145      continue;
2146    }
2147    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2148    for (unsigned i = 0; i != NumDefs; ++i) {
2149      MVT VT = PN->getSimpleValueType(i);
2150      if (!PN->hasAnyUseOfValue(i))
2151        continue;
2152      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2153      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2154        // Register pressure tracking is imprecise. This can happen.
2155        RegPressure[RCId] = 0;
2156      else
2157        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2158    }
2159  }
2160
2161  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2162  // may transfer data dependencies to CopyToReg.
2163  if (SU->NumSuccs && N->isMachineOpcode()) {
2164    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2165    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2166      MVT VT = N->getSimpleValueType(i);
2167      if (VT == MVT::Glue || VT == MVT::Other)
2168        continue;
2169      if (!N->hasAnyUseOfValue(i))
2170        continue;
2171      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2172      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2173    }
2174  }
2175
2176  dumpRegPressure();
2177}
2178
2179//===----------------------------------------------------------------------===//
2180//           Dynamic Node Priority for Register Pressure Reduction
2181//===----------------------------------------------------------------------===//
2182
2183/// closestSucc - Returns the scheduled cycle of the successor which is
2184/// closest to the current cycle.
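/// For bottom-up scheduling this is the maximum height over the data
/// successors, looking through chains of CopyToReg uses.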
2185static unsigned closestSucc(const SUnit *SU) {
2186  unsigned MaxHeight = 0;
2187  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2188       I != E; ++I) {
2189    if (I->isCtrl()) continue;  // ignore chain succs
2190    unsigned Height = I->getSUnit()->getHeight();
2191    // If there are a bunch of CopyToRegs stacked up, they should be considered
2192    // to be at the same position.
2193    if (I->getSUnit()->getNode() &&
2194        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2195      Height = closestSucc(I->getSUnit())+1;
2196    if (Height > MaxHeight)
2197      MaxHeight = Height;
2198  }
2199  return MaxHeight;
2200}
2201
2202/// calcMaxScratches - Returns a cost estimate of the worst case requirement
2203/// for scratch registers, i.e. the number of data dependencies.
2204static unsigned calcMaxScratches(const SUnit *SU) {
2205  unsigned Scratches = 0;
2206  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2207       I != E; ++I) {
2208    if (I->isCtrl()) continue;  // ignore chain preds
2209    Scratches++;
2210  }
2211  return Scratches;
2212}
2213
2214/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2215/// CopyFromReg from a virtual register.
2216static bool hasOnlyLiveInOpers(const SUnit *SU) {
2217  bool RetVal = false;
2218  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2219       I != E; ++I) {
2220    if (I->isCtrl()) continue;
2221    const SUnit *PredSU = I->getSUnit();
2222    if (PredSU->getNode() &&
2223        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2224      unsigned Reg =
2225        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2226      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2227        RetVal = true;
2228        continue;
2229      }
2230    }
2231    return false;
2232  }
2233  return RetVal;
2234}
2235
2236/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2237/// CopyToReg to a virtual register. This SU def is probably a liveout and
2238/// it has no other use. It should be scheduled closer to the terminator.
2239static bool hasOnlyLiveOutUses(const SUnit *SU) {
2240  bool RetVal = false;
2241  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2242       I != E; ++I) {
2243    if (I->isCtrl()) continue;
2244    const SUnit *SuccSU = I->getSUnit();
2245    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2246      unsigned Reg =
2247        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2248      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2249        RetVal = true;
2250        continue;
2251      }
2252    }
2253    return false;
2254  }
2255  return RetVal;
2256}
2257
2258// Set isVRegCycle for a node with only live in opers and live out uses. Also
2259// set isVRegCycle for its CopyFromReg operands.
2260//
2261// This is only relevant for single-block loops, in which case the VRegCycle
2262// node is likely an induction variable in which the operand and target virtual
2263// registers should be coalesced (e.g. pre/post increment values). Setting the
2264// isVRegCycle flag helps the scheduler prioritize other uses of the same
2265// CopyFromReg so that this node becomes the virtual register "kill". This
2266// avoids interference between the values live in and out of the block and
2267// eliminates a copy inside the loop.
2268static void initVRegCycle(SUnit *SU) {
2269  if (DisableSchedVRegCycle)
2270    return;
2271
2272  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2273    return;
2274
2275  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2276
2277  SU->isVRegCycle = true;
2278
2279  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2280       I != E; ++I) {
2281    if (I->isCtrl()) continue;
2282    I->getSUnit()->isVRegCycle = true;
2283  }
2284}
2285
2286// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2287// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2288static void resetVRegCycle(SUnit *SU) {
2289  if (!SU->isVRegCycle)
2290    return;
2291
2292  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2293       I != E; ++I) {
2294    if (I->isCtrl()) continue;  // ignore chain preds
2295    SUnit *PredSU = I->getSUnit();
2296    if (PredSU->isVRegCycle) {
2297      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2298             "VRegCycle def must be CopyFromReg");
2299      PredSU->isVRegCycle = false;
2300    }
2301  }
2302}
2303
2304// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2305// means a node that defines the VRegCycle has not been scheduled yet.
2306static bool hasVRegCycleUse(const SUnit *SU) {
2307  // If this SU also defines the VReg, don't hoist it as a "use".
2308  if (SU->isVRegCycle)
2309    return false;
2310
2311  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2312       I != E; ++I) {
2313    if (I->isCtrl()) continue;  // ignore chain preds
2314    if (I->getSUnit()->isVRegCycle &&
2315        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2316      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2317      return true;
2318    }
2319  }
2320  return false;
2321}
2322
2323// Check for either a dependence (latency) or resource (hazard) stall.
2324//
2325// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2326static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2327  if ((int)SPQ->getCurCycle() < Height) return true;
2328  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2329      != ScheduleHazardRecognizer::NoHazard)
2330    return true;
2331  return false;
2332}
2333
2334// Return -1 if left has higher priority, 1 if right has higher priority.
2335// Return 0 if latency-based priority is equivalent.
2336static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2337                            RegReductionPQBase *SPQ) {
2338  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2339  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2340  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2341  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2342  int LHeight = (int)left->getHeight() + LPenalty;
2343  int RHeight = (int)right->getHeight() + RPenalty;
2344
2345  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2346    BUHasStall(left, LHeight, SPQ);
2347  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2348    BUHasStall(right, RHeight, SPQ);
2349
2350  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2351  // If scheduling both of them would cause a pipeline stall, sort them
2352  // according to their height.
2353  if (LStall) {
2354    if (!RStall)
2355      return 1;
2356    if (LHeight != RHeight)
2357      return LHeight > RHeight ? 1 : -1;
2358  } else if (RStall)
2359    return -1;
2360
2361  // If either node is scheduling for latency, sort them by height/depth
2362  // and latency.
2363  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2364                     right->SchedulingPref == Sched::ILP)) {
2365    // If neither instruction stalls (!LStall && !RStall) and the
2366    // HazardRecognizer is enabled, then instructions are already grouped by
2367    // cycle, so height is already covered and only depth matters. We also
2368    // reach this point if both stall but have the same height.
2369    if (!SPQ->getHazardRec()->isEnabled()) {
2370      if (LHeight != RHeight)
2371        return LHeight > RHeight ? 1 : -1;
2372    }
2373    int LDepth = left->getDepth() - LPenalty;
2374    int RDepth = right->getDepth() - RPenalty;
2375    if (LDepth != RDepth) {
2376      DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2377            << ") depth " << LDepth << " vs SU (" << right->NodeNum
2378            << ") depth " << RDepth << "\n");
2379      return LDepth < RDepth ? 1 : -1;
2380    }
2381    if (left->Latency != right->Latency)
2382      return left->Latency > right->Latency ? 1 : -1;
2383  }
2384  return 0;
2385}
2386
2387static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2388  // Schedule physical register definitions close to their use. This is
2389  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2390  // long as shortening physreg live ranges is generally good, we can defer
2391  // creating a subtarget hook.
2392  if (!DisableSchedPhysRegJoin) {
2393    bool LHasPhysReg = left->hasPhysRegDefs;
2394    bool RHasPhysReg = right->hasPhysRegDefs;
2395    if (LHasPhysReg != RHasPhysReg) {
2396      #ifndef NDEBUG
2397      static const char *const PhysRegMsg[] = { " has no physreg",
2398                                                " defines a physreg" };
2399      #endif
2400      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2401            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2402            << PhysRegMsg[RHasPhysReg] << "\n");
2403      return LHasPhysReg < RHasPhysReg;
2404    }
2405  }
2406
2407  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2408  unsigned LPriority = SPQ->getNodePriority(left);
2409  unsigned RPriority = SPQ->getNodePriority(right);
2410
2411  // Be really careful about hoisting call operands above previous calls.
2412  // Only allow it if it would reduce register pressure.
2413  if (left->isCall && right->isCallOp) {
2414    unsigned RNumVals = right->getNode()->getNumValues();
2415    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2416  }
2417  if (right->isCall && left->isCallOp) {
2418    unsigned LNumVals = left->getNode()->getNumValues();
2419    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2420  }
2421
2422  if (LPriority != RPriority)
2423    return LPriority > RPriority;
2424
2425  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2426  // the same, then keep source order.
2427  if (left->isCall || right->isCall) {
2428    unsigned LOrder = SPQ->getNodeOrdering(left);
2429    unsigned ROrder = SPQ->getNodeOrdering(right);
2430
2431    // Prefer an ordering where the lower the non-zero order number, the higher
2432    // the preference.
2433    if ((LOrder || ROrder) && LOrder != ROrder)
2434      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2435  }
2436
2437  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2438  // e.g.
2439  // t1 = op t2, c1
2440  // t3 = op t4, c2
2441  //
2442  // and the following instructions are both ready.
2443  // t2 = op c3
2444  // t4 = op c4
2445  //
2446  // Then schedule t2 = op first.
2447  // i.e.
2448  // t4 = op c4
2449  // t2 = op c3
2450  // t1 = op t2, c1
2451  // t3 = op t4, c2
2452  //
2453  // This creates more short live intervals.
2454  unsigned LDist = closestSucc(left);
2455  unsigned RDist = closestSucc(right);
2456  if (LDist != RDist)
2457    return LDist < RDist;
2458
2459  // How many registers become live when the node is scheduled.
2460  unsigned LScratch = calcMaxScratches(left);
2461  unsigned RScratch = calcMaxScratches(right);
2462  if (LScratch != RScratch)
2463    return LScratch > RScratch;
2464
2465  // Comparing latency against a call makes little sense unless the node
2466  // is register pressure-neutral.
2467  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2468    return (left->NodeQueueId > right->NodeQueueId);
2469
2470  // Do not compare latencies when one or both of the nodes are calls.
2471  if (!DisableSchedCycles &&
2472      !(left->isCall || right->isCall)) {
2473    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2474    if (result != 0)
2475      return result > 0;
2476  }
2477  else {
2478    if (left->getHeight() != right->getHeight())
2479      return left->getHeight() > right->getHeight();
2480
2481    if (left->getDepth() != right->getDepth())
2482      return left->getDepth() < right->getDepth();
2483  }
2484
2485  assert(left->NodeQueueId && right->NodeQueueId &&
2486         "NodeQueueId cannot be zero");
2487  return (left->NodeQueueId > right->NodeQueueId);
2488}
2489
2490// Bottom up
2491bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2492  if (int res = checkSpecialNodes(left, right))
2493    return res > 0;
2494
2495  return BURRSort(left, right, SPQ);
2496}
2497
2498// Source order, otherwise bottom up.
2499bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2500  if (int res = checkSpecialNodes(left, right))
2501    return res > 0;
2502
2503  unsigned LOrder = SPQ->getNodeOrdering(left);
2504  unsigned ROrder = SPQ->getNodeOrdering(right);
2505
2506  // Prefer an ordering where the lower the non-zero order number, the higher
2507  // the preference.
2508  if ((LOrder || ROrder) && LOrder != ROrder)
2509    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2510
2511  return BURRSort(left, right, SPQ);
2512}
2513
2514// If the time between now and when the instruction will be ready can cover
2515// the spill code, then avoid adding it to the ready queue. This gives long
2516// stalls highest priority and allows hoisting across calls. It should also
2517// speed up processing the available queue.
2518bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2519  static const unsigned ReadyDelay = 3;
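  // A node may enter the ready queue up to ReadyDelay cycles before its
  // height would normally allow, provided no hazard is reported that far
  // ahead; otherwise it stays pending.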
2520
2521  if (SPQ->MayReduceRegPressure(SU)) return true;
2522
2523  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2524
2525  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2526      != ScheduleHazardRecognizer::NoHazard)
2527    return false;
2528
2529  return true;
2530}
2531
2532// Return true if right should be scheduled with higher priority than left.
2533bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2534  if (int res = checkSpecialNodes(left, right))
2535    return res > 0;
2536
2537  if (left->isCall || right->isCall)
2538    // No way to compute latency of calls.
2539    return BURRSort(left, right, SPQ);
2540
2541  bool LHigh = SPQ->HighRegPressure(left);
2542  bool RHigh = SPQ->HighRegPressure(right);
2543  // Avoid causing spills. If register pressure is high, schedule for
2544  // register pressure reduction.
2545  if (LHigh && !RHigh) {
2546    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2547          << right->NodeNum << ")\n");
2548    return true;
2549  }
2550  else if (!LHigh && RHigh) {
2551    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2552          << left->NodeNum << ")\n");
2553    return false;
2554  }
2555  if (!LHigh && !RHigh) {
2556    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2557    if (result != 0)
2558      return result > 0;
2559  }
2560  return BURRSort(left, right, SPQ);
2561}
2562
2563// Schedule as many instructions in each cycle as possible. So don't make an
2564// instruction available unless it is ready in the current cycle.
2565bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2566  if (SU->getHeight() > CurCycle) return false;
2567
2568  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2569      != ScheduleHazardRecognizer::NoHazard)
2570    return false;
2571
2572  return true;
2573}
2574
2575static bool canEnableCoalescing(SUnit *SU) {
2576  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2577  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2578    // CopyToReg should be close to its uses to facilitate coalescing and
2579    // avoid spilling.
2580    return true;
2581
2582  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2583      Opc == TargetOpcode::SUBREG_TO_REG ||
2584      Opc == TargetOpcode::INSERT_SUBREG)
2585    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2586    // close to their uses to facilitate coalescing.
2587    return true;
2588
2589  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2590    // If SU does not have a register def, schedule it close to its uses
2591    // because it does not lengthen any live ranges.
2592    return true;
2593
2594  return false;
2595}
2596
2597// list-ilp is currently an experimental scheduler that allows various
2598// heuristics to be enabled prior to the normal register reduction logic.
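// The checks below are applied in order: register pressure difference,
// coalescing opportunity, number of live uses, pipeline stalls, critical path
// (depth), height, and finally the standard BURRSort tie-breakers.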
2599bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2600  if (int res = checkSpecialNodes(left, right))
2601    return res > 0;
2602
2603  if (left->isCall || right->isCall)
2604    // No way to compute latency of calls.
2605    return BURRSort(left, right, SPQ);
2606
2607  unsigned LLiveUses = 0, RLiveUses = 0;
2608  int LPDiff = 0, RPDiff = 0;
2609  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2610    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2611    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2612  }
2613  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2614    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2615          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2616    return LPDiff > RPDiff;
2617  }
2618
2619  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2620    bool LReduce = canEnableCoalescing(left);
2621    bool RReduce = canEnableCoalescing(right);
2622    if (LReduce && !RReduce) return false;
2623    if (RReduce && !LReduce) return true;
2624  }
2625
2626  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2627    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2628          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2629    return LLiveUses < RLiveUses;
2630  }
2631
2632  if (!DisableSchedStalls) {
2633    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2634    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2635    if (LStall != RStall)
2636      return left->getHeight() > right->getHeight();
2637  }
2638
2639  if (!DisableSchedCriticalPath) {
2640    int spread = (int)left->getDepth() - (int)right->getDepth();
2641    if (std::abs(spread) > MaxReorderWindow) {
2642      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2643            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2644            << right->getDepth() << "\n");
2645      return left->getDepth() < right->getDepth();
2646    }
2647  }
2648
2649  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2650    int spread = (int)left->getHeight() - (int)right->getHeight();
2651    if (std::abs(spread) > MaxReorderWindow)
2652      return left->getHeight() > right->getHeight();
2653  }
2654
2655  return BURRSort(left, right, SPQ);
2656}
2657
2658void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2659  SUnits = &sunits;
2660  // Add pseudo dependency edges for two-address nodes.
2661  if (!Disable2AddrHack)
2662    AddPseudoTwoAddrDeps();
2663  // Reroute edges to nodes with multiple uses.
2664  if (!TracksRegPressure && !SrcOrder)
2665    PrescheduleNodesWithMultipleUses();
2666  // Calculate node priorities.
2667  CalculateSethiUllmanNumbers();
2668
2669  // For single block loops, mark nodes that look like canonical IV increments.
2670  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2671    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2672      initVRegCycle(&sunits[i]);
2673    }
2674  }
2675}
2676
2677//===----------------------------------------------------------------------===//
2678//                    Preschedule for Register Pressure
2679//===----------------------------------------------------------------------===//
2680
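/// canClobber - Return true if SU is a two-address instruction and Op produces
/// a value that SU reads through one of its tied def&use operands.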
2681bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2682  if (SU->isTwoAddress) {
2683    unsigned Opc = SU->getNode()->getMachineOpcode();
2684    const MCInstrDesc &MCID = TII->get(Opc);
2685    unsigned NumRes = MCID.getNumDefs();
2686    unsigned NumOps = MCID.getNumOperands() - NumRes;
2687    for (unsigned i = 0; i != NumOps; ++i) {
2688      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2689        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2690        if (DU->getNodeId() != -1 &&
2691            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2692          return true;
2693      }
2694    }
2695  }
2696  return false;
2697}
2698
2699/// canClobberReachingPhysRegUse - True if SU would clobber one of its
2700/// successor's explicit physregs whose definition can reach DepSU.
2701/// i.e. DepSU should not be scheduled above SU.
2702static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2703                                         ScheduleDAGRRList *scheduleDAG,
2704                                         const TargetInstrInfo *TII,
2705                                         const TargetRegisterInfo *TRI) {
2706  const uint16_t *ImpDefs
2707    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2708  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2709  if (!ImpDefs && !RegMask)
2710    return false;
2711
2712  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2713       SI != SE; ++SI) {
2714    SUnit *SuccSU = SI->getSUnit();
2715    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2716           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2717      if (!PI->isAssignedRegDep())
2718        continue;
2719
2720      if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
2721          scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2722        return true;
2723
2724      if (ImpDefs)
2725        for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2726          // Return true if SU clobbers this physical register use and the
2727          // definition of the register reaches from DepSU. IsReachable queries
2728          // a topological forward sort of the DAG (following the successors).
2729          if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2730              scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2731            return true;
2732    }
2733  }
2734  return false;
2735}
2736
2737/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2738/// physical register defs.
2739static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2740                                  const TargetInstrInfo *TII,
2741                                  const TargetRegisterInfo *TRI) {
2742  SDNode *N = SuccSU->getNode();
2743  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2744  const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2745  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2746  for (const SDNode *SUNode = SU->getNode(); SUNode;
2747       SUNode = SUNode->getGluedNode()) {
2748    if (!SUNode->isMachineOpcode())
2749      continue;
2750    const uint16_t *SUImpDefs =
2751      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2752    const uint32_t *SURegMask = getNodeRegMask(SUNode);
2753    if (!SUImpDefs && !SURegMask)
2754      continue;
2755    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2756      EVT VT = N->getValueType(i);
2757      if (VT == MVT::Glue || VT == MVT::Other)
2758        continue;
2759      if (!N->hasAnyUseOfValue(i))
2760        continue;
2761      unsigned Reg = ImpDefs[i - NumDefs];
2762      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2763        return true;
2764      if (!SUImpDefs)
2765        continue;
2766      for (;*SUImpDefs; ++SUImpDefs) {
2767        unsigned SUReg = *SUImpDefs;
2768        if (TRI->regsOverlap(Reg, SUReg))
2769          return true;
2770      }
2771    }
2772  }
2773  return false;
2774}
2775
2776/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2777/// are not handled well by the general register pressure reduction
2778/// heuristics. When presented with code like this:
2779///
2780///      N
2781///    / |
2782///   /  |
2783///  U  store
2784///  |
2785/// ...
2786///
2787/// the heuristics tend to push the store up, but since the
2788/// operand of the store has another use (U), this would increase
2789/// the length of that other use (the U->N edge).
2790///
2791/// This function transforms code like the above to route U's
2792/// dependence through the store when possible, like this:
2793///
2794///      N
2795///      ||
2796///      ||
2797///     store
2798///       |
2799///       U
2800///       |
2801///      ...
2802///
2803/// This results in the store being scheduled immediately
2804/// after N, which shortens the U->N live range, reducing
2805/// register pressure.
2806///
2807void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2808  // Visit all the nodes in topological order, working top-down.
2809  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2810    SUnit *SU = &(*SUnits)[i];
2811    // For now, only look at nodes with no data successors, such as stores.
2812    // These are especially important, due to the heuristics in
2813    // getNodePriority for nodes with no data successors.
2814    if (SU->NumSuccs != 0)
2815      continue;
2816    // For now, only look at nodes with exactly one data predecessor.
2817    if (SU->NumPreds != 1)
2818      continue;
2819    // Avoid prescheduling copies to virtual registers, which don't behave
2820    // like other nodes from the perspective of scheduling heuristics.
2821    if (SDNode *N = SU->getNode())
2822      if (N->getOpcode() == ISD::CopyToReg &&
2823          TargetRegisterInfo::isVirtualRegister
2824            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2825        continue;
2826
2827    // Locate the single data predecessor.
2828    SUnit *PredSU = nullptr;
2829    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2830         EE = SU->Preds.end(); II != EE; ++II)
2831      if (!II->isCtrl()) {
2832        PredSU = II->getSUnit();
2833        break;
2834      }
2835    assert(PredSU);
2836
2837    // Don't rewrite edges that carry physregs, because that requires additional
2838    // support infrastructure.
2839    if (PredSU->hasPhysRegDefs)
2840      continue;
2841    // Short-circuit the case where SU is PredSU's only data successor.
2842    if (PredSU->NumSuccs == 1)
2843      continue;
2844    // Avoid prescheduling to copies from virtual registers, which don't behave
2845    // like other nodes from the perspective of scheduling heuristics.
2846    if (SDNode *N = SU->getNode())
2847      if (N->getOpcode() == ISD::CopyFromReg &&
2848          TargetRegisterInfo::isVirtualRegister
2849            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2850        continue;
2851
2852    // Perform checks on the successors of PredSU.
2853    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2854         EE = PredSU->Succs.end(); II != EE; ++II) {
2855      SUnit *PredSuccSU = II->getSUnit();
2856      if (PredSuccSU == SU) continue;
2857      // If PredSU has another successor with no data successors, for
2858      // now don't attempt to choose either over the other.
2859      if (PredSuccSU->NumSuccs == 0)
2860        goto outer_loop_continue;
2861      // Don't break physical register dependencies.
2862      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2863        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2864          goto outer_loop_continue;
2865      // Don't introduce graph cycles.
2866      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2867        goto outer_loop_continue;
2868    }
2869
2870    // Ok, the transformation is safe and the heuristics suggest it is
2871    // profitable. Update the graph.
2872    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2873                 << " next to PredSU #" << PredSU->NodeNum
2874                 << " to guide scheduling in the presence of multiple uses\n");
2875    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2876      SDep Edge = PredSU->Succs[i];
2877      assert(!Edge.isAssignedRegDep());
2878      SUnit *SuccSU = Edge.getSUnit();
2879      if (SuccSU != SU) {
2880        Edge.setSUnit(PredSU);
2881        scheduleDAG->RemovePred(SuccSU, Edge);
2882        scheduleDAG->AddPred(SU, Edge);
2883        Edge.setSUnit(SU);
2884        scheduleDAG->AddPred(SuccSU, Edge);
2885        --i;
2886      }
2887    }
2888  outer_loop_continue:;
2889  }
2890}
2891
2892/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2893/// it as a def&use operand, add a pseudo control edge from it to the other
2894/// node (if it won't create a cycle) so the two-address one will be scheduled
2895/// first (lower in the schedule). If both nodes are two-address, favor the
2896/// one that has a CopyToReg use (more likely to be a loop induction update).
2897/// If both are two-address, but one is commutable while the other is not
2898/// commutable, favor the one that's not commutable.
2899void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2900  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2901    SUnit *SU = &(*SUnits)[i];
2902    if (!SU->isTwoAddress)
2903      continue;
2904
2905    SDNode *Node = SU->getNode();
2906    if (!Node || !Node->isMachineOpcode() || Node->getGluedNode())
2907      continue;
2908
2909    bool isLiveOut = hasOnlyLiveOutUses(SU);
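    // Record whether SU's result is used only by live-out copies; this feeds
    // the CopyToReg tie-breaker described in the function comment above.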
2910    unsigned Opc = Node->getMachineOpcode();
2911    const MCInstrDesc &MCID = TII->get(Opc);
2912    unsigned NumRes = MCID.getNumDefs();
2913    unsigned NumOps = MCID.getNumOperands() - NumRes;
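    // Scan the use operands for one tied to a def (the two-address operand);
    // the node producing that operand is the value SU will overwrite.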
2914    for (unsigned j = 0; j != NumOps; ++j) {
2915      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2916        continue;
2917      SDNode *DU = Node->getOperand(j).getNode();
2918      if (DU->getNodeId() == -1)
2919        continue;
2920      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2921      if (!DUSU) continue;
2922      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2923           E = DUSU->Succs.end(); I != E; ++I) {
2924        if (I->isCtrl()) continue;
2925        SUnit *SuccSU = I->getSUnit();
2926        if (SuccSU == SU)
2927          continue;
2928        // Be conservative. Ignore SuccSU if it is not at roughly the same
2929        // height as SU.
2930        if (SuccSU->getHeight() < SU->getHeight() &&
2931            (SU->getHeight() - SuccSU->getHeight()) > 1)
2932          continue;
2933        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2934        // constrains whatever is using the copy, instead of the copy
2935        // itself. In the case that the copy is coalesced, this
2936        // preserves the intent of the pseudo two-address heuristics.
2937        while (SuccSU->Succs.size() == 1 &&
2938               SuccSU->getNode()->isMachineOpcode() &&
2939               SuccSU->getNode()->getMachineOpcode() ==
2940                 TargetOpcode::COPY_TO_REGCLASS)
2941          SuccSU = SuccSU->Succs.front().getSUnit();
2942        // Don't constrain non-instruction nodes.
2943        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2944          continue;
2945        // Don't constrain nodes with physical register defs if the
2946        // predecessor can clobber them.
2947        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2948          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2949            continue;
2950        }
2951        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2952        // these may be coalesced away. We want them close to their uses.
2953        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
2954        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
2955            SuccOpc == TargetOpcode::INSERT_SUBREG ||
2956            SuccOpc == TargetOpcode::SUBREG_TO_REG)
2957          continue;
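        // Add the artificial edge only if it cannot clobber a reaching physreg
        // use, the tie-breakers above (live-out uses, commutability) do not
        // favor SuccSU, and the new edge would not create a cycle.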
2958        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
2959            (!canClobber(SuccSU, DUSU) ||
2960             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
2961             (!SU->isCommutable && SuccSU->isCommutable)) &&
2962            !scheduleDAG->IsReachable(SuccSU, SU)) {
2963          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
2964                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
2965          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
2966        }
2967      }
2968    }
2969  }
2970}
2971
2972//===----------------------------------------------------------------------===//
2973//                         Public Constructor Functions
2974//===----------------------------------------------------------------------===//
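// Each factory below builds the priority queue variant for its heuristic,
// wires it to a ScheduleDAGRRList, and returns the scheduler. The two boolean
// queue arguments select register-pressure tracking and source-order
// scheduling, and the boolean scheduler argument enables latency tracking.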
2975
2976llvm::ScheduleDAGSDNodes *
2977llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
2978                                 CodeGenOpt::Level OptLevel) {
2979  const TargetMachine &TM = IS->TM;
2980  const TargetInstrInfo *TII = TM.getInstrInfo();
2981  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2982
2983  BURegReductionPriorityQueue *PQ =
2984    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
2985  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2986  PQ->setScheduleDAG(SD);
2987  return SD;
2988}
2989
2990llvm::ScheduleDAGSDNodes *
2991llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
2992                                   CodeGenOpt::Level OptLevel) {
2993  const TargetMachine &TM = IS->TM;
2994  const TargetInstrInfo *TII = TM.getInstrInfo();
2995  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2996
2997  SrcRegReductionPriorityQueue *PQ =
2998    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
2999  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3000  PQ->setScheduleDAG(SD);
3001  return SD;
3002}
3003
3004llvm::ScheduleDAGSDNodes *
3005llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3006                                   CodeGenOpt::Level OptLevel) {
3007  const TargetMachine &TM = IS->TM;
3008  const TargetInstrInfo *TII = TM.getInstrInfo();
3009  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3010  const TargetLowering *TLI = IS->getTargetLowering();
3011
3012  HybridBURRPriorityQueue *PQ =
3013    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3014
3015  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3016  PQ->setScheduleDAG(SD);
3017  return SD;
3018}
3019
3020llvm::ScheduleDAGSDNodes *
3021llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3022                                CodeGenOpt::Level OptLevel) {
3023  const TargetMachine &TM = IS->TM;
3024  const TargetInstrInfo *TII = TM.getInstrInfo();
3025  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3026  const TargetLowering *TLI = IS->getTargetLowering();
3027
3028  ILPBURRPriorityQueue *PQ =
3029    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3030  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3031  PQ->setScheduleDAG(SD);
3032  return SD;
3033}
3034