1//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements a simple VLIW packetizer using DFA. The packetizer works on
11// machine basic blocks. For each instruction I in BB, the packetizer consults
12// the DFA to see if machine resources are available to execute I. If so, the
13// packetizer checks if I depends on any instruction J in the current packet.
14// If no dependency is found, I is added to current packet and machine resource
15// is marked as taken. If any dependency is found, a target API call is made to
16// prune the dependence.
17//
18//===----------------------------------------------------------------------===//
19#include "HexagonRegisterInfo.h"
20#include "HexagonSubtarget.h"
21#include "HexagonTargetMachine.h"
22#include "HexagonVLIWPacketizer.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/CodeGen/MachineDominators.h"
25#include "llvm/CodeGen/MachineFunctionAnalysis.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineLoopInfo.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/Passes.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/Debug.h"
32#include <map>
33#include <vector>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "packets"
38
39static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
40  cl::ZeroOrMore, cl::init(false),
41  cl::desc("Disable Hexagon packetizer pass"));
42
43static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
44  cl::ZeroOrMore, cl::Hidden, cl::init(true),
45  cl::desc("Allow non-solo packetization of volatile memory references"));
46
47static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
48  cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
49
50static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
51  cl::init(false), cl::Hidden, cl::ZeroOrMore,
52  cl::desc("Disable vector double new-value-stores"));
53
54extern cl::opt<bool> ScheduleInlineAsm;
55
56namespace llvm {
57  FunctionPass *createHexagonPacketizer();
58  void initializeHexagonPacketizerPass(PassRegistry&);
59}
60
61
62namespace {
63  class HexagonPacketizer : public MachineFunctionPass {
64  public:
65    static char ID;
66    HexagonPacketizer() : MachineFunctionPass(ID) {
67      initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
68    }
69
70    void getAnalysisUsage(AnalysisUsage &AU) const override {
71      AU.setPreservesCFG();
72      AU.addRequired<AAResultsWrapperPass>();
73      AU.addRequired<MachineBranchProbabilityInfo>();
74      AU.addRequired<MachineDominatorTree>();
75      AU.addRequired<MachineLoopInfo>();
76      AU.addPreserved<MachineDominatorTree>();
77      AU.addPreserved<MachineLoopInfo>();
78      MachineFunctionPass::getAnalysisUsage(AU);
79    }
80    const char *getPassName() const override {
81      return "Hexagon Packetizer";
82    }
83    bool runOnMachineFunction(MachineFunction &Fn) override;
84
85  private:
86    const HexagonInstrInfo *HII;
87    const HexagonRegisterInfo *HRI;
88  };
89
90  char HexagonPacketizer::ID = 0;
91}
92
93INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
94                      false, false)
95INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
96INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
97INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
98INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
99INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
100                    false, false)
101
102
103HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
104      MachineLoopInfo &MLI, AliasAnalysis *AA,
105      const MachineBranchProbabilityInfo *MBPI)
106    : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
107  HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
108  HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
109}
110
111// Check if FirstI modifies a register that SecondI reads.
112static bool hasWriteToReadDep(const MachineInstr *FirstI,
113      const MachineInstr *SecondI, const TargetRegisterInfo *TRI) {
114  for (auto &MO : FirstI->operands()) {
115    if (!MO.isReg() || !MO.isDef())
116      continue;
117    unsigned R = MO.getReg();
118    if (SecondI->readsRegister(R, TRI))
119      return true;
120  }
121  return false;
122}
123
124
125static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
126      MachineBasicBlock::iterator BundleIt, bool Before) {
127  MachineBasicBlock::instr_iterator InsertPt;
128  if (Before)
129    InsertPt = BundleIt.getInstrIterator();
130  else
131    InsertPt = std::next(BundleIt).getInstrIterator();
132
133  MachineBasicBlock &B = *MI->getParent();
134  // The instruction should at least be bundled with the preceding instruction
135  // (there will always be one, i.e. BUNDLE, if nothing else).
136  assert(MI->isBundledWithPred());
137  if (MI->isBundledWithSucc()) {
138    MI->clearFlag(MachineInstr::BundledSucc);
139    MI->clearFlag(MachineInstr::BundledPred);
140  } else {
141    // If it's not bundled with the successor (i.e. it is the last one
142    // in the bundle), then we can simply unbundle it from the predecessor,
143    // which will take care of updating the predecessor's flag.
144    MI->unbundleFromPred();
145  }
146  B.splice(InsertPt, &B, MI);
147
148  // Get the size of the bundle without asserting.
149  MachineBasicBlock::const_instr_iterator I(BundleIt);
150  MachineBasicBlock::const_instr_iterator E = B.instr_end();
151  unsigned Size = 0;
152  for (++I; I != E && I->isBundledWithPred(); ++I)
153    ++Size;
154
155  // If there are still two or more instructions, then there is nothing
156  // else to be done.
157  if (Size > 1)
158    return BundleIt;
159
160  // Otherwise, extract the single instruction out and delete the bundle.
161  MachineBasicBlock::iterator NextIt = std::next(BundleIt);
162  MachineInstr *SingleI = BundleIt->getNextNode();
163  SingleI->unbundleFromPred();
164  assert(!SingleI->isBundledWithSucc());
165  BundleIt->eraseFromParent();
166  return NextIt;
167}
168
169
170bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
171  if (DisablePacketizer)
172    return false;
173
174  HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
175  HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
176  auto &MLI = getAnalysis<MachineLoopInfo>();
177  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
178  auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
179
180  if (EnableGenAllInsnClass)
181    HII->genAllInsnTimingClasses(MF);
182
183  // Instantiate the packetizer.
184  HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
185
186  // DFA state table should not be empty.
187  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
188
189  //
190  // Loop over all basic blocks and remove KILL pseudo-instructions
191  // These instructions confuse the dependence analysis. Consider:
192  // D0 = ...   (Insn 0)
193  // R0 = KILL R0, D0 (Insn 1)
194  // R0 = ... (Insn 2)
195  // Here, Insn 1 will result in the dependence graph not emitting an output
196  // dependence between Insn 0 and Insn 2. This can lead to incorrect
197  // packetization
198  //
199  for (auto &MB : MF) {
200    auto End = MB.end();
201    auto MI = MB.begin();
202    while (MI != End) {
203      auto NextI = std::next(MI);
204      if (MI->isKill()) {
205        MB.erase(MI);
206        End = MB.end();
207      }
208      MI = NextI;
209    }
210  }
211
212  // Loop over all of the basic blocks.
213  for (auto &MB : MF) {
214    auto Begin = MB.begin(), End = MB.end();
215    while (Begin != End) {
216      // First the first non-boundary starting from the end of the last
217      // scheduling region.
218      MachineBasicBlock::iterator RB = Begin;
219      while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
220        ++RB;
221      // First the first boundary starting from the beginning of the new
222      // region.
223      MachineBasicBlock::iterator RE = RB;
224      while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
225        ++RE;
226      // Add the scheduling boundary if it's not block end.
227      if (RE != End)
228        ++RE;
229      // If RB == End, then RE == End.
230      if (RB != End)
231        Packetizer.PacketizeMIs(&MB, RB, RE);
232
233      Begin = RE;
234    }
235  }
236
237  Packetizer.unpacketizeSoloInstrs(MF);
238  return true;
239}
240
241
242// Reserve resources for a constant extender. Trigger an assertion if the
243// reservation fails.
244void HexagonPacketizerList::reserveResourcesForConstExt() {
245  if (!tryAllocateResourcesForConstExt(true))
246    llvm_unreachable("Resources not available");
247}
248
249bool HexagonPacketizerList::canReserveResourcesForConstExt() {
250  return tryAllocateResourcesForConstExt(false);
251}
252
253// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
254// return true, otherwise, return false.
255bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
256  auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
257  bool Avail = ResourceTracker->canReserveResources(ExtMI);
258  if (Reserve && Avail)
259    ResourceTracker->reserveResources(ExtMI);
260  MF.DeleteMachineInstr(ExtMI);
261  return Avail;
262}
263
264
265bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
266      SDep::Kind DepType, unsigned DepReg) {
267  // Check for LR dependence.
268  if (DepReg == HRI->getRARegister())
269    return true;
270
271  if (HII->isDeallocRet(MI))
272    if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
273      return true;
274
275  // Check if this is a predicate dependence.
276  const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
277  if (RC == &Hexagon::PredRegsRegClass)
278    return true;
279
280  // Assumes that the first operand of the CALLr is the function address.
281  if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
282    MachineOperand MO = MI->getOperand(0);
283    if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
284      return true;
285  }
286
287  return false;
288}
289
290static bool isRegDependence(const SDep::Kind DepType) {
291  return DepType == SDep::Data || DepType == SDep::Anti ||
292         DepType == SDep::Output;
293}
294
295static bool isDirectJump(const MachineInstr* MI) {
296  return MI->getOpcode() == Hexagon::J2_jump;
297}
298
299static bool isSchedBarrier(const MachineInstr* MI) {
300  switch (MI->getOpcode()) {
301  case Hexagon::Y2_barrier:
302    return true;
303  }
304  return false;
305}
306
307static bool isControlFlow(const MachineInstr* MI) {
308  return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
309}
310
311
312/// Returns true if the instruction modifies a callee-saved register.
313static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
314                                     const TargetRegisterInfo *TRI) {
315  const MachineFunction &MF = *MI->getParent()->getParent();
316  for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
317    if (MI->modifiesRegister(*CSR, TRI))
318      return true;
319  return false;
320}
321
322// TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
323// Returns true if an instruction can be promoted to .new predicate or
324// new-value store.
325bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
326  return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
327}
328
329// Promote an instructiont to its .cur form.
330// At this time, we have already made a call to canPromoteToDotCur and made
331// sure that it can *indeed* be promoted.
332bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
333      SDep::Kind DepType, MachineBasicBlock::iterator &MII,
334      const TargetRegisterClass* RC) {
335  assert(DepType == SDep::Data);
336  int CurOpcode = HII->getDotCurOp(MI);
337  MI->setDesc(HII->get(CurOpcode));
338  return true;
339}
340
341void HexagonPacketizerList::cleanUpDotCur() {
342  MachineInstr *MI = NULL;
343  for (auto BI : CurrentPacketMIs) {
344    DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
345    if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
346      MI = BI;
347      continue;
348    }
349    if (MI) {
350      for (auto &MO : BI->operands())
351        if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
352          return;
353    }
354  }
355  if (!MI)
356    return;
357  // We did not find a use of the CUR, so de-cur it.
358  MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
359  DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
360}
361
362// Check to see if an instruction can be dot cur.
363bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
364      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
365      const TargetRegisterClass *RC) {
366  if (!HII->isV60VectorInstruction(MI))
367    return false;
368  if (!HII->isV60VectorInstruction(MII))
369    return false;
370
371  // Already a dot new instruction.
372  if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
373    return false;
374
375  if (!HII->mayBeCurLoad(MI))
376    return false;
377
378  // The "cur value" cannot come from inline asm.
379  if (PacketSU->getInstr()->isInlineAsm())
380    return false;
381
382  // Make sure candidate instruction uses cur.
383  DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
384        MI->dump();
385        dbgs() << "in packet\n";);
386  MachineInstr *MJ = MII;
387  DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
388  unsigned DestReg = MI->getOperand(0).getReg();
389  bool FoundMatch = false;
390  for (auto &MO : MJ->operands())
391    if (MO.isReg() && MO.getReg() == DestReg)
392      FoundMatch = true;
393  if (!FoundMatch)
394    return false;
395
396  // Check for existing uses of a vector register within the packet which
397  // would be affected by converting a vector load into .cur formt.
398  for (auto BI : CurrentPacketMIs) {
399    DEBUG(dbgs() << "packet has "; BI->dump(););
400    if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
401      return false;
402  }
403
404  DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
405  // We can convert the opcode into a .cur.
406  return true;
407}
408
409// Promote an instruction to its .new form. At this time, we have already
410// made a call to canPromoteToDotNew and made sure that it can *indeed* be
411// promoted.
412bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
413      SDep::Kind DepType, MachineBasicBlock::iterator &MII,
414      const TargetRegisterClass* RC) {
415  assert (DepType == SDep::Data);
416  int NewOpcode;
417  if (RC == &Hexagon::PredRegsRegClass)
418    NewOpcode = HII->getDotNewPredOp(MI, MBPI);
419  else
420    NewOpcode = HII->getDotNewOp(MI);
421  MI->setDesc(HII->get(NewOpcode));
422  return true;
423}
424
425bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
426  int NewOpcode = HII->getDotOldOp(MI->getOpcode());
427  MI->setDesc(HII->get(NewOpcode));
428  return true;
429}
430
431enum PredicateKind {
432  PK_False,
433  PK_True,
434  PK_Unknown
435};
436
437/// Returns true if an instruction is predicated on p0 and false if it's
438/// predicated on !p0.
439static PredicateKind getPredicateSense(const MachineInstr *MI,
440                                       const HexagonInstrInfo *HII) {
441  if (!HII->isPredicated(MI))
442    return PK_Unknown;
443  if (HII->isPredicatedTrue(MI))
444    return PK_True;
445  return PK_False;
446}
447
448static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
449      const HexagonInstrInfo *HII) {
450  assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
451#ifndef NDEBUG
452  // Post Increment means duplicates. Use dense map to find duplicates in the
453  // list. Caution: Densemap initializes with the minimum of 64 buckets,
454  // whereas there are at most 5 operands in the post increment.
455  DenseSet<unsigned> DefRegsSet;
456  for (auto &MO : MI->operands())
457    if (MO.isReg() && MO.isDef())
458      DefRegsSet.insert(MO.getReg());
459
460  for (auto &MO : MI->operands())
461    if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
462      return MO;
463#else
464  if (MI->mayLoad()) {
465    const MachineOperand &Op1 = MI->getOperand(1);
466    // The 2nd operand is always the post increment operand in load.
467    assert(Op1.isReg() && "Post increment operand has be to a register.");
468    return Op1;
469  }
470  if (MI->getDesc().mayStore()) {
471    const MachineOperand &Op0 = MI->getOperand(0);
472    // The 1st operand is always the post increment operand in store.
473    assert(Op0.isReg() && "Post increment operand has be to a register.");
474    return Op0;
475  }
476#endif
477  // we should never come here.
478  llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
479}
480
481// Get the value being stored.
482static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
483  // value being stored is always the last operand.
484  return MI->getOperand(MI->getNumOperands()-1);
485}
486
487static bool isLoadAbsSet(const MachineInstr *MI) {
488  unsigned Opc = MI->getOpcode();
489  switch (Opc) {
490    case Hexagon::L4_loadrd_ap:
491    case Hexagon::L4_loadrb_ap:
492    case Hexagon::L4_loadrh_ap:
493    case Hexagon::L4_loadrub_ap:
494    case Hexagon::L4_loadruh_ap:
495    case Hexagon::L4_loadri_ap:
496      return true;
497  }
498  return false;
499}
500
501static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
502  assert(isLoadAbsSet(MI));
503  return MI->getOperand(1);
504}
505
506
507// Can be new value store?
508// Following restrictions are to be respected in convert a store into
509// a new value store.
510// 1. If an instruction uses auto-increment, its address register cannot
511//    be a new-value register. Arch Spec 5.4.2.1
512// 2. If an instruction uses absolute-set addressing mode, its address
513//    register cannot be a new-value register. Arch Spec 5.4.2.1.
514// 3. If an instruction produces a 64-bit result, its registers cannot be used
515//    as new-value registers. Arch Spec 5.4.2.2.
516// 4. If the instruction that sets the new-value register is conditional, then
517//    the instruction that uses the new-value register must also be conditional,
518//    and both must always have their predicates evaluate identically.
519//    Arch Spec 5.4.2.3.
520// 5. There is an implied restriction that a packet cannot have another store,
521//    if there is a new value store in the packet. Corollary: if there is
522//    already a store in a packet, there can not be a new value store.
523//    Arch Spec: 3.4.4.2
524bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
525      const MachineInstr *PacketMI, unsigned DepReg) {
526  // Make sure we are looking at the store, that can be promoted.
527  if (!HII->mayBeNewStore(MI))
528    return false;
529
530  // Make sure there is dependency and can be new value'd.
531  const MachineOperand &Val = getStoreValueOperand(MI);
532  if (Val.isReg() && Val.getReg() != DepReg)
533    return false;
534
535  const MCInstrDesc& MCID = PacketMI->getDesc();
536
537  // First operand is always the result.
538  const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
539  // Double regs can not feed into new value store: PRM section: 5.4.2.2.
540  if (PacketRC == &Hexagon::DoubleRegsRegClass)
541    return false;
542
543  // New-value stores are of class NV (slot 0), dual stores require class ST
544  // in slot 0 (PRM 5.5).
545  for (auto I : CurrentPacketMIs) {
546    SUnit *PacketSU = MIToSUnit.find(I)->second;
547    if (PacketSU->getInstr()->mayStore())
548      return false;
549  }
550
551  // Make sure it's NOT the post increment register that we are going to
552  // new value.
553  if (HII->isPostIncrement(MI) &&
554      getPostIncrementOperand(MI, HII).getReg() == DepReg) {
555    return false;
556  }
557
558  if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
559      getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
560    // If source is post_inc, or absolute-set addressing, it can not feed
561    // into new value store
562    //   r3 = memw(r2++#4)
563    //   memw(r30 + #-1404) = r2.new -> can not be new value store
564    // arch spec section: 5.4.2.1.
565    return false;
566  }
567
568  if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
569    return false;
570
571  // If the source that feeds the store is predicated, new value store must
572  // also be predicated.
573  if (HII->isPredicated(PacketMI)) {
574    if (!HII->isPredicated(MI))
575      return false;
576
577    // Check to make sure that they both will have their predicates
578    // evaluate identically.
579    unsigned predRegNumSrc = 0;
580    unsigned predRegNumDst = 0;
581    const TargetRegisterClass* predRegClass = nullptr;
582
583    // Get predicate register used in the source instruction.
584    for (auto &MO : PacketMI->operands()) {
585      if (!MO.isReg())
586        continue;
587      predRegNumSrc = MO.getReg();
588      predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
589      if (predRegClass == &Hexagon::PredRegsRegClass)
590        break;
591    }
592    assert((predRegClass == &Hexagon::PredRegsRegClass) &&
593        "predicate register not found in a predicated PacketMI instruction");
594
595    // Get predicate register used in new-value store instruction.
596    for (auto &MO : MI->operands()) {
597      if (!MO.isReg())
598        continue;
599      predRegNumDst = MO.getReg();
600      predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
601      if (predRegClass == &Hexagon::PredRegsRegClass)
602        break;
603    }
604    assert((predRegClass == &Hexagon::PredRegsRegClass) &&
605           "predicate register not found in a predicated MI instruction");
606
607    // New-value register producer and user (store) need to satisfy these
608    // constraints:
609    // 1) Both instructions should be predicated on the same register.
610    // 2) If producer of the new-value register is .new predicated then store
611    // should also be .new predicated and if producer is not .new predicated
612    // then store should not be .new predicated.
613    // 3) Both new-value register producer and user should have same predicate
614    // sense, i.e, either both should be negated or both should be non-negated.
615    if (predRegNumDst != predRegNumSrc ||
616        HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI)  ||
617        getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
618      return false;
619  }
620
621  // Make sure that other than the new-value register no other store instruction
622  // register has been modified in the same packet. Predicate registers can be
623  // modified by they should not be modified between the producer and the store
624  // instruction as it will make them both conditional on different values.
625  // We already know this to be true for all the instructions before and
626  // including PacketMI. Howerver, we need to perform the check for the
627  // remaining instructions in the packet.
628
629  unsigned StartCheck = 0;
630
631  for (auto I : CurrentPacketMIs) {
632    SUnit *TempSU = MIToSUnit.find(I)->second;
633    MachineInstr* TempMI = TempSU->getInstr();
634
635    // Following condition is true for all the instructions until PacketMI is
636    // reached (StartCheck is set to 0 before the for loop).
637    // StartCheck flag is 1 for all the instructions after PacketMI.
638    if (TempMI != PacketMI && !StartCheck) // Start processing only after
639      continue;                            // encountering PacketMI.
640
641    StartCheck = 1;
642    if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
643      continue;
644
645    for (auto &MO : MI->operands())
646      if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
647        return false;
648  }
649
650  // Make sure that for non-POST_INC stores:
651  // 1. The only use of reg is DepReg and no other registers.
652  //    This handles V4 base+index registers.
653  //    The following store can not be dot new.
654  //    Eg.   r0 = add(r0, #3)
655  //          memw(r1+r0<<#2) = r0
656  if (!HII->isPostIncrement(MI)) {
657    for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
658      const MachineOperand &MO = MI->getOperand(opNum);
659      if (MO.isReg() && MO.getReg() == DepReg)
660        return false;
661    }
662  }
663
664  // If data definition is because of implicit definition of the register,
665  // do not newify the store. Eg.
666  // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
667  // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
668  for (auto &MO : PacketMI->operands()) {
669    if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
670      continue;
671    unsigned R = MO.getReg();
672    if (R == DepReg || HRI->isSuperRegister(DepReg, R))
673      return false;
674  }
675
676  // Handle imp-use of super reg case. There is a target independent side
677  // change that should prevent this situation but I am handling it for
678  // just-in-case. For example, we cannot newify R2 in the following case:
679  // %R3<def> = A2_tfrsi 0;
680  // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
681  for (auto &MO : MI->operands()) {
682    if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
683      return false;
684  }
685
686  // Can be dot new store.
687  return true;
688}
689
690// Can this MI to promoted to either new value store or new value jump.
691bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
692      const SUnit *PacketSU, unsigned DepReg,
693      MachineBasicBlock::iterator &MII) {
694  if (!HII->mayBeNewStore(MI))
695    return false;
696
697  // Check to see the store can be new value'ed.
698  MachineInstr *PacketMI = PacketSU->getInstr();
699  if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
700    return true;
701
702  // Check to see the compare/jump can be new value'ed.
703  // This is done as a pass on its own. Don't need to check it here.
704  return false;
705}
706
707static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
708  for (auto &MO : I->operands())
709    if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
710      return true;
711  return false;
712}
713
714// Check to see if an instruction can be dot new
715// There are three kinds.
716// 1. dot new on predicate - V2/V3/V4
717// 2. dot new on stores NV/ST - V4
718// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
719bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
720      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
721      const TargetRegisterClass* RC) {
722  // Already a dot new instruction.
723  if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
724    return false;
725
726  if (!isNewifiable(MI))
727    return false;
728
729  const MachineInstr *PI = PacketSU->getInstr();
730
731  // The "new value" cannot come from inline asm.
732  if (PI->isInlineAsm())
733    return false;
734
735  // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
736  // sense.
737  if (PI->isImplicitDef())
738    return false;
739
740  // If dependency is trough an implicitly defined register, we should not
741  // newify the use.
742  if (isImplicitDependency(PI, DepReg))
743    return false;
744
745  const MCInstrDesc& MCID = PI->getDesc();
746  const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
747  if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
748    return false;
749
750  // predicate .new
751  // bug 5670: until that is fixed
752  // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
753  if (RC == &Hexagon::PredRegsRegClass)
754    if (HII->isCondInst(MI) || MI->isReturn())
755      return HII->predCanBeUsedAsDotNew(PI, DepReg);
756
757  if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
758    return false;
759
760  // Create a dot new machine instruction to see if resources can be
761  // allocated. If not, bail out now.
762  int NewOpcode = HII->getDotNewOp(MI);
763  const MCInstrDesc &D = HII->get(NewOpcode);
764  MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
765  bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
766  MF.DeleteMachineInstr(NewMI);
767  if (!ResourcesAvailable)
768    return false;
769
770  // New Value Store only. New Value Jump generated as a separate pass.
771  if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
772    return false;
773
774  return true;
775}
776
777// Go through the packet instructions and search for an anti dependency between
778// them and DepReg from MI. Consider this case:
779// Trying to add
780// a) %R1<def> = TFRI_cdNotPt %P3, 2
781// to this packet:
782// {
783//   b) %P0<def> = C2_or %P3<kill>, %P0<kill>
784//   c) %P3<def> = C2_tfrrp %R23
785//   d) %R1<def> = C2_cmovenewit %P3, 4
786//  }
787// The P3 from a) and d) will be complements after
788// a)'s P3 is converted to .new form
789// Anti-dep between c) and b) is irrelevant for this case
790bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
791                                                        unsigned DepReg) {
792  SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
793
794  for (auto I : CurrentPacketMIs) {
795    // We only care for dependencies to predicated instructions
796    if (!HII->isPredicated(I))
797      continue;
798
799    // Scheduling Unit for current insn in the packet
800    SUnit *PacketSU = MIToSUnit.find(I)->second;
801
802    // Look at dependencies between current members of the packet and
803    // predicate defining instruction MI. Make sure that dependency is
804    // on the exact register we care about.
805    if (PacketSU->isSucc(PacketSUDep)) {
806      for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
807        auto &Dep = PacketSU->Succs[i];
808        if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
809            Dep.getReg() == DepReg)
810          return true;
811      }
812    }
813  }
814
815  return false;
816}
817
818
819/// Gets the predicate register of a predicated instruction.
820static unsigned getPredicatedRegister(MachineInstr *MI,
821                                      const HexagonInstrInfo *QII) {
822  /// We use the following rule: The first predicate register that is a use is
823  /// the predicate register of a predicated instruction.
824  assert(QII->isPredicated(MI) && "Must be predicated instruction");
825
826  for (auto &Op : MI->operands()) {
827    if (Op.isReg() && Op.getReg() && Op.isUse() &&
828        Hexagon::PredRegsRegClass.contains(Op.getReg()))
829      return Op.getReg();
830  }
831
832  llvm_unreachable("Unknown instruction operand layout");
833  return 0;
834}
835
836// Given two predicated instructions, this function detects whether
837// the predicates are complements.
838bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1,
839                                                     MachineInstr *MI2) {
840  // If we don't know the predicate sense of the instructions bail out early, we
841  // need it later.
842  if (getPredicateSense(MI1, HII) == PK_Unknown ||
843      getPredicateSense(MI2, HII) == PK_Unknown)
844    return false;
845
846  // Scheduling unit for candidate.
847  SUnit *SU = MIToSUnit[MI1];
848
849  // One corner case deals with the following scenario:
850  // Trying to add
851  // a) %R24<def> = A2_tfrt %P0, %R25
852  // to this packet:
853  // {
854  //   b) %R25<def> = A2_tfrf %P0, %R24
855  //   c) %P0<def> = C2_cmpeqi %R26, 1
856  // }
857  //
858  // On general check a) and b) are complements, but presence of c) will
859  // convert a) to .new form, and then it is not a complement.
860  // We attempt to detect it by analyzing existing dependencies in the packet.
861
862  // Analyze relationships between all existing members of the packet.
863  // Look for Anti dependecy on the same predicate reg as used in the
864  // candidate.
865  for (auto I : CurrentPacketMIs) {
866    // Scheduling Unit for current insn in the packet.
867    SUnit *PacketSU = MIToSUnit.find(I)->second;
868
869    // If this instruction in the packet is succeeded by the candidate...
870    if (PacketSU->isSucc(SU)) {
871      for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
872        auto Dep = PacketSU->Succs[i];
873        // The corner case exist when there is true data dependency between
874        // candidate and one of current packet members, this dep is on
875        // predicate reg, and there already exist anti dep on the same pred in
876        // the packet.
877        if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
878            Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
879          // Here I know that I is predicate setting instruction with true
880          // data dep to candidate on the register we care about - c) in the
881          // above example. Now I need to see if there is an anti dependency
882          // from c) to any other instruction in the same packet on the pred
883          // reg of interest.
884          if (restrictingDepExistInPacket(I, Dep.getReg()))
885            return false;
886        }
887      }
888    }
889  }
890
891  // If the above case does not apply, check regular complement condition.
892  // Check that the predicate register is the same and that the predicate
893  // sense is different We also need to differentiate .old vs. .new: !p0
894  // is not complementary to p0.new.
895  unsigned PReg1 = getPredicatedRegister(MI1, HII);
896  unsigned PReg2 = getPredicatedRegister(MI2, HII);
897  return PReg1 == PReg2 &&
898         Hexagon::PredRegsRegClass.contains(PReg1) &&
899         Hexagon::PredRegsRegClass.contains(PReg2) &&
900         getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
901         HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
902}
903
904// Initialize packetizer flags.
905void HexagonPacketizerList::initPacketizerState() {
906  Dependence = false;
907  PromotedToDotNew = false;
908  GlueToNewValueJump = false;
909  GlueAllocframeStore = false;
910  FoundSequentialDependence = false;
911}
912
913// Ignore bundling of pseudo instructions.
914bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI,
915      const MachineBasicBlock*) {
916  if (MI->isDebugValue())
917    return true;
918
919  if (MI->isCFIInstruction())
920    return false;
921
922  // We must print out inline assembly.
923  if (MI->isInlineAsm())
924    return false;
925
926  if (MI->isImplicitDef())
927    return false;
928
929  // We check if MI has any functional units mapped to it. If it doesn't,
930  // we ignore the instruction.
931  const MCInstrDesc& TID = MI->getDesc();
932  auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
933  unsigned FuncUnits = IS->getUnits();
934  return !FuncUnits;
935}
936
937bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) {
938  if (MI->isEHLabel() || MI->isCFIInstruction())
939    return true;
940
941  // Consider inline asm to not be a solo instruction by default.
942  // Inline asm will be put in a packet temporarily, but then it will be
943  // removed, and placed outside of the packet (before or after, depending
944  // on dependencies).  This is to reduce the impact of inline asm as a
945  // "packet splitting" instruction.
946  if (MI->isInlineAsm() && !ScheduleInlineAsm)
947    return true;
948
949  // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
950  // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
951  // They must not be grouped with other instructions in a packet.
952  if (isSchedBarrier(MI))
953    return true;
954
955  if (HII->isSolo(MI))
956    return true;
957
958  if (MI->getOpcode() == Hexagon::A2_nop)
959    return true;
960
961  return false;
962}
963
964
965// Quick check if instructions MI and MJ cannot coexist in the same packet.
966// Limit the tests to be "one-way", e.g.  "if MI->isBranch and MJ->isInlineAsm",
967// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
968// For full test call this function twice:
969//   cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
970// Doing the test only one way saves the amount of code in this function,
971// since every test would need to be repeated with the MI and MJ reversed.
972static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
973      const HexagonInstrInfo &HII) {
974  const MachineFunction *MF = MI->getParent()->getParent();
975  if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
976      HII.isHVXMemWithAIndirect(MI, MJ))
977    return true;
978
979  // An inline asm cannot be together with a branch, because we may not be
980  // able to remove the asm out after packetizing (i.e. if the asm must be
981  // moved past the bundle).  Similarly, two asms cannot be together to avoid
982  // complications when determining their relative order outside of a bundle.
983  if (MI->isInlineAsm())
984    return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
985           MJ->isCall() || MJ->isTerminator();
986
987  // "False" really means that the quick check failed to determine if
988  // I and J cannot coexist.
989  return false;
990}
991
992
993// Full, symmetric check.
994bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
995      const MachineInstr *MJ) {
996  return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
997}
998
999void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
1000  for (auto &B : MF) {
1001    MachineBasicBlock::iterator BundleIt;
1002    MachineBasicBlock::instr_iterator NextI;
1003    for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
1004      NextI = std::next(I);
1005      MachineInstr *MI = &*I;
1006      if (MI->isBundle())
1007        BundleIt = I;
1008      if (!MI->isInsideBundle())
1009        continue;
1010
1011      // Decide on where to insert the instruction that we are pulling out.
1012      // Debug instructions always go before the bundle, but the placement of
1013      // INLINE_ASM depends on potential dependencies.  By default, try to
1014      // put it before the bundle, but if the asm writes to a register that
1015      // other instructions in the bundle read, then we need to place it
1016      // after the bundle (to preserve the bundle semantics).
1017      bool InsertBeforeBundle;
1018      if (MI->isInlineAsm())
1019        InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI);
1020      else if (MI->isDebugValue())
1021        InsertBeforeBundle = true;
1022      else
1023        continue;
1024
1025      BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
1026    }
1027  }
1028}
1029
1030// Check if a given instruction is of class "system".
1031static bool isSystemInstr(const MachineInstr *MI) {
1032  unsigned Opc = MI->getOpcode();
1033  switch (Opc) {
1034    case Hexagon::Y2_barrier:
1035    case Hexagon::Y2_dcfetchbo:
1036      return true;
1037  }
1038  return false;
1039}
1040
1041bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
1042                                              const MachineInstr *J) {
1043  // The dependence graph may not include edges between dead definitions,
1044  // so without extra checks, we could end up packetizing two instruction
1045  // defining the same (dead) register.
1046  if (I->isCall() || J->isCall())
1047    return false;
1048  if (HII->isPredicated(I) || HII->isPredicated(J))
1049    return false;
1050
1051  BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
1052  for (auto &MO : I->operands()) {
1053    if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1054      continue;
1055    DeadDefs[MO.getReg()] = true;
1056  }
1057
1058  for (auto &MO : J->operands()) {
1059    if (!MO.isReg() || !MO.isDef() || !MO.isDead())
1060      continue;
1061    unsigned R = MO.getReg();
1062    if (R != Hexagon::USR_OVF && DeadDefs[R])
1063      return true;
1064  }
1065  return false;
1066}
1067
1068bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
1069                                                 const MachineInstr *J) {
1070  // A save callee-save register function call can only be in a packet
1071  // with instructions that don't write to the callee-save registers.
1072  if ((HII->isSaveCalleeSavedRegsCall(I) &&
1073       doesModifyCalleeSavedReg(J, HRI)) ||
1074      (HII->isSaveCalleeSavedRegsCall(J) &&
1075       doesModifyCalleeSavedReg(I, HRI)))
1076    return true;
1077
1078  // Two control flow instructions cannot go in the same packet.
1079  if (isControlFlow(I) && isControlFlow(J))
1080    return true;
1081
1082  // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
1083  // contain a speculative indirect jump,
1084  // a new-value compare jump or a dealloc_return.
1085  auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
1086    if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
1087      return true;
1088    if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
1089      return true;
1090    return false;
1091  };
1092
1093  if (HII->isLoopN(I) && isBadForLoopN(J))
1094    return true;
1095  if (HII->isLoopN(J) && isBadForLoopN(I))
1096    return true;
1097
1098  // dealloc_return cannot appear in the same packet as a conditional or
1099  // unconditional jump.
1100  return HII->isDeallocRet(I) &&
1101         (J->isBranch() || J->isCall() || J->isBarrier());
1102}
1103
1104bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
1105                                                    const MachineInstr *J) {
1106  bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
1107  bool StoreI = I->mayStore(), StoreJ = J->mayStore();
1108  if ((SysI && StoreJ) || (SysJ && StoreI))
1109    return true;
1110
1111  if (StoreI && StoreJ) {
1112    if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
1113      return true;
1114  } else {
1115    // A memop cannot be in the same packet with another memop or a store.
1116    // Two stores can be together, but here I and J cannot both be stores.
1117    bool MopStI = HII->isMemOp(I) || StoreI;
1118    bool MopStJ = HII->isMemOp(J) || StoreJ;
1119    if (MopStI && MopStJ)
1120      return true;
1121  }
1122
1123  return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
1124}
1125
1126// SUI is the current instruction that is out side of the current packet.
1127// SUJ is the current instruction inside the current packet against which that
1128// SUI will be packetized.
1129bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
1130  MachineInstr *I = SUI->getInstr();
1131  MachineInstr *J = SUJ->getInstr();
1132  assert(I && J && "Unable to packetize null instruction!");
1133
1134  // Clear IgnoreDepMIs when Packet starts.
1135  if (CurrentPacketMIs.size() == 1)
1136    IgnoreDepMIs.clear();
1137
1138  MachineBasicBlock::iterator II = I;
1139  const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
1140
1141  // Solo instructions cannot go in the packet.
1142  assert(!isSoloInstruction(I) && "Unexpected solo instr!");
1143
1144  if (cannotCoexist(I, J))
1145    return false;
1146
1147  Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
1148  if (Dependence)
1149    return false;
1150
1151  // V4 allows dual stores. It does not allow second store, if the first
1152  // store is not in SLOT0. New value store, new value jump, dealloc_return
1153  // and memop always take SLOT0. Arch spec 3.4.4.2.
1154  Dependence = hasV4SpecificDependence(I, J);
1155  if (Dependence)
1156    return false;
1157
1158  // If an instruction feeds new value jump, glue it.
1159  MachineBasicBlock::iterator NextMII = I;
1160  ++NextMII;
1161  if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
1162    MachineInstr *NextMI = NextMII;
1163
1164    bool secondRegMatch = false;
1165    const MachineOperand &NOp0 = NextMI->getOperand(0);
1166    const MachineOperand &NOp1 = NextMI->getOperand(1);
1167
1168    if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
1169      secondRegMatch = true;
1170
1171    for (auto I : CurrentPacketMIs) {
1172      SUnit *PacketSU = MIToSUnit.find(I)->second;
1173      MachineInstr *PI = PacketSU->getInstr();
1174      // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
1175      if (PI->isCall()) {
1176        Dependence = true;
1177        break;
1178      }
1179      // Validate:
1180      // 1. Packet does not have a store in it.
1181      // 2. If the first operand of the nvj is newified, and the second
1182      //    operand is also a reg, it (second reg) is not defined in
1183      //    the same packet.
1184      // 3. If the second operand of the nvj is newified, (which means
1185      //    first operand is also a reg), first reg is not defined in
1186      //    the same packet.
1187      if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
1188          HII->isLoopN(PI)) {
1189        Dependence = true;
1190        break;
1191      }
1192      // Check #2/#3.
1193      const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
1194      if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
1195        Dependence = true;
1196        break;
1197      }
1198    }
1199
1200    if (Dependence)
1201      return false;
1202    GlueToNewValueJump = true;
1203  }
1204
1205  // There no dependency between a prolog instruction and its successor.
1206  if (!SUJ->isSucc(SUI))
1207    return true;
1208
1209  for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
1210    if (FoundSequentialDependence)
1211      break;
1212
1213    if (SUJ->Succs[i].getSUnit() != SUI)
1214      continue;
1215
1216    SDep::Kind DepType = SUJ->Succs[i].getKind();
1217    // For direct calls:
1218    // Ignore register dependences for call instructions for packetization
1219    // purposes except for those due to r31 and predicate registers.
1220    //
1221    // For indirect calls:
1222    // Same as direct calls + check for true dependences to the register
1223    // used in the indirect call.
1224    //
1225    // We completely ignore Order dependences for call instructions.
1226    //
1227    // For returns:
1228    // Ignore register dependences for return instructions like jumpr,
1229    // dealloc return unless we have dependencies on the explicit uses
1230    // of the registers used by jumpr (like r31) or dealloc return
1231    // (like r29 or r30).
1232    //
1233    // TODO: Currently, jumpr is handling only return of r31. So, the
1234    // following logic (specificaly isCallDependent) is working fine.
1235    // We need to enable jumpr for register other than r31 and then,
1236    // we need to rework the last part, where it handles indirect call
1237    // of that (isCallDependent) function. Bug 6216 is opened for this.
1238    unsigned DepReg = 0;
1239    const TargetRegisterClass *RC = nullptr;
1240    if (DepType == SDep::Data) {
1241      DepReg = SUJ->Succs[i].getReg();
1242      RC = HRI->getMinimalPhysRegClass(DepReg);
1243    }
1244
1245    if (I->isCall() || I->isReturn()) {
1246      if (!isRegDependence(DepType))
1247        continue;
1248      if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
1249        continue;
1250    }
1251
1252    if (DepType == SDep::Data) {
1253      if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
1254        if (promoteToDotCur(J, DepType, II, RC))
1255          continue;
1256    }
1257
1258    // Data dpendence ok if we have load.cur.
1259    if (DepType == SDep::Data && HII->isDotCurInst(J)) {
1260      if (HII->isV60VectorInstruction(I))
1261        continue;
1262    }
1263
1264    // For instructions that can be promoted to dot-new, try to promote.
1265    if (DepType == SDep::Data) {
1266      if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
1267        if (promoteToDotNew(I, DepType, II, RC)) {
1268          PromotedToDotNew = true;
1269          continue;
1270        }
1271      }
1272      if (HII->isNewValueJump(I))
1273        continue;
1274    }
1275
1276    // For predicated instructions, if the predicates are complements then
1277    // there can be no dependence.
1278    if (HII->isPredicated(I) && HII->isPredicated(J) &&
1279        arePredicatesComplements(I, J)) {
1280      // Not always safe to do this translation.
1281      // DAG Builder attempts to reduce dependence edges using transitive
1282      // nature of dependencies. Here is an example:
1283      //
1284      // r0 = tfr_pt ... (1)
1285      // r0 = tfr_pf ... (2)
1286      // r0 = tfr_pt ... (3)
1287      //
1288      // There will be an output dependence between (1)->(2) and (2)->(3).
1289      // However, there is no dependence edge between (1)->(3). This results
1290      // in all 3 instructions going in the same packet. We ignore dependce
1291      // only once to avoid this situation.
1292      auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
1293      if (Itr != IgnoreDepMIs.end()) {
1294        Dependence = true;
1295        return false;
1296      }
1297      IgnoreDepMIs.push_back(I);
1298      continue;
1299    }
1300
1301    // Ignore Order dependences between unconditional direct branches
1302    // and non-control-flow instructions.
1303    if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
1304        DepType == SDep::Order)
1305      continue;
1306
1307    // Ignore all dependences for jumps except for true and output
1308    // dependences.
1309    if (I->isConditionalBranch() && DepType != SDep::Data &&
1310        DepType != SDep::Output)
1311      continue;
1312
1313    // Ignore output dependences due to superregs. We can write to two
1314    // different subregisters of R1:0 for instance in the same cycle.
1315
1316    // If neither I nor J defines DepReg, then this is a superfluous output
1317    // dependence. The dependence must be of the form:
1318    //   R0 = ...
1319    //   R1 = ...
1320    // and there is an output dependence between the two instructions with
1321    // DepReg = D0.
1322    // We want to ignore these dependences. Ideally, the dependence
1323    // constructor should annotate such dependences. We can then avoid this
1324    // relatively expensive check.
1325    //
1326    if (DepType == SDep::Output) {
1327      // DepReg is the register that's responsible for the dependence.
1328      unsigned DepReg = SUJ->Succs[i].getReg();
1329
1330      // Check if I and J really defines DepReg.
1331      if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
1332        continue;
1333      FoundSequentialDependence = true;
1334      break;
1335    }
1336
1337    // For Order dependences:
1338    // 1. On V4 or later, volatile loads/stores can be packetized together,
1339    //    unless other rules prevent is.
1340    // 2. Store followed by a load is not allowed.
1341    // 3. Store followed by a store is only valid on V4 or later.
1342    // 4. Load followed by any memory operation is allowed.
1343    if (DepType == SDep::Order) {
1344      if (!PacketizeVolatiles) {
1345        bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
1346        if (OrdRefs) {
1347          FoundSequentialDependence = true;
1348          break;
1349        }
1350      }
1351      // J is first, I is second.
1352      bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
1353      bool LoadI = I->mayLoad(), StoreI = I->mayStore();
1354      if (StoreJ) {
1355        // Two stores are only allowed on V4+. Load following store is never
1356        // allowed.
1357        if (LoadI) {
1358          FoundSequentialDependence = true;
1359          break;
1360        }
1361      } else if (!LoadJ || (!LoadI && !StoreI)) {
1362        // If J is neither load nor store, assume a dependency.
1363        // If J is a load, but I is neither, also assume a dependency.
1364        FoundSequentialDependence = true;
1365        break;
1366      }
1367      // Store followed by store: not OK on V2.
1368      // Store followed by load: not OK on all.
1369      // Load followed by store: OK on all.
1370      // Load followed by load: OK on all.
1371      continue;
1372    }
1373
1374    // For V4, special case ALLOCFRAME. Even though there is dependency
1375    // between ALLOCFRAME and subsequent store, allow it to be packetized
1376    // in a same packet. This implies that the store is using the caller's
1377    // SP. Hence, offset needs to be updated accordingly.
1378    if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
1379      unsigned Opc = I->getOpcode();
1380      switch (Opc) {
1381        case Hexagon::S2_storerd_io:
1382        case Hexagon::S2_storeri_io:
1383        case Hexagon::S2_storerh_io:
1384        case Hexagon::S2_storerb_io:
1385          if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
1386            int64_t Imm = I->getOperand(1).getImm();
1387            int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
1388            if (HII->isValidOffset(Opc, NewOff)) {
1389              GlueAllocframeStore = true;
1390              // Since this store is to be glued with allocframe in the same
1391              // packet, it will use SP of the previous stack frame, i.e.
1392              // caller's SP. Therefore, we need to recalculate offset
1393              // according to this change.
1394              I->getOperand(1).setImm(NewOff);
1395              continue;
1396            }
1397          }
1398        default:
1399          break;
1400      }
1401    }
1402
1403    // Skip over anti-dependences. Two instructions that are anti-dependent
1404    // can share a packet.
1405    if (DepType != SDep::Anti) {
1406      FoundSequentialDependence = true;
1407      break;
1408    }
1409  }
1410
1411  if (FoundSequentialDependence) {
1412    Dependence = true;
1413    return false;
1414  }
1415
1416  return true;
1417}
1418
1419bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
1420  MachineInstr *I = SUI->getInstr();
1421  MachineInstr *J = SUJ->getInstr();
1422  assert(I && J && "Unable to packetize null instruction!");
1423
1424  if (cannotCoexist(I, J))
1425    return false;
1426
1427  if (!Dependence)
1428    return true;
1429
1430  // Check if the instruction was promoted to a dot-new. If so, demote it
1431  // back into a dot-old.
1432  if (PromotedToDotNew)
1433    demoteToDotOld(I);
1434
1435  cleanUpDotCur();
1436  // Check if the instruction (must be a store) was glued with an allocframe
1437  // instruction. If so, restore its offset to its original value, i.e. use
1438  // current SP instead of caller's SP.
1439  if (GlueAllocframeStore) {
1440    unsigned FrameSize = MF.getFrameInfo()->getStackSize();
1441    MachineOperand &MOff = I->getOperand(1);
1442    MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
1443  }
1444  return false;
1445}
1446
1447
1448MachineBasicBlock::iterator
1449HexagonPacketizerList::addToPacket(MachineInstr *MI) {
1450  MachineBasicBlock::iterator MII = MI;
1451  MachineBasicBlock *MBB = MI->getParent();
1452  if (MI->isImplicitDef()) {
1453    unsigned R = MI->getOperand(0).getReg();
1454    if (Hexagon::IntRegsRegClass.contains(R)) {
1455      MCSuperRegIterator S(R, HRI, false);
1456      MI->addOperand(MachineOperand::CreateReg(*S, true, true));
1457    }
1458    return MII;
1459  }
1460  assert(ResourceTracker->canReserveResources(MI));
1461
1462  bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
1463  bool Good = true;
1464
1465  if (GlueToNewValueJump) {
1466    MachineInstr *NvjMI = ++MII;
1467    // We need to put both instructions in the same packet: MI and NvjMI.
1468    // Either of them can require a constant extender. Try to add both to
1469    // the current packet, and if that fails, end the packet and start a
1470    // new one.
1471    ResourceTracker->reserveResources(MI);
1472    if (ExtMI)
1473      Good = tryAllocateResourcesForConstExt(true);
1474
1475    bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
1476    if (Good) {
1477      if (ResourceTracker->canReserveResources(NvjMI))
1478        ResourceTracker->reserveResources(NvjMI);
1479      else
1480        Good = false;
1481    }
1482    if (Good && ExtNvjMI)
1483      Good = tryAllocateResourcesForConstExt(true);
1484
1485    if (!Good) {
1486      endPacket(MBB, MI);
1487      assert(ResourceTracker->canReserveResources(MI));
1488      ResourceTracker->reserveResources(MI);
1489      if (ExtMI) {
1490        assert(canReserveResourcesForConstExt());
1491        tryAllocateResourcesForConstExt(true);
1492      }
1493      assert(ResourceTracker->canReserveResources(NvjMI));
1494      ResourceTracker->reserveResources(NvjMI);
1495      if (ExtNvjMI) {
1496        assert(canReserveResourcesForConstExt());
1497        reserveResourcesForConstExt();
1498      }
1499    }
1500    CurrentPacketMIs.push_back(MI);
1501    CurrentPacketMIs.push_back(NvjMI);
1502    return MII;
1503  }
1504
1505  ResourceTracker->reserveResources(MI);
1506  if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
1507    endPacket(MBB, MI);
1508    if (PromotedToDotNew)
1509      demoteToDotOld(MI);
1510    ResourceTracker->reserveResources(MI);
1511    reserveResourcesForConstExt();
1512  }
1513
1514  CurrentPacketMIs.push_back(MI);
1515  return MII;
1516}
1517
1518void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
1519      MachineInstr *MI) {
1520  OldPacketMIs = CurrentPacketMIs;
1521  VLIWPacketizerList::endPacket(MBB, MI);
1522}
1523
1524bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) {
1525  return !producesStall(MI);
1526}
1527
1528
1529// Return true when ConsMI uses a register defined by ProdMI.
1530static bool isDependent(const MachineInstr *ProdMI,
1531      const MachineInstr *ConsMI) {
1532  if (!ProdMI->getOperand(0).isReg())
1533    return false;
1534  unsigned DstReg = ProdMI->getOperand(0).getReg();
1535
1536  for (auto &Op : ConsMI->operands())
1537    if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
1538      // The MIs depend on each other.
1539      return true;
1540
1541  return false;
1542}
1543
1544// V60 forward scheduling.
1545bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
1546  // Check whether the previous packet is in a different loop. If this is the
1547  // case, there is little point in trying to avoid a stall because that would
1548  // favor the rare case (loop entry) over the common case (loop iteration).
1549  //
1550  // TODO: We should really be able to check all the incoming edges if this is
1551  // the first packet in a basic block, so we can avoid stalls from the loop
1552  // backedge.
1553  if (!OldPacketMIs.empty()) {
1554    auto *OldBB = OldPacketMIs.front()->getParent();
1555    auto *ThisBB = I->getParent();
1556    if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
1557      return false;
1558  }
1559
1560  // Check for stall between two vector instructions.
1561  if (HII->isV60VectorInstruction(I)) {
1562    for (auto J : OldPacketMIs) {
1563      if (!HII->isV60VectorInstruction(J))
1564        continue;
1565      if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
1566        return true;
1567    }
1568    return false;
1569  }
1570
1571  // Check for stall between two scalar instructions. First, check that
1572  // there is no definition of a use in the current packet, because it
1573  // may be a candidate for .new.
1574  for (auto J : CurrentPacketMIs)
1575    if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
1576      return false;
1577
1578  // Check for stall between I and instructions in the previous packet.
1579  if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
1580    for (auto J : OldPacketMIs) {
1581      if (HII->isV60VectorInstruction(J))
1582        continue;
1583      if (!HII->isLateInstrFeedsEarlyInstr(J, I))
1584        continue;
1585      if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
1586        return true;
1587    }
1588  }
1589
1590  return false;
1591}
1592
1593
1594//===----------------------------------------------------------------------===//
1595//                         Public Constructor Functions
1596//===----------------------------------------------------------------------===//
1597
1598FunctionPass *llvm::createHexagonPacketizer() {
1599  return new HexagonPacketizer();
1600}
1601
1602